From 898d4def88738b3637666ec5c8ff5ccd3c863693 Mon Sep 17 00:00:00 2001 From: Dewen Qi Date: Fri, 8 Jul 2022 12:00:08 -0700 Subject: [PATCH 1/2] change: Update contents of key pipeline notebooks based on latest SDK update --- ...ocess-train-evaluate-batch-transform.ipynb | 355 +- ...ain-evaluate-batch-transform_outputs.ipynb | 4503 ----------------- .../tabular/lambda-step/iam_helper.py | 42 +- .../sagemaker-pipelines-lambda-step.ipynb | 235 +- ...emaker-pipelines-lambda-step_outputs.ipynb | 1724 ------- ...pipeline-model-monitor-clarify-steps.ipynb | 305 +- ...register and deploy a pipeline model.ipynb | 349 +- .../sagemaker-pipelines-tuning-step.ipynb | 264 +- 8 files changed, 1058 insertions(+), 6719 deletions(-) delete mode 100644 sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb delete mode 100644 sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb diff --git a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb index 891f939a6a..d3b68fff80 100644 --- a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb +++ b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb @@ -110,20 +110,25 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import sys\n", "\n", - "!{sys.executable} -m pip install \"sagemaker==2.91.1\"\n", + "!{sys.executable} -m pip install \"sagemaker>=2.99.0\"\n", "\n", "import boto3\n", "import sagemaker\n", - "\n", + "from sagemaker.workflow.pipeline_context import PipelineSession\n", "\n", 
"sagemaker_session = sagemaker.session.Session()\n", "region = sagemaker_session.boto_region_name\n", "role = sagemaker.get_execution_role()\n", + "pipeline_session = PipelineSession()\n", "default_bucket = sagemaker_session.default_bucket()\n", "model_package_group_name = f\"AbaloneModelPackageGroupName\"" ] @@ -138,7 +143,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "!mkdir -p data" @@ -147,7 +156,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "local_path = \"data/abalone-dataset.csv\"\n", @@ -175,7 +188,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "local_path = \"data/abalone-dataset-batch\"\n", @@ -222,7 +239,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.parameters import (\n", @@ -231,7 +252,6 @@ " ParameterFloat,\n", ")\n", "\n", - "\n", "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", "instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", "model_approval_status = ParameterString(\n", @@ -275,19 +295,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "!mkdir -p abalone" + "!mkdir -p code" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile abalone/preprocessing.py\n", + "%%writefile code/preprocessing.py\n", "import argparse\n", "import os\n", "import requests\n", @@ -394,7 
+422,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.sklearn.processing import SKLearnProcessor\n", @@ -408,6 +440,7 @@ " instance_count=processing_instance_count,\n", " base_job_name=\"sklearn-abalone-process\",\n", " role=role,\n", + " sagemaker_session=pipeline_session,\n", ")" ] }, @@ -415,26 +448,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, use the processor instance to construct a `ProcessingStep`, along with the input and output channels, and the code that runs when the pipeline invokes pipeline execution. This is similar to a processor instance's `run()` method in the Python SDK.\n", - "\n", - "Note the `input_data` parameters passed into `ProcessingStep` is the input data used in the step. This input data is used by the processor instance when it is run.\n", + "Finally, we take the output of the processor's `run` method and pass that as arguments to the `ProcessingStep`. By passing the `pipeline_session` to the `sagemaker_session`, calling `.run()` does not launch the processing job, it returns the arguments needed to run the job as a step in the pipeline.\n", "\n", - "Also, note the `\"train_data\"` and `\"test_data\"` named channels specified in the output configuration for the processing job. Step `Properties` can be used in subsequent steps and resolve to their runtime values at execution. Specifically, this usage is called out when you define the training step." + "Note the `\"train_data\"` and `\"test_data\"` named channels specified in the output configuration for the processing job. Step `Properties` can be used in subsequent steps and resolve to their runtime values at execution. Specifically, this usage is called out when you define the training step." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", "from sagemaker.workflow.steps import ProcessingStep\n", "\n", - "\n", - "step_process = ProcessingStep(\n", - " name=\"AbaloneProcess\",\n", - " processor=sklearn_processor,\n", + "processor_args = sklearn_processor.run(\n", " inputs=[\n", " ProcessingInput(source=input_data, destination=\"/opt/ml/processing/input\"),\n", " ],\n", @@ -443,8 +475,10 @@ " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", " ],\n", - " code=\"abalone/preprocessing.py\",\n", - ")" + " code=\"code/preprocessing.py\",\n", + ")\n", + "\n", + "step_process = ProcessingStep(name=\"AbaloneProcess\", step_args=processor_args)" ] }, { @@ -470,11 +504,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.estimator import Estimator\n", - "\n", + "from sagemaker.inputs import TrainingInput\n", "\n", "model_path = f\"s3://{default_bucket}/AbaloneTrain\"\n", "image_uri = sagemaker.image_uris.retrieve(\n", @@ -490,6 +528,7 @@ " instance_count=1,\n", " output_path=model_path,\n", " role=role,\n", + " sagemaker_session=pipeline_session,\n", ")\n", "xgb_train.set_hyperparameters(\n", " objective=\"reg:linear\",\n", @@ -499,6 +538,21 @@ " gamma=4,\n", " min_child_weight=6,\n", " subsample=0.7,\n", + ")\n", + "\n", + "train_args = xgb_train.fit(\n", + " inputs={\n", + " \"train\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " \"validation\": TrainingInput(\n", + " 
s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", + " \"validation\"\n", + " ].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " }\n", ")" ] }, @@ -506,15 +560,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, use the estimator instance to construct a `TrainingStep` as well as the `properties` of the prior `ProcessingStep` used as input in the `TrainingStep` inputs and the code that's executed when the pipeline invokes the pipeline execution. This is similar to an estimator's `fit` method in the Python SDK.\n", + "Finally, we use the output of the estimator's `.fit()` method as arguments to the `TrainingStep`. By passing the `pipeline_session` to the `sagemaker_session`, calling `.fit()` does not launch the training job, it returns the arguments needed to run the job as a step in the pipeline.\n", "\n", - "Pass in the `S3Uri` of the `\"train_data\"` output channel to the `TrainingStep`. Also, use the other `\"test_data\"` output channel for model evaluation in the pipeline. The `properties` attribute of a Pipeline step matches the object model of the corresponding response of a describe call. These properties can be referenced as placeholder values and are resolved at runtime. For example, the `ProcessingStep` `properties` attribute matches the object model of the [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response object." + "Pass in the `S3Uri` of the `\"train_data\"` output channel to the `.fit()` method. Also, use the other `\"test_data\"` output channel for model evaluation in the pipeline. The `properties` attribute of a Pipeline step matches the object model of the corresponding response of a describe call. These properties can be referenced as placeholder values and are resolved at runtime. 
For example, the `ProcessingStep` `properties` attribute matches the object model of the [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response object." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.inputs import TrainingInput\n", @@ -523,19 +581,7 @@ "\n", "step_train = TrainingStep(\n", " name=\"AbaloneTrain\",\n", - " estimator=xgb_train,\n", - " inputs={\n", - " \"train\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " \"validation\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"validation\"\n", - " ].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " },\n", + " step_args=train_args,\n", ")" ] }, @@ -568,10 +614,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile abalone/evaluation.py\n", + "%%writefile code/evaluation.py\n", "import json\n", "import pathlib\n", "import pickle\n", @@ -628,7 +678,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.processing import ScriptProcessor\n", @@ -641,6 +695,24 @@ " instance_count=1,\n", " base_job_name=\"script-abalone-eval\",\n", " role=role,\n", + " sagemaker_session=pipeline_session,\n", + ")\n", + "\n", + "eval_args = script_eval.run(\n", + " inputs=[\n", + " ProcessingInput(\n", + " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " destination=\"/opt/ml/processing/model\",\n", + " ),\n", + " ProcessingInput(\n", + " 
source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", + " destination=\"/opt/ml/processing/test\",\n", + " ),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", + " ],\n", + " code=\"code/evaluation.py\",\n", ")" ] }, @@ -648,7 +720,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Use the processor instance to construct a `ProcessingStep`, along with the input and output channels and the code that runs when the pipeline invokes pipeline execution. This is similar to a processor instance's `run` method in the Python SDK.\n", + "Use the processor's arguments returned by `.run()` to construct a `ProcessingStep`, along with the input and output channels and the code that will be executed when the pipeline invokes pipeline execution.\n", "\n", "Specifically, the `S3ModelArtifacts` from the `step_train` `properties` and the `S3Uri` of the `\"test_data\"` output channel of the `step_process` `properties` are passed as inputs. The `TrainingStep` and `ProcessingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) and [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response objects, respectively." 
] @@ -656,7 +728,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.properties import PropertyFile\n", @@ -667,21 +743,7 @@ ")\n", "step_eval = ProcessingStep(\n", " name=\"AbaloneEval\",\n", - " processor=script_eval,\n", - " inputs=[\n", - " ProcessingInput(\n", - " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " destination=\"/opt/ml/processing/model\",\n", - " ),\n", - " ProcessingInput(\n", - " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", - " destination=\"/opt/ml/processing/test\",\n", - " ),\n", - " ],\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", - " ],\n", - " code=\"abalone/evaluation.py\",\n", + " step_args=eval_args,\n", " property_files=[evaluation_report],\n", ")" ] @@ -699,7 +761,7 @@ "source": [ "## Define a Create Model Step to Create a Model\n", "\n", - "In order to perform batch transformation using the example model, create a SageMaker model. \n", + "In order to perform batch transformation using the example model, create a SageMaker model.\n", "\n", "Specifically, pass in the `S3ModelArtifacts` from the `TrainingStep`, `step_train` properties. The `TrainingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) response object." 
] @@ -707,16 +769,19 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.model import Model\n", "\n", - "\n", "model = Model(\n", " image_uri=image_uri,\n", " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")" ] @@ -725,27 +790,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Supply the model input (`instance_type` and `accelerator_type`) for creating the SageMaker Model, and then define the `CreateModelStep`, passing in the inputs and the model instance defined before." + "Define the `ModelStep` by providing the return values from `model.create()` as the step arguments." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.inputs import CreateModelInput\n", - "from sagemaker.workflow.steps import CreateModelStep\n", - "\n", + "from sagemaker.workflow.model_step import ModelStep\n", "\n", - "inputs = CreateModelInput(\n", - " instance_type=\"ml.m5.large\",\n", - " accelerator_type=\"ml.eia1.medium\",\n", - ")\n", - "step_create_model = CreateModelStep(\n", + "step_create_model = ModelStep(\n", " name=\"AbaloneCreateModel\",\n", - " model=model,\n", - " inputs=inputs,\n", + " step_args=model.create(instance_type=\"ml.m5.large\", accelerator_type=\"ml.eia1.medium\"),\n", ")" ] }, @@ -763,7 +826,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.transformer import Transformer\n", @@ -787,7 +854,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.inputs 
import TransformInput\n", @@ -805,26 +876,35 @@ "source": [ "## Define a Register Model Step to Create a Model Package\n", "\n", - "Use the estimator instance specified in the training step to construct an instance of `RegisterModel`. The result of executing `RegisterModel` in a pipeline is a model package. A model package is an abstraction of reusable model artifacts that packages all ingredients required for inference. Primarily, it consists of an inference specification that defines the inference image to use along with an optional model weights location.\n", + "A model package is an abstraction of reusable model artifacts that packages all ingredients required for inference. Primarily, it consists of an inference specification that defines the inference image to use along with an optional model weights location.\n", "\n", "A model package group is a collection of model packages. A model package group can be created for a specific ML business problem, and new versions of the model packages can be added to it. Typically, customers are expected to create a ModelPackageGroup for a SageMaker pipeline so that model package versions can be added to the group for every SageMaker Pipeline run.\n", "\n", - "The construction of `RegisterModel` is similar to an estimator instance's `register` method in the Python SDK.\n", - "\n", - "Specifically, pass in the `S3ModelArtifacts` from the `TrainingStep`, `step_train` properties. 
The `TrainingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) response object.\n", + "To register a model in the Model Registry, we take the model created in the previous steps\n", + "```\n", + "model = Model(\n", + " image_uri=image_uri,\n", + " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + ")\n", + "```\n", + "and call the `.register()` function on it while passing all the parameters needed for registering the model.\n", "\n", - "Note that the specific model package group name provided in this notebook can be used in the model registry and CI/CD work with SageMaker Projects." + "We take the outputs of the `.register()` call and pass that to the `ModelStep` as step arguments." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.model_metrics import MetricsSource, ModelMetrics\n", - "from sagemaker.workflow.step_collections import RegisterModel\n", - "\n", "\n", "model_metrics = ModelMetrics(\n", " model_statistics=MetricsSource(\n", @@ -834,10 +914,8 @@ " content_type=\"application/json\",\n", " )\n", ")\n", - "step_register = RegisterModel(\n", - " name=\"AbaloneRegisterModel\",\n", - " estimator=xgb_train,\n", - " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + "\n", + "register_args = model.register(\n", " content_types=[\"text/csv\"],\n", " response_types=[\"text/csv\"],\n", " inference_instances=[\"ml.t2.medium\", \"ml.m5.xlarge\"],\n", @@ -845,7 +923,8 @@ " model_package_group_name=model_package_group_name,\n", " approval_status=model_approval_status,\n", " model_metrics=model_metrics,\n", - ")" + ")\n", + "step_register = ModelStep(name=\"AbaloneRegisterModel\", step_args=register_args)" ] }, { @@ -871,9 
+950,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" } @@ -915,7 +991,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", @@ -967,7 +1047,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.pipeline import Pipeline\n", @@ -1007,7 +1091,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import json\n", @@ -1029,7 +1117,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline.upsert(role_arn=role)" @@ -1045,7 +1137,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution = pipeline.start()" @@ -1063,7 +1159,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.describe()" @@ -1079,7 +1179,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.wait()" @@ -1095,7 +1199,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.list_steps()" @@ -1113,7 +1221,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from pprint 
import pprint\n", @@ -1139,7 +1251,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import time\n", @@ -1167,7 +1283,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution = pipeline.start(\n", @@ -1180,7 +1300,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.wait()" @@ -1189,7 +1313,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.list_steps()" @@ -1206,9 +1334,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" } @@ -1229,9 +1354,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" } @@ -1248,9 +1370,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "outputs_hidden": false - }, "pycharm": { "name": "#%%\n" } @@ -1283,4 +1402,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb deleted file mode 100644 index bda9ea6fc2..0000000000 --- a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb +++ /dev/null @@ -1,4503 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "3a9f8b86", - "metadata": { - "papermill": { - "duration": 
0.031042, - "end_time": "2022-04-18T00:26:38.898215", - "exception": false, - "start_time": "2022-04-18T00:26:38.867173", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Orchestrate Jobs to Train and Evaluate Models with Amazon SageMaker Pipelines\n", - "\n", - "Amazon SageMaker Pipelines offers machine learning (ML) application developers and operations engineers the ability to orchestrate SageMaker jobs and author reproducible ML pipelines. It also enables them to deploy custom-built models for inference in real-time with low latency, run offline inferences with Batch Transform, and track lineage of artifacts. They can institute sound operational practices in deploying and monitoring production workflows, deploying model artifacts, and tracking artifact lineage through a simple interface, adhering to safety and best practice paradigms for ML application development.\n", - "\n", - "The SageMaker Pipelines service supports a SageMaker Pipeline domain specific language (DSL), which is a declarative JSON specification. This DSL defines a directed acyclic graph (DAG) of pipeline parameters and SageMaker job steps. The SageMaker Python Software Developer Kit (SDK) streamlines the generation of the pipeline DSL using constructs that engineers and scientists are already familiar with.\n", - "\n", - "## Runtime\n", - "\n", - "This notebook takes approximately an hour to run.\n", - "\n", - "## Contents\n", - "\n", - "1. [SageMaker Pipelines](#SageMaker-Pipelines)\n", - "1. [Notebook Overview](#Notebook-Overview)\n", - "1. [A SageMaker Pipeline](#A-SageMaker-Pipeline)\n", - "1. [Dataset](#Dataset)\n", - "1. [Define Parameters to Parametrize Pipeline Execution](#Define-Parameters-to-Parametrize-Pipeline-Execution)\n", - "1. [Define a Processing Step for Feature Engineering](#Define-a-Processing-Step-for-Feature-Engineering)\n", - "1. [Define a Training Step to Train a Model](#Define-a-Training-Step-to-Train-a-Model)\n", - "1. 
[Define a Model Evaluation Step to Evaluate the Trained Model](#Define-a-Model-Evaluation-Step-to-Evaluate-the-Trained-Model)\n", - "1. [Define a Create Model Step to Create a Model](#Define-a-Create-Model-Step-to-Create-a-Model)\n", - "1. [Define a Transform Step to Perform Batch Transformation](#Define-a-Transform-Step-to-Perform-Batch-Transformation)\n", - "1. [Define a Register Model Step to Create a Model Package](#Define-a-Register-Model-Step-to-Create-a-Model-Package)\n", - "1. [Define a Fail Step to Terminate the Pipeline Execution and Mark it as Failed](#Define-a-Fail-Step-to-Terminate-the-Pipeline-Execution-and-Mark-it-as-Failed)\n", - "1. [Define a Condition Step to Check Accuracy and Conditionally Create a Model and Run a Batch Transformation and Register a Model in the Model Registry, Or Terminate the Execution in Failed State](#Define-a-Condition-Step-to-Check-Accuracy-and-Conditionally-Create-a-Model-and-Run-a-Batch-Transformation-and-Register-a-Model-in-the-Model-Registry,-Or-Terminate-the-Execution-in-Failed-State)\n", - "1. [Define a Pipeline of Parameters, Steps, and Conditions](#Define-a-Pipeline-of-Parameters,-Steps,-and-Conditions)\n", - "1. [Submit the pipeline to SageMaker and start execution](#Submit-the-pipeline-to-SageMaker-and-start-execution)\n", - "1. [Pipeline Operations: Examining and Waiting for Pipeline Execution](#Pipeline-Operations:-Examining-and-Waiting-for-Pipeline-Execution)\n", - " 1. [Examining the Evaluation](#Examining-the-Evaluation)\n", - " 1. [Lineage](#Lineage)\n", - " 1. 
[Parametrized Executions](#Parametrized-Executions)" - ] - }, - { - "cell_type": "markdown", - "id": "c052c2e3", - "metadata": { - "papermill": { - "duration": 0.031989, - "end_time": "2022-04-18T00:26:38.961142", - "exception": false, - "start_time": "2022-04-18T00:26:38.929153", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## SageMaker Pipelines\n", - "\n", - "SageMaker Pipelines supports the following activities, which are demonstrated in this notebook:\n", - "\n", - "* Pipelines - A DAG of steps and conditions to orchestrate SageMaker jobs and resource creation.\n", - "* Processing job steps - A simplified, managed experience on SageMaker to run data processing workloads, such as feature engineering, data validation, model evaluation, and model interpretation.\n", - "* Training job steps - An iterative process that teaches a model to make predictions by presenting examples from a training dataset.\n", - "* Conditional execution steps - A step that provides conditional execution of branches in a pipeline.\n", - "* Register model steps - A step that creates a model package resource in the Model Registry that can be used to create deployable models in Amazon SageMaker.\n", - "* Create model steps - A step that creates a model for use in transform steps or later publication as an endpoint.\n", - "* Transform job steps - A batch transform to preprocess datasets to remove noise or bias that interferes with training or inference from a dataset, get inferences from large datasets, and run inference when a persistent endpoint is not needed.\n", - "* Fail steps - A step that stops a pipeline execution and marks the pipeline execution as failed.\n", - "* Parametrized Pipeline executions - Enables variation in pipeline executions according to specified parameters." 
- ] - }, - { - "cell_type": "markdown", - "id": "37efa33f", - "metadata": { - "papermill": { - "duration": 0.034621, - "end_time": "2022-04-18T00:26:39.027923", - "exception": false, - "start_time": "2022-04-18T00:26:38.993302", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Notebook Overview\n", - "\n", - "This notebook shows how to:\n", - "\n", - "* Define a set of Pipeline parameters that can be used to parametrize a SageMaker Pipeline.\n", - "* Define a Processing step that performs cleaning, feature engineering, and splitting the input data into train and test data sets.\n", - "* Define a Training step that trains a model on the preprocessed train data set.\n", - "* Define a Processing step that evaluates the trained model's performance on the test dataset.\n", - "* Define a Create Model step that creates a model from the model artifacts used in training.\n", - "* Define a Transform step that performs batch transformation based on the model that was created.\n", - "* Define a Register Model step that creates a model package from the estimator and model artifacts used to train the model.\n", - "* Define a Conditional step that measures a condition based on output from prior steps and conditionally executes other steps.\n", - "* Define a Fail step with a customized error message indicating the cause of the execution failure.\n", - "* Define and create a Pipeline definition in a DAG, with the defined parameters and steps.\n", - "* Start a Pipeline execution and wait for execution to complete.\n", - "* Download the model evaluation report from the S3 bucket for examination.\n", - "* Start a second Pipeline execution." 
- ] - }, - { - "cell_type": "markdown", - "id": "b6ae16a6", - "metadata": { - "papermill": { - "duration": 0.031533, - "end_time": "2022-04-18T00:26:39.090351", - "exception": false, - "start_time": "2022-04-18T00:26:39.058818", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## A SageMaker Pipeline\n", - "\n", - "The pipeline that you create follows a typical machine learning (ML) application pattern of preprocessing, training, evaluation, model creation, batch transformation, and model registration:\n", - "\n", - "![A typical ML Application pipeline](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-full.png)" - ] - }, - { - "cell_type": "markdown", - "id": "171887b6", - "metadata": { - "papermill": { - "duration": 0.03037, - "end_time": "2022-04-18T00:26:39.151023", - "exception": false, - "start_time": "2022-04-18T00:26:39.120653", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Dataset\n", - "\n", - "The dataset you use is the [UCI Machine Learning Abalone Dataset](https://archive.ics.uci.edu/ml/datasets/abalone) [1]. The aim for this task is to determine the age of an abalone snail from its physical measurements. At the core, this is a regression problem.\n", - "\n", - "The dataset contains several features: length (the longest shell measurement), diameter (the diameter perpendicular to length), height (the height with meat in the shell), whole_weight (the weight of whole abalone), shucked_weight (the weight of meat), viscera_weight (the gut weight after bleeding), shell_weight (the weight after being dried), sex ('M', 'F', 'I' where 'I' is Infant), and rings (integer).\n", - "\n", - "The number of rings turns out to be a good approximation for age (age is rings + 1.5). 
However, to obtain this number requires cutting the shell through the cone, staining the section, and counting the number of rings through a microscope, which is a time-consuming task. However, the other physical measurements are easier to determine. You use the dataset to build a predictive model of the variable rings through these other physical measurements.\n", - "\n", - "Before you upload the data to an S3 bucket, upgrade the [Amazon SageMaker Python SDK](https://sagemaker.readthedocs.io/en/stable/) to the latest version and gather some constants you can use later in this notebook.\n", - "\n", - "> [1] Dua, D. and Graff, C. (2019). [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml). Irvine, CA: University of California, School of Information and Computer Science." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "017a6796", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:26:39.217502Z", - "iopub.status.busy": "2022-04-18T00:26:39.216724Z", - "iopub.status.idle": "2022-04-18T00:26:58.137345Z", - "shell.execute_reply": "2022-04-18T00:26:58.136909Z" - }, - "papermill": { - "duration": 18.956378, - "end_time": "2022-04-18T00:26:58.137468", - "exception": false, - "start_time": "2022-04-18T00:26:39.181090", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\r\n", - " from cryptography.utils import int_from_bytes\r\n", - "/opt/conda/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\r\n", - " from cryptography.utils import int_from_bytes\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: sagemaker in 
/opt/conda/lib/python3.7/site-packages (2.69.1.dev0)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting sagemaker\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Downloading sagemaker-2.86.2.tar.gz (521 kB)\r\n", - "\u001b[?25l\r", - "\u001b[K |▋ | 10 kB 29.8 MB/s eta 0:00:01\r", - "\u001b[K |█▎ | 20 kB 22.8 MB/s eta 0:00:01\r", - "\u001b[K |█▉ | 30 kB 16.7 MB/s eta 0:00:01\r", - "\u001b[K |██▌ | 40 kB 7.1 MB/s eta 0:00:01\r", - "\u001b[K |███▏ | 51 kB 6.2 MB/s eta 0:00:01\r", - "\u001b[K |███▊ | 61 kB 7.3 MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |████▍ | 71 kB 7.4 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 81 kB 8.3 MB/s eta 0:00:01\r", - "\u001b[K |█████▋ | 92 kB 9.2 MB/s eta 0:00:01\r", - "\u001b[K |██████▎ | 102 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 112 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████▌ | 122 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████▏ | 133 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████▉ | 143 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████▍ | 153 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 163 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████▊ | 174 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████▎ | 184 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 194 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████▋ | 204 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▏ | 215 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▉ | 225 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▌ | 235 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 245 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▊ | 256 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▎ | 266 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 276 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▋ | 286 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K 
|██████████████████▏ | 296 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▉ | 307 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▌ | 317 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████ | 327 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▊ | 337 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▍ | 348 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 358 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▋ | 368 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▎ | 378 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▉ | 389 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▌ | 399 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 409 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 419 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 430 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 440 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 450 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 460 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 471 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 481 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 491 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 501 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 512 kB 8.6 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 521 kB 8.6 MB/s \r\n", - "\u001b[?25h" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting attrs==20.3.0\r\n", - " Downloading attrs-20.3.0-py2.py3-none-any.whl (49 kB)\r\n", - "\u001b[?25l\r", - "\u001b[K |██████▋ | 10 kB 32.7 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▎ | 20 kB 38.9 MB/s eta 0:00:01\r", - 
"\u001b[K |████████████████████ | 30 kB 47.2 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 40 kB 51.7 MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |████████████████████████████████| 49 kB 8.4 MB/s \r\n", - "\u001b[?25h" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting boto3>=1.20.21\r\n", - " Downloading boto3-1.21.42-py3-none-any.whl (132 kB)\r\n", - "\u001b[?25l\r", - "\u001b[K |██▌ | 10 kB 33.3 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 20 kB 41.8 MB/s eta 0:00:01\r", - "\u001b[K |███████▍ | 30 kB 51.8 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 40 kB 56.5 MB/s eta 0:00:01\r", - "\u001b[K |████████████▍ | 51 kB 59.5 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▉ | 61 kB 62.7 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▎ | 71 kB 64.2 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▉ | 81 kB 66.9 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▎ | 92 kB 69.0 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 102 kB 71.1 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 112 kB 71.1 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 122 kB 71.1 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 132 kB 71.1 MB/s \r\n", - "\u001b[?25hRequirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.2.0)\r\n", - "Requirement already satisfied: numpy>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.21.1)\r\n", - "Requirement already satisfied: protobuf>=3.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.17.3)\r\n", - "Requirement already satisfied: protobuf3-to-dict>=0.1.5 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.1.5)\r\n", - "Requirement already satisfied: smdebug_rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.0.1)\r\n", - "Requirement 
already satisfied: importlib-metadata>=1.4.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.5.0)\r\n", - "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (20.1)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.0.1)\r\n", - "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.2.8)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3>=1.20.21->sagemaker) (0.10.0)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting botocore<1.25.0,>=1.24.42\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Downloading botocore-1.24.42-py3-none-any.whl (8.7 MB)\r\n", - "\u001b[?25l\r", - "\u001b[K | | 10 kB 26.4 MB/s eta 0:00:01\r", - "\u001b[K | | 20 kB 33.7 MB/s eta 0:00:01\r", - "\u001b[K |▏ | 30 kB 38.0 MB/s eta 0:00:01\r", - "\u001b[K |▏ | 40 kB 43.4 MB/s eta 0:00:01\r", - "\u001b[K |▏ | 51 kB 41.9 MB/s eta 0:00:01\r", - "\u001b[K |▎ | 61 kB 46.4 MB/s eta 0:00:01\r", - "\u001b[K |▎ | 71 kB 46.4 MB/s eta 0:00:01\r", - "\u001b[K |▎ | 81 kB 48.5 MB/s eta 0:00:01\r", - "\u001b[K |▍ | 92 kB 49.5 MB/s eta 0:00:01\r", - "\u001b[K |▍ | 102 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▍ | 112 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▌ | 122 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▌ | 133 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▌ | 143 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▋ | 153 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▋ | 163 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▋ | 174 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▊ | 184 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▊ | 194 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▊ | 204 kB 51.4 
MB/s eta 0:00:01\r", - "\u001b[K |▉ | 215 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▉ | 225 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |▉ | 235 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█ | 245 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█ | 256 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█ | 266 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█ | 276 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█ | 286 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█ | 296 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▏ | 307 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▏ | 317 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▏ | 327 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▎ | 337 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▎ | 348 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▎ | 358 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▍ | 368 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▍ | 378 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▍ | 389 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▌ | 399 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▌ | 409 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▌ | 419 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▋ | 430 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▋ | 440 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▋ | 450 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▊ | 460 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▊ | 471 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▊ | 481 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▉ | 491 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█▉ | 501 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██ | 512 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██ | 522 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██ | 532 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██ | 542 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██ | 552 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██ | 563 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▏ | 573 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▏ | 583 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▏ | 593 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▎ | 604 kB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |██▎ | 614 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▎ | 624 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▍ | 634 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▍ | 645 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▍ | 655 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▌ | 665 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▌ | 675 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▌ | 686 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▋ | 696 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▋ | 706 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▋ | 716 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▊ | 727 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▊ | 737 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▊ | 747 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▉ | 757 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▉ | 768 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██▉ | 778 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███ | 788 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███ | 798 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███ | 808 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███ | 819 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███ | 829 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███ | 839 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▏ | 849 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▏ | 860 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▏ | 870 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▎ | 880 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▎ | 890 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▎ | 901 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▍ | 911 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▍ | 921 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▍ | 931 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▌ | 942 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▌ | 952 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▌ | 962 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▋ | 972 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▋ | 983 kB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▊ | 993 kB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |███▊ | 1.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▊ | 1.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▉ | 1.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▉ | 1.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███▉ | 1.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▏ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▏ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▏ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▎ | 1.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▎ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▎ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▍ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▍ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▍ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▌ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▌ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▌ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▋ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▋ | 1.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▋ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▊ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▊ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▊ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▉ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▉ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████▉ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 1.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 1.3 MB 51.4 MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |█████ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 1.4 MB 
51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▏ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▏ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▏ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▎ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▎ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▎ | 1.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▍ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▍ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▌ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▌ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▌ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▋ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▋ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▋ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▊ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▊ | 1.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▊ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▉ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▉ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████▉ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████ | 1.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▏ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▏ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▏ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▎ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▎ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▎ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▍ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▍ | 1.7 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |██████▍ | 1.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▌ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▌ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▌ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▋ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▋ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▋ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▊ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▊ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▊ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▉ | 1.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▉ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████▉ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▏ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▏ | 1.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▏ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▎ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▎ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▍ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▍ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▍ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▌ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▌ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▌ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▋ | 2.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▋ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▋ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▊ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▊ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|███████▊ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▉ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▉ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████▉ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████ | 2.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▏ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▏ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▏ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▎ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▎ | 2.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▎ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▍ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▍ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▍ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▌ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▌ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▌ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▋ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▋ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▋ | 2.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▊ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▊ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▊ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▉ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▉ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████▉ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████ | 2.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████ | 2.4 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |█████████ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▏ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▏ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▏ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▎ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▎ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▎ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▍ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▍ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▍ | 2.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▌ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▌ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▌ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▋ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▋ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▋ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▊ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▊ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▊ | 2.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▉ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▉ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████▉ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▏ | 2.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▏ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▏ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▎ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▎ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▎ | 2.8 MB 
51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▍ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▍ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▍ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▌ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▌ | 2.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▌ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▋ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▋ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▋ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▊ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▊ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▊ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▉ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████▉ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 2.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▏ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▏ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▏ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▎ | 3.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▎ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▎ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▍ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▍ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▍ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▌ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▌ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▌ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▋ | 3.1 MB 51.4 
MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |███████████▋ | 3.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▋ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▊ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▊ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▊ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▉ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▉ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████▉ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 3.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▏ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▏ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▏ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▎ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▎ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▎ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▍ | 3.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▍ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▍ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▌ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▌ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▌ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▋ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▋ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▋ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▊ | 3.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▊ | 3.5 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |████████████▉ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▉ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████▉ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████ | 3.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▏ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▏ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▏ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▎ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▎ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▎ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▍ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▍ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▍ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▌ | 3.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▌ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▌ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▋ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▋ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▋ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▊ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▊ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▊ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▉ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▉ | 3.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████▉ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████ | 3.8 MB 51.4 
MB/s eta 0:00:01\r", - "\u001b[K |██████████████ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▏ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▏ | 3.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▏ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▎ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▎ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▎ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▍ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▍ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▍ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▌ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▌ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▋ | 3.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▋ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▋ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▊ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▊ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▊ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▉ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▉ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████▉ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 4.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▏ | 4.1 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |███████████████▏ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▏ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▎ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▎ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▎ | 4.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▍ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▍ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▍ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▌ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▌ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▌ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▋ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▋ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▋ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▊ | 4.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▊ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▊ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▉ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▉ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████▉ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 4.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▏ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▏ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▏ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▎ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▎ | 
4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▍ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▍ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▍ | 4.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▌ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▌ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▌ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▋ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▋ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▋ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▊ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▊ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▊ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▉ | 4.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▉ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████▉ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▏ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▏ | 4.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▏ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▎ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▎ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▎ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▍ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▍ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▍ | 4.7 MB 
51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▌ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▌ | 4.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▌ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▋ | 4.8 MB 51.4 MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |█████████████████▋ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▋ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▊ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▊ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▊ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▉ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▉ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████▉ | 4.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▏ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▏ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▎ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▎ | 4.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▎ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▍ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▍ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▍ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▌ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▌ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - 
"\u001b[K |██████████████████▌ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▋ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▋ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▋ | 5.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▊ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▊ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▊ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▉ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▉ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████▉ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 5.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▏ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▏ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▏ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▎ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▎ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▎ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▍ | 5.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▍ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▍ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▌ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▌ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▌ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▋ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|███████████████████▋ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▋ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▊ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▊ | 5.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▊ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▉ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▉ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████▉ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▏ | 5.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▏ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▏ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▎ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▎ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▎ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▍ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▍ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▍ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▌ | 5.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▌ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▌ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▋ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▋ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▋ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▊ | 5.6 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |████████████████████▊ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▊ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▉ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▉ | 5.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████▉ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▏ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▏ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▏ | 5.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▎ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▎ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▎ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▍ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▍ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▍ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▌ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▌ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▌ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▋ | 5.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▋ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▋ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▊ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▊ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - 
"\u001b[K |█████████████████████▉ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▉ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████▉ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 5.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▏ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▏ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▏ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▎ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▎ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▎ | 6.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▍ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▍ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▍ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▌ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▌ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▌ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▋ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▋ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▋ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▊ | 6.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▊ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▊ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▉ | 6.2 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |██████████████████████▉ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████▉ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 6.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▏ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▏ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▏ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▎ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▎ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▎ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▍ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▍ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▍ | 6.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▌ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▌ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▌ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▋ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▋ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▊ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▊ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▊ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▉ | 6.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████▉ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|███████████████████████▉ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▏ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▏ | 6.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▏ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▎ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▎ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▎ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▍ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▍ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▍ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▌ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▌ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▌ | 6.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▋ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▊ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████▉ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|████████████████████████▉ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 6.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▏ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 6.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▎ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▍ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▌ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▋ | 6.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▊ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████▉ | 7.0 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |██████████████████████████ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 7.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▏ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▎ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 7.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▍ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▌ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 7.2 MB 51.4 MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |██████████████████████████▋ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▋ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▊ | 7.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|██████████████████████████▉ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████▉ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████ | 7.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▏ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▎ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▍ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 7.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▌ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▋ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▊ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|███████████████████████████▉ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████▉ | 7.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▏ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 7.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▎ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▍ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▌ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 7.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▋ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▊ | 7.8 MB 51.4 MB/s eta 
0:00:01\r", - "\u001b[K |████████████████████████████▉ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████▉ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 7.8 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▏ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▎ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 7.9 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▍ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▌ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▋ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▊ | 8.0 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|█████████████████████████████▊ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |█████████████████████████████▉ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 8.1 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▏ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▎ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▍ | 8.2 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▌ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▋ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|██████████████████████████████▊ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▊ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 8.3 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |██████████████████████████████▉ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████ | 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▏| 8.4 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▎| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▍| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▌| 8.5 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▋| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K 
|███████████████████████████████▋| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▊| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |███████████████████████████████▉| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 8.6 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 8.7 MB 51.4 MB/s eta 0:00:01\r", - "\u001b[K |████████████████████████████████| 8.7 MB 51.4 MB/s eta 0:00:01" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "\u001b[K |████████████████████████████████| 8.7 MB 51.4 MB/s \r\n", - "\u001b[?25h" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: s3transfer<0.6.0,>=0.5.0 in /opt/conda/lib/python3.7/site-packages (from boto3>=1.20.21->sagemaker) (0.5.0)\r\n", - "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.7/site-packages (from botocore<1.25.0,>=1.24.42->boto3>=1.20.21->sagemaker) (1.26.6)\r\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.7/site-packages (from botocore<1.25.0,>=1.24.42->boto3>=1.20.21->sagemaker) (2.8.1)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=1.4.0->sagemaker) (2.2.0)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker) (1.14.0)\r\n", - "Requirement already satisfied: pyparsing>=2.0.2 in 
/opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker) (2.4.6)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2019.3)\r\n", - "Requirement already satisfied: ppft>=1.6.6.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (1.6.6.4)\r\n", - "Requirement already satisfied: pox>=0.3.0 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.0)\r\n", - "Requirement already satisfied: dill>=0.3.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.4)\r\n", - "Requirement already satisfied: multiprocess>=0.70.12 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.70.12.2)\r\n", - "Building wheels for collected packages: sagemaker\r\n", - " Building wheel for sagemaker (setup.py) ... \u001b[?25l" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b\\" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b|" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b/" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b-" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b\\" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b|" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \b/" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\b \bdone\r\n", - "\u001b[?25h" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Created wheel for sagemaker: filename=sagemaker-2.86.2-py2.py3-none-any.whl size=720870 sha256=30eb3c1cca0f6a5480b9654912669d9e879aee733c91bd63d8a9addf75998a4d\r\n", - " Stored in directory: 
/root/.cache/pip/wheels/ea/94/35/1b815c5bdf63f8947160a8e6a4eb12f4bb05bd6f9cc773176a\r\n", - "Successfully built sagemaker\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Installing collected packages: botocore, boto3, attrs, sagemaker\r\n", - " Attempting uninstall: botocore\r\n", - " Found existing installation: botocore 1.23.7\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Uninstalling botocore-1.23.7:\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Successfully uninstalled botocore-1.23.7\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Attempting uninstall: boto3\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Found existing installation: boto3 1.20.7\r\n", - " Uninstalling boto3-1.20.7:\r\n", - " Successfully uninstalled boto3-1.20.7\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Attempting uninstall: attrs\r\n", - " Found existing installation: attrs 19.3.0\r\n", - " Uninstalling attrs-19.3.0:\r\n", - " Successfully uninstalled attrs-19.3.0\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Attempting uninstall: sagemaker\r\n", - " Found existing installation: sagemaker 2.69.1.dev0\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Uninstalling sagemaker-2.69.1.dev0:\r\n", - " Successfully uninstalled sagemaker-2.69.1.dev0\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\r\n", - "pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.\r\n", - "pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.\r\n", - "awscli 1.22.7 requires botocore==1.23.7, but you have botocore 1.24.42 which is incompatible.\r\n", - "aiobotocore 1.3.3 requires botocore<1.20.107,>=1.20.106, but you have botocore 1.24.42 which is incompatible.\u001b[0m\r\n", - "Successfully installed attrs-20.3.0 boto3-1.21.42 botocore-1.24.42 sagemaker-2.86.2\r\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n", - "\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.0.4 is available.\r\n", - "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n" - ] - } - ], - "source": [ - "!pip install --upgrade sagemaker\n", - "\n", - "import boto3\n", - "import sagemaker\n", - "\n", - "\n", - "sagemaker_session = sagemaker.session.Session()\n", - "region = sagemaker_session.boto_region_name\n", - "role = sagemaker.get_execution_role()\n", - "default_bucket = sagemaker_session.default_bucket()\n", - "model_package_group_name = f\"AbaloneModelPackageGroupName\"" - ] - }, - { - "cell_type": "markdown", - "id": "0e125406", - "metadata": { - "papermill": { - "duration": 0.04072, - "end_time": "2022-04-18T00:26:58.219587", - "exception": false, - "start_time": "2022-04-18T00:26:58.178867", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Now, upload the data into the default bucket. You can select our own data set for the `input_data_uri` as is appropriate." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9ba7d402", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:26:58.444676Z", - "iopub.status.busy": "2022-04-18T00:26:58.443960Z", - "iopub.status.idle": "2022-04-18T00:26:58.740397Z", - "shell.execute_reply": "2022-04-18T00:26:58.740762Z" - }, - "papermill": { - "duration": 0.478884, - "end_time": "2022-04-18T00:26:58.740911", - "exception": false, - "start_time": "2022-04-18T00:26:58.262027", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "!mkdir -p data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "deb34a1d", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:26:58.932598Z", - "iopub.status.busy": "2022-04-18T00:26:58.931487Z", - "iopub.status.idle": "2022-04-18T00:26:59.983116Z", - "shell.execute_reply": "2022-04-18T00:26:59.983495Z" - }, - "papermill": { - "duration": 1.20106, - "end_time": "2022-04-18T00:26:59.983639", - "exception": false, - "start_time": "2022-04-18T00:26:58.782579", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "s3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset.csv\n" - ] - } - ], - "source": [ - "local_path = \"data/abalone-dataset.csv\"\n", - "\n", - "s3 = boto3.resource(\"s3\")\n", - "s3.Bucket(f\"sagemaker-sample-files\").download_file(\n", - " \"datasets/tabular/uci_abalone/abalone.csv\", local_path\n", - ")\n", - "\n", - "base_uri = f\"s3://{default_bucket}/abalone\"\n", - "input_data_uri = sagemaker.s3.S3Uploader.upload(\n", - " local_path=local_path,\n", - " desired_s3_uri=base_uri,\n", - ")\n", - "print(input_data_uri)" - ] - }, - { - "cell_type": "markdown", - "id": "f19a382b", - "metadata": { - "papermill": { - "duration": 0.042061, - "end_time": "2022-04-18T00:27:00.067912", - "exception": false, - "start_time": "2022-04-18T00:27:00.025851", - "status": "completed" - 
}, - "tags": [] - }, - "source": [ - "Download a second dataset for batch transformation after model creation. You can select our own dataset for the `batch_data_uri` as is appropriate." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "3ac709e8", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:00.165097Z", - "iopub.status.busy": "2022-04-18T00:27:00.164270Z", - "iopub.status.idle": "2022-04-18T00:27:00.440584Z", - "shell.execute_reply": "2022-04-18T00:27:00.444404Z" - }, - "papermill": { - "duration": 0.332017, - "end_time": "2022-04-18T00:27:00.444577", - "exception": false, - "start_time": "2022-04-18T00:27:00.112560", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "s3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset-batch\n" - ] - } - ], - "source": [ - "local_path = \"data/abalone-dataset-batch\"\n", - "\n", - "s3 = boto3.resource(\"s3\")\n", - "s3.Bucket(f\"sagemaker-servicecatalog-seedcode-{region}\").download_file(\n", - " \"dataset/abalone-dataset-batch\", local_path\n", - ")\n", - "\n", - "base_uri = f\"s3://{default_bucket}/abalone\"\n", - "batch_data_uri = sagemaker.s3.S3Uploader.upload(\n", - " local_path=local_path,\n", - " desired_s3_uri=base_uri,\n", - ")\n", - "print(batch_data_uri)" - ] - }, - { - "cell_type": "markdown", - "id": "cdb20305", - "metadata": { - "papermill": { - "duration": 0.05191, - "end_time": "2022-04-18T00:27:00.557073", - "exception": false, - "start_time": "2022-04-18T00:27:00.505163", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define Parameters to Parametrize Pipeline Execution\n", - "\n", - "Define Pipeline parameters that you can use to parametrize the pipeline. 
Parameters enable custom pipeline executions and schedules without having to modify the Pipeline definition.\n", - "\n", - "The supported parameter types include:\n", - "\n", - "* `ParameterString` - represents a `str` Python type\n", - "* `ParameterInteger` - represents an `int` Python type\n", - "* `ParameterFloat` - represents a `float` Python type\n", - "\n", - "These parameters support providing a default value, which can be overridden on pipeline execution. The default value specified should be an instance of the type of the parameter.\n", - "\n", - "The parameters defined in this workflow include:\n", - "\n", - "* `processing_instance_type` - The `ml.*` instance type of the processing job.\n", - "* `processing_instance_count` - The instance count of the processing job.\n", - "* `instance_type` - The `ml.*` instance type of the training job.\n", - "* `model_approval_status` - The approval status to register with the trained model for CI/CD purposes (\"PendingManualApproval\" is the default).\n", - "* `input_data` - The S3 bucket URI location of the input data.\n", - "* `batch_data` - The S3 bucket URI location of the batch data.\n", - "* `mse_threshold` - The Mean Squared Error (MSE) threshold used to verify the accuracy of a model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "177ff1bd", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:00.760136Z", - "iopub.status.busy": "2022-04-18T00:27:00.759034Z", - "iopub.status.idle": "2022-04-18T00:27:00.761022Z", - "shell.execute_reply": "2022-04-18T00:27:00.761444Z" - }, - "papermill": { - "duration": 0.118215, - "end_time": "2022-04-18T00:27:00.761626", - "exception": false, - "start_time": "2022-04-18T00:27:00.643411", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.workflow.parameters import (\n", - " ParameterInteger,\n", - " ParameterString,\n", - " ParameterFloat,\n", - ")\n", - "\n", - "\n", - "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", - "processing_instance_type = ParameterString(\n", - " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", - ")\n", - "instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", - "model_approval_status = ParameterString(\n", - " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", - ")\n", - "input_data = ParameterString(\n", - " name=\"InputData\",\n", - " default_value=input_data_uri,\n", - ")\n", - "batch_data = ParameterString(\n", - " name=\"BatchData\",\n", - " default_value=batch_data_uri,\n", - ")\n", - "mse_threshold = ParameterFloat(name=\"MseThreshold\", default_value=6.0)" - ] - }, - { - "cell_type": "markdown", - "id": "7db46ee8", - "metadata": { - "papermill": { - "duration": 0.104152, - "end_time": "2022-04-18T00:27:01.035968", - "exception": false, - "start_time": "2022-04-18T00:27:00.931816", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define Parameters](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-1.png)" - ] - }, - { - "cell_type": "markdown", - "id": 
"cd4a939f", - "metadata": { - "papermill": { - "duration": 0.178766, - "end_time": "2022-04-18T00:27:01.330422", - "exception": false, - "start_time": "2022-04-18T00:27:01.151656", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Processing Step for Feature Engineering\n", - "\n", - "First, develop a preprocessing script that is specified in the Processing step.\n", - "\n", - "This notebook cell writes a file `preprocessing_abalone.py`, which contains the preprocessing script. You can update the script, and rerun this cell to overwrite. The preprocessing script uses `scikit-learn` to do the following:\n", - "\n", - "* Fill in missing sex category data and encode it so that it is suitable for training.\n", - "* Scale and normalize all numerical fields, aside from sex and rings numerical data.\n", - "* Split the data into training, validation, and test datasets.\n", - "\n", - "The Processing step executes the script on the input data. The Training step uses the preprocessed training features and labels to train a model. The Evaluation step uses the trained model and preprocessed test features and labels to evaluate the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e84a2b1c", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:01.631001Z", - "iopub.status.busy": "2022-04-18T00:27:01.630119Z", - "iopub.status.idle": "2022-04-18T00:27:01.854003Z", - "shell.execute_reply": "2022-04-18T00:27:01.850909Z" - }, - "papermill": { - "duration": 0.399913, - "end_time": "2022-04-18T00:27:01.854610", - "exception": false, - "start_time": "2022-04-18T00:27:01.454697", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "!mkdir -p abalone" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6e1a780d", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:02.136534Z", - "iopub.status.busy": "2022-04-18T00:27:02.135485Z", - "iopub.status.idle": "2022-04-18T00:27:02.139256Z", - "shell.execute_reply": "2022-04-18T00:27:02.139664Z" - }, - "papermill": { - "duration": 0.107493, - "end_time": "2022-04-18T00:27:02.139838", - "exception": false, - "start_time": "2022-04-18T00:27:02.032345", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Writing abalone/preprocessing.py\n" - ] - } - ], - "source": [ - "%%writefile abalone/preprocessing.py\n", - "import argparse\n", - "import os\n", - "import requests\n", - "import tempfile\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.impute import SimpleImputer\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", - "\n", - "\n", - "# Since we get a headerless CSV file, we specify the column names here.\n", - "feature_columns_names = [\n", - " \"sex\",\n", - " \"length\",\n", - " \"diameter\",\n", - " \"height\",\n", - " \"whole_weight\",\n", - " \"shucked_weight\",\n", - " \"viscera_weight\",\n", - " 
\"shell_weight\",\n", - "]\n", - "label_column = \"rings\"\n", - "\n", - "feature_columns_dtype = {\n", - " \"sex\": str,\n", - " \"length\": np.float64,\n", - " \"diameter\": np.float64,\n", - " \"height\": np.float64,\n", - " \"whole_weight\": np.float64,\n", - " \"shucked_weight\": np.float64,\n", - " \"viscera_weight\": np.float64,\n", - " \"shell_weight\": np.float64,\n", - "}\n", - "label_column_dtype = {\"rings\": np.float64}\n", - "\n", - "\n", - "def merge_two_dicts(x, y):\n", - " z = x.copy()\n", - " z.update(y)\n", - " return z\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " base_dir = \"/opt/ml/processing\"\n", - "\n", - " df = pd.read_csv(\n", - " f\"{base_dir}/input/abalone-dataset.csv\",\n", - " header=None,\n", - " names=feature_columns_names + [label_column],\n", - " dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),\n", - " )\n", - " numeric_features = list(feature_columns_names)\n", - " numeric_features.remove(\"sex\")\n", - " numeric_transformer = Pipeline(\n", - " steps=[(\"imputer\", SimpleImputer(strategy=\"median\")), (\"scaler\", StandardScaler())]\n", - " )\n", - "\n", - " categorical_features = [\"sex\"]\n", - " categorical_transformer = Pipeline(\n", - " steps=[\n", - " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", - " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n", - " ]\n", - " )\n", - "\n", - " preprocess = ColumnTransformer(\n", - " transformers=[\n", - " (\"num\", numeric_transformer, numeric_features),\n", - " (\"cat\", categorical_transformer, categorical_features),\n", - " ]\n", - " )\n", - "\n", - " y = df.pop(\"rings\")\n", - " X_pre = preprocess.fit_transform(df)\n", - " y_pre = y.to_numpy().reshape(len(y), 1)\n", - "\n", - " X = np.concatenate((y_pre, X_pre), axis=1)\n", - "\n", - " np.random.shuffle(X)\n", - " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", - "\n", - " 
pd.DataFrame(train).to_csv(f\"{base_dir}/train/train.csv\", header=False, index=False)\n", - " pd.DataFrame(validation).to_csv(\n", - " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", - " )\n", - " pd.DataFrame(test).to_csv(f\"{base_dir}/test/test.csv\", header=False, index=False)" - ] - }, - { - "cell_type": "markdown", - "id": "e529d9c0", - "metadata": { - "papermill": { - "duration": 0.158348, - "end_time": "2022-04-18T00:27:02.341041", - "exception": false, - "start_time": "2022-04-18T00:27:02.182693", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Next, create an instance of a `SKLearnProcessor` processor and use that in our `ProcessingStep`.\n", - "\n", - "You also specify the `framework_version` to use throughout this notebook.\n", - "\n", - "Note the `processing_instance_type` and `processing_instance_count` parameters used by the processor instance." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "22bd2e33", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:02.542297Z", - "iopub.status.busy": "2022-04-18T00:27:02.541393Z", - "iopub.status.idle": "2022-04-18T00:27:02.646852Z", - "shell.execute_reply": "2022-04-18T00:27:02.646034Z" - }, - "papermill": { - "duration": 0.206931, - "end_time": "2022-04-18T00:27:02.646979", - "exception": false, - "start_time": "2022-04-18T00:27:02.440048", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.sklearn.processing import SKLearnProcessor\n", - "\n", - "\n", - "framework_version = \"1.0-1\"\n", - "\n", - "sklearn_processor = SKLearnProcessor(\n", - " framework_version=framework_version,\n", - " instance_type=processing_instance_type,\n", - " instance_count=processing_instance_count,\n", - " base_job_name=\"sklearn-abalone-process\",\n", - " role=role,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "1757d287", - "metadata": { - "papermill": { - "duration": 0.106575, - 
"end_time": "2022-04-18T00:27:02.859741", - "exception": false, - "start_time": "2022-04-18T00:27:02.753166", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Finally, use the processor instance to construct a `ProcessingStep`, along with the input and output channels, and the code that runs when the pipeline invokes pipeline execution. This is similar to a processor instance's `run()` method in the Python SDK.\n", - "\n", - "Note the `input_data` parameters passed into `ProcessingStep` is the input data used in the step. This input data is used by the processor instance when it is run.\n", - "\n", - "Also, note the `\"train_data\"` and `\"test_data\"` named channels specified in the output configuration for the processing job. Step `Properties` can be used in subsequent steps and resolve to their runtime values at execution. Specifically, this usage is called out when you define the training step." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "7cad2493", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:03.136272Z", - "iopub.status.busy": "2022-04-18T00:27:03.135400Z", - "iopub.status.idle": "2022-04-18T00:27:03.146315Z", - "shell.execute_reply": "2022-04-18T00:27:03.145802Z" - }, - "papermill": { - "duration": 0.180929, - "end_time": "2022-04-18T00:27:03.146454", - "exception": false, - "start_time": "2022-04-18T00:27:02.965525", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", - "from sagemaker.workflow.steps import ProcessingStep\n", - "\n", - "\n", - "step_process = ProcessingStep(\n", - " name=\"AbaloneProcess\",\n", - " processor=sklearn_processor,\n", - " inputs=[\n", - " ProcessingInput(source=input_data, destination=\"/opt/ml/processing/input\"),\n", - " ],\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", - " 
ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", - " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", - " ],\n", - " code=\"abalone/preprocessing.py\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "ae886e49", - "metadata": { - "papermill": { - "duration": 0.106882, - "end_time": "2022-04-18T00:27:03.362015", - "exception": false, - "start_time": "2022-04-18T00:27:03.255133", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define a Processing Step for Feature Engineering](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-2.png)" - ] - }, - { - "cell_type": "markdown", - "id": "99a2d101", - "metadata": { - "papermill": { - "duration": 0.105673, - "end_time": "2022-04-18T00:27:03.636255", - "exception": false, - "start_time": "2022-04-18T00:27:03.530582", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Training Step to Train a Model\n", - "\n", - "In this section, use Amazon SageMaker's [XGBoost Algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html) to train on this dataset. Configure an Estimator for the XGBoost algorithm and the input dataset. A typical training script loads data from the input channels, configures training with hyperparameters, trains a model, and saves a model to `model_dir` so that it can be hosted later.\n", - "\n", - "The model path where the models from training are saved is also specified.\n", - "\n", - "Note the `instance_type` parameter may be used in multiple places in the pipeline. In this case, the `instance_type` is passed into the estimator." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "0da0afcf", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:03.930444Z", - "iopub.status.busy": "2022-04-18T00:27:03.856195Z", - "iopub.status.idle": "2022-04-18T00:27:03.943258Z", - "shell.execute_reply": "2022-04-18T00:27:03.942838Z" - }, - "papermill": { - "duration": 0.198963, - "end_time": "2022-04-18T00:27:03.943382", - "exception": false, - "start_time": "2022-04-18T00:27:03.744419", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.estimator import Estimator\n", - "\n", - "\n", - "model_path = f\"s3://{default_bucket}/AbaloneTrain\"\n", - "image_uri = sagemaker.image_uris.retrieve(\n", - " framework=\"xgboost\",\n", - " region=region,\n", - " version=\"1.0-1\",\n", - " py_version=\"py3\",\n", - " instance_type=instance_type,\n", - ")\n", - "xgb_train = Estimator(\n", - " image_uri=image_uri,\n", - " instance_type=instance_type,\n", - " instance_count=1,\n", - " output_path=model_path,\n", - " role=role,\n", - ")\n", - "xgb_train.set_hyperparameters(\n", - " objective=\"reg:linear\",\n", - " num_round=50,\n", - " max_depth=5,\n", - " eta=0.2,\n", - " gamma=4,\n", - " min_child_weight=6,\n", - " subsample=0.7,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "acb3d8d4", - "metadata": { - "papermill": { - "duration": 0.105706, - "end_time": "2022-04-18T00:27:04.154522", - "exception": false, - "start_time": "2022-04-18T00:27:04.048816", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Finally, use the estimator instance to construct a `TrainingStep` as well as the `properties` of the prior `ProcessingStep` used as input in the `TrainingStep` inputs and the code that's executed when the pipeline invokes the pipeline execution. This is similar to an estimator's `fit` method in the Python SDK.\n", - "\n", - "Pass in the `S3Uri` of the `\"train_data\"` output channel to the `TrainingStep`. 
Also, use the other `\"test_data\"` output channel for model evaluation in the pipeline. The `properties` attribute of a Pipeline step matches the object model of the corresponding response of a describe call. These properties can be referenced as placeholder values and are resolved at runtime. For example, the `ProcessingStep` `properties` attribute matches the object model of the [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response object." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "b58b44f8", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:04.438230Z", - "iopub.status.busy": "2022-04-18T00:27:04.437184Z", - "iopub.status.idle": "2022-04-18T00:27:04.438922Z", - "shell.execute_reply": "2022-04-18T00:27:04.439349Z" - }, - "papermill": { - "duration": 0.178944, - "end_time": "2022-04-18T00:27:04.439510", - "exception": false, - "start_time": "2022-04-18T00:27:04.260566", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.inputs import TrainingInput\n", - "from sagemaker.workflow.steps import TrainingStep\n", - "\n", - "\n", - "step_train = TrainingStep(\n", - " name=\"AbaloneTrain\",\n", - " estimator=xgb_train,\n", - " inputs={\n", - " \"train\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " \"validation\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"validation\"\n", - " ].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "32529fe6", - "metadata": { - "papermill": { - "duration": 0.10595, - "end_time": "2022-04-18T00:27:04.650608", - "exception": false, - "start_time": "2022-04-18T00:27:04.544658", - "status": "completed" - }, - "tags": [] - }, - 
"source": [ - "![Define a Training Step to Train a Model](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-3.png)" - ] - }, - { - "cell_type": "markdown", - "id": "dc41fa12", - "metadata": { - "papermill": { - "duration": 0.106253, - "end_time": "2022-04-18T00:27:04.863458", - "exception": false, - "start_time": "2022-04-18T00:27:04.757205", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Model Evaluation Step to Evaluate the Trained Model\n", - "\n", - "First, develop an evaluation script that is specified in a Processing step that performs the model evaluation.\n", - "\n", - "After pipeline execution, you can examine the resulting `evaluation.json` for analysis.\n", - "\n", - "The evaluation script uses `xgboost` to do the following:\n", - "\n", - "* Load the model.\n", - "* Read the test data.\n", - "* Issue predictions against the test data.\n", - "* Build a classification report, including accuracy and ROC curve.\n", - "* Save the evaluation report to the evaluation directory." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "445f1b8a", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:05.145905Z", - "iopub.status.busy": "2022-04-18T00:27:05.144954Z", - "iopub.status.idle": "2022-04-18T00:27:05.149035Z", - "shell.execute_reply": "2022-04-18T00:27:05.148480Z" - }, - "papermill": { - "duration": 0.114344, - "end_time": "2022-04-18T00:27:05.149184", - "exception": false, - "start_time": "2022-04-18T00:27:05.034840", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Writing abalone/evaluation.py\n" - ] - } - ], - "source": [ - "%%writefile abalone/evaluation.py\n", - "import json\n", - "import pathlib\n", - "import pickle\n", - "import tarfile\n", - "\n", - "import joblib\n", - "import numpy as np\n", - "import pandas as pd\n", - "import xgboost\n", - "\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " model_path = f\"/opt/ml/processing/model/model.tar.gz\"\n", - " with tarfile.open(model_path) as tar:\n", - " tar.extractall(path=\".\")\n", - "\n", - " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", - "\n", - " test_path = \"/opt/ml/processing/test/test.csv\"\n", - " df = pd.read_csv(test_path, header=None)\n", - "\n", - " y_test = df.iloc[:, 0].to_numpy()\n", - " df.drop(df.columns[0], axis=1, inplace=True)\n", - "\n", - " X_test = xgboost.DMatrix(df.values)\n", - "\n", - " predictions = model.predict(X_test)\n", - "\n", - " mse = mean_squared_error(y_test, predictions)\n", - " std = np.std(y_test - predictions)\n", - " report_dict = {\n", - " \"regression_metrics\": {\n", - " \"mse\": {\"value\": mse, \"standard_deviation\": std},\n", - " },\n", - " }\n", - "\n", - " output_dir = \"/opt/ml/processing/evaluation\"\n", - " pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)\n", - "\n", - " evaluation_path = 
f\"{output_dir}/evaluation.json\"\n", - " with open(evaluation_path, \"w\") as f:\n", - " f.write(json.dumps(report_dict))" - ] - }, - { - "cell_type": "markdown", - "id": "291a0f15", - "metadata": { - "papermill": { - "duration": 0.16557, - "end_time": "2022-04-18T00:27:05.430895", - "exception": false, - "start_time": "2022-04-18T00:27:05.265325", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Next, create an instance of a `ScriptProcessor` processor and use it in the `ProcessingStep`.\n", - "\n", - "Note the `processing_instance_type` parameter passed into the processor." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c6d4c85a", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:05.655883Z", - "iopub.status.busy": "2022-04-18T00:27:05.655326Z", - "iopub.status.idle": "2022-04-18T00:27:05.663003Z", - "shell.execute_reply": "2022-04-18T00:27:05.662503Z" - }, - "papermill": { - "duration": 0.125324, - "end_time": "2022-04-18T00:27:05.663126", - "exception": false, - "start_time": "2022-04-18T00:27:05.537802", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.processing import ScriptProcessor\n", - "\n", - "\n", - "script_eval = ScriptProcessor(\n", - " image_uri=image_uri,\n", - " command=[\"python3\"],\n", - " instance_type=processing_instance_type,\n", - " instance_count=1,\n", - " base_job_name=\"script-abalone-eval\",\n", - " role=role,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b316d741", - "metadata": { - "papermill": { - "duration": 0.106669, - "end_time": "2022-04-18T00:27:05.939502", - "exception": false, - "start_time": "2022-04-18T00:27:05.832833", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Use the processor instance to construct a `ProcessingStep`, along with the input and output channels and the code that runs when the pipeline invokes pipeline execution. 
This is similar to a processor instance's `run` method in the Python SDK.\n", - "\n", - "Specifically, the `S3ModelArtifacts` from the `step_train` `properties` and the `S3Uri` of the `\"test_data\"` output channel of the `step_process` `properties` are passed as inputs. The `TrainingStep` and `ProcessingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) and [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response objects, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "8acddb48", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:06.232317Z", - "iopub.status.busy": "2022-04-18T00:27:06.231534Z", - "iopub.status.idle": "2022-04-18T00:27:06.234111Z", - "shell.execute_reply": "2022-04-18T00:27:06.234602Z" - }, - "papermill": { - "duration": 0.189298, - "end_time": "2022-04-18T00:27:06.234812", - "exception": false, - "start_time": "2022-04-18T00:27:06.045514", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.workflow.properties import PropertyFile\n", - "\n", - "\n", - "evaluation_report = PropertyFile(\n", - " name=\"EvaluationReport\", output_name=\"evaluation\", path=\"evaluation.json\"\n", - ")\n", - "step_eval = ProcessingStep(\n", - " name=\"AbaloneEval\",\n", - " processor=script_eval,\n", - " inputs=[\n", - " ProcessingInput(\n", - " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " destination=\"/opt/ml/processing/model\",\n", - " ),\n", - " ProcessingInput(\n", - " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", - " destination=\"/opt/ml/processing/test\",\n", - " ),\n", - " ],\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", - " ],\n", 
- " code=\"abalone/evaluation.py\",\n", - " property_files=[evaluation_report],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "a5540efa", - "metadata": { - "papermill": { - "duration": 0.108782, - "end_time": "2022-04-18T00:27:06.452659", - "exception": false, - "start_time": "2022-04-18T00:27:06.343877", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define a Model Evaluation Step to Evaluate the Trained Model](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-4.png)" - ] - }, - { - "cell_type": "markdown", - "id": "d772c2b1", - "metadata": { - "papermill": { - "duration": 0.106592, - "end_time": "2022-04-18T00:27:06.739719", - "exception": false, - "start_time": "2022-04-18T00:27:06.633127", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Create Model Step to Create a Model\n", - "\n", - "In order to perform batch transformation using the example model, create a SageMaker model. \n", - "\n", - "Specifically, pass in the `S3ModelArtifacts` from the `TrainingStep`, `step_train` properties. The `TrainingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) response object." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "bfa99742", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:06.958939Z", - "iopub.status.busy": "2022-04-18T00:27:06.958044Z", - "iopub.status.idle": "2022-04-18T00:27:06.960510Z", - "shell.execute_reply": "2022-04-18T00:27:06.960931Z" - }, - "papermill": { - "duration": 0.11483, - "end_time": "2022-04-18T00:27:06.961120", - "exception": false, - "start_time": "2022-04-18T00:27:06.846290", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.model import Model\n", - "\n", - "\n", - "model = Model(\n", - " image_uri=image_uri,\n", - " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6ff36a22", - "metadata": { - "papermill": { - "duration": 0.106018, - "end_time": "2022-04-18T00:27:07.236643", - "exception": false, - "start_time": "2022-04-18T00:27:07.130625", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Supply the model input (`instance_type` and `accelerator_type`) for creating the SageMaker Model, and then define the `CreateModelStep`, passing in the inputs and the model instance defined before." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "4343f97b", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:07.455511Z", - "iopub.status.busy": "2022-04-18T00:27:07.454736Z", - "iopub.status.idle": "2022-04-18T00:27:07.457542Z", - "shell.execute_reply": "2022-04-18T00:27:07.457062Z" - }, - "papermill": { - "duration": 0.114869, - "end_time": "2022-04-18T00:27:07.457675", - "exception": false, - "start_time": "2022-04-18T00:27:07.342806", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.inputs import CreateModelInput\n", - "from sagemaker.workflow.steps import CreateModelStep\n", - "\n", - "\n", - "inputs = CreateModelInput(\n", - " instance_type=\"ml.m5.large\",\n", - " accelerator_type=\"ml.eia1.medium\",\n", - ")\n", - "step_create_model = CreateModelStep(\n", - " name=\"AbaloneCreateModel\",\n", - " model=model,\n", - " inputs=inputs,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "921a78fe", - "metadata": { - "papermill": { - "duration": 0.169956, - "end_time": "2022-04-18T00:27:07.734340", - "exception": false, - "start_time": "2022-04-18T00:27:07.564384", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Transform Step to Perform Batch Transformation\n", - "\n", - "Now that a model instance is defined, create a `Transformer` instance with the appropriate model type, compute instance type, and desired output S3 URI.\n", - "\n", - "Specifically, pass in the `ModelName` from the `CreateModelStep`, `step_create_model` properties. The `CreateModelStep` `properties` attribute matches the object model of the [DescribeModel](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeModel.html) response object." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "1c5e6294", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:07.956628Z", - "iopub.status.busy": "2022-04-18T00:27:07.955691Z", - "iopub.status.idle": "2022-04-18T00:27:08.033592Z", - "shell.execute_reply": "2022-04-18T00:27:08.033113Z" - }, - "papermill": { - "duration": 0.192424, - "end_time": "2022-04-18T00:27:08.033724", - "exception": false, - "start_time": "2022-04-18T00:27:07.841300", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.transformer import Transformer\n", - "\n", - "\n", - "transformer = Transformer(\n", - " model_name=step_create_model.properties.ModelName,\n", - " instance_type=\"ml.m5.xlarge\",\n", - " instance_count=1,\n", - " output_path=f\"s3://{default_bucket}/AbaloneTransform\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "08c9bd75", - "metadata": { - "papermill": { - "duration": 0.10677, - "end_time": "2022-04-18T00:27:08.246016", - "exception": false, - "start_time": "2022-04-18T00:27:08.139246", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Pass in the transformer instance and the `TransformInput` with the `batch_data` pipeline parameter defined earlier." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "0086570b", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:08.455589Z", - "iopub.status.busy": "2022-04-18T00:27:08.454984Z", - "iopub.status.idle": "2022-04-18T00:27:08.532736Z", - "shell.execute_reply": "2022-04-18T00:27:08.532252Z" - }, - "papermill": { - "duration": 0.190304, - "end_time": "2022-04-18T00:27:08.532863", - "exception": false, - "start_time": "2022-04-18T00:27:08.342559", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.inputs import TransformInput\n", - "from sagemaker.workflow.steps import TransformStep\n", - "\n", - "\n", - "step_transform = TransformStep(\n", - " name=\"AbaloneTransform\", transformer=transformer, inputs=TransformInput(data=batch_data)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d1db8c49", - "metadata": { - "papermill": { - "duration": 0.162352, - "end_time": "2022-04-18T00:27:08.738517", - "exception": false, - "start_time": "2022-04-18T00:27:08.576165", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Register Model Step to Create a Model Package\n", - "\n", - "Use the estimator instance specified in the training step to construct an instance of `RegisterModel`. The result of executing `RegisterModel` in a pipeline is a model package. A model package is an abstraction of reusable model artifacts that packages all ingredients required for inference. Primarily, it consists of an inference specification that defines the inference image to use along with an optional model weights location.\n", - "\n", - "A model package group is a collection of model packages. A model package group can be created for a specific ML business problem, and new versions of the model packages can be added to it. 
Typically, customers are expected to create a ModelPackageGroup for a SageMaker pipeline so that model package versions can be added to the group for every SageMaker Pipeline run.\n", - "\n", - "The construction of `RegisterModel` is similar to an estimator instance's `register` method in the Python SDK.\n", - "\n", - "Specifically, pass in the `S3ModelArtifacts` from the `TrainingStep`, `step_train` properties. The `TrainingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) response object.\n", - "\n", - "Note that the specific model package group name provided in this notebook can be used in the model registry and CI/CD work with SageMaker Projects." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "5763c5cf", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:09.035696Z", - "iopub.status.busy": "2022-04-18T00:27:09.034604Z", - "iopub.status.idle": "2022-04-18T00:27:09.334548Z", - "shell.execute_reply": "2022-04-18T00:27:09.334010Z" - }, - "papermill": { - "duration": 0.477925, - "end_time": "2022-04-18T00:27:09.334667", - "exception": false, - "start_time": "2022-04-18T00:27:08.856742", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.model_metrics import MetricsSource, ModelMetrics\n", - "from sagemaker.workflow.step_collections import RegisterModel\n", - "\n", - "\n", - "model_metrics = ModelMetrics(\n", - " model_statistics=MetricsSource(\n", - " s3_uri=\"{}/evaluation.json\".format(\n", - " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", - " ),\n", - " content_type=\"application/json\",\n", - " )\n", - ")\n", - "step_register = RegisterModel(\n", - " name=\"AbaloneRegisterModel\",\n", - " estimator=xgb_train,\n", - " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " 
content_types=[\"text/csv\"],\n", - " response_types=[\"text/csv\"],\n", - " inference_instances=[\"ml.t2.medium\", \"ml.m5.xlarge\"],\n", - " transform_instances=[\"ml.m5.xlarge\"],\n", - " model_package_group_name=model_package_group_name,\n", - " approval_status=model_approval_status,\n", - " model_metrics=model_metrics,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b8b852cf", - "metadata": { - "papermill": { - "duration": 0.078046, - "end_time": "2022-04-18T00:27:09.455768", - "exception": false, - "start_time": "2022-04-18T00:27:09.377722", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define a Create Model Step and Batch Transform to Process Data in Batch at Scale](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-5.png)" - ] - }, - { - "cell_type": "markdown", - "id": "1a1cec12", - "metadata": { - "papermill": { - "duration": 0.169798, - "end_time": "2022-04-18T00:27:09.731822", - "exception": false, - "start_time": "2022-04-18T00:27:09.562024", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Fail Step to Terminate the Pipeline Execution and Mark it as Failed\n", - "\n", - "This section walks you through the following steps:\n", - "\n", - "* Define a `FailStep` with customized error message, which indicates the cause of the execution failure.\n", - "* Enter the `FailStep` error message with a `Join` function, which appends a static text string with the dynamic `mse_threshold` parameter to build a more informative error message." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "8948e14a", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:09.951031Z", - "iopub.status.busy": "2022-04-18T00:27:09.949925Z", - "iopub.status.idle": "2022-04-18T00:27:09.953764Z", - "shell.execute_reply": "2022-04-18T00:27:09.953320Z" - }, - "jupyter": { - "outputs_hidden": false - }, - "papermill": { - "duration": 0.115047, - "end_time": "2022-04-18T00:27:09.953878", - "exception": false, - "start_time": "2022-04-18T00:27:09.838831", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.workflow.fail_step import FailStep\n", - "from sagemaker.workflow.functions import Join\n", - "\n", - "step_fail = FailStep(\n", - " name=\"AbaloneMSEFail\",\n", - " error_message=Join(on=\" \", values=[\"Execution failed due to MSE >\", mse_threshold]),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "835eaf17", - "metadata": { - "papermill": { - "duration": 0.106951, - "end_time": "2022-04-18T00:27:10.167203", - "exception": false, - "start_time": "2022-04-18T00:27:10.060252", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define a Fail Step to Terminate the Execution in Failed State](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-8.png)" - ] - }, - { - "cell_type": "markdown", - "id": "9e52c136", - "metadata": { - "papermill": { - "duration": 0.106965, - "end_time": "2022-04-18T00:27:10.444635", - "exception": false, - "start_time": "2022-04-18T00:27:10.337670", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Condition Step to Check Accuracy and Conditionally Create a Model and Run a Batch Transformation and Register a Model in the Model Registry, Or Terminate the Execution in Failed State\n", - "\n", - "In this step, the model is registered only if the 
accuracy of the model, as determined by the evaluation step `step_eval`, exceeded a specified value. Otherwise, the pipeline execution fails and terminates. A `ConditionStep` enables pipelines to support conditional execution in the pipeline DAG based on the conditions of the step properties.\n", - "\n", - "In the following section, you:\n", - "\n", - "* Define a `ConditionLessThanOrEqualTo` on the accuracy value found in the output of the evaluation step, `step_eval`.\n", - "* Use the condition in the list of conditions in a `ConditionStep`.\n", - "* Pass the `CreateModelStep` and `TransformStep` steps, and the `RegisterModel` step collection into the `if_steps` of the `ConditionStep`, which are only executed if the condition evaluates to `True`.\n", - "* Pass the `FailStep` step into the `else_steps`of the `ConditionStep`, which is only executed if the condition evaluates to `False`." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "454e9b4c", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:10.732662Z", - "iopub.status.busy": "2022-04-18T00:27:10.731797Z", - "iopub.status.idle": "2022-04-18T00:27:10.739016Z", - "shell.execute_reply": "2022-04-18T00:27:10.738481Z" - }, - "papermill": { - "duration": 0.185973, - "end_time": "2022-04-18T00:27:10.739143", - "exception": false, - "start_time": "2022-04-18T00:27:10.553170", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", - "from sagemaker.workflow.condition_step import ConditionStep\n", - "from sagemaker.workflow.functions import JsonGet\n", - "\n", - "\n", - "cond_lte = ConditionLessThanOrEqualTo(\n", - " left=JsonGet(\n", - " step_name=step_eval.name,\n", - " property_file=evaluation_report,\n", - " json_path=\"regression_metrics.mse.value\",\n", - " ),\n", - " right=mse_threshold,\n", - ")\n", - "\n", - "step_cond = ConditionStep(\n", - " 
name=\"AbaloneMSECond\",\n", - " conditions=[cond_lte],\n", - " if_steps=[step_register, step_create_model, step_transform],\n", - " else_steps=[step_fail],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "1849f21f", - "metadata": { - "papermill": { - "duration": 0.106741, - "end_time": "2022-04-18T00:27:10.953358", - "exception": false, - "start_time": "2022-04-18T00:27:10.846617", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define a Condition Step to Check Accuracy and Conditionally Execute Steps](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-6.png)" - ] - }, - { - "cell_type": "markdown", - "id": "7e06f7da", - "metadata": { - "papermill": { - "duration": 0.169898, - "end_time": "2022-04-18T00:27:11.231857", - "exception": false, - "start_time": "2022-04-18T00:27:11.061959", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Define a Pipeline of Parameters, Steps, and Conditions\n", - "\n", - "In this section, combine the steps into a Pipeline so it can be executed.\n", - "\n", - "A pipeline requires a `name`, `parameters`, and `steps`. Names must be unique within an `(account, region)` pair.\n", - "\n", - "Note:\n", - "\n", - "* All the parameters used in the definitions must be present.\n", - "* Steps passed into the pipeline do not have to be listed in the order of execution. The SageMaker Pipeline service resolves the data dependency DAG as steps for the execution to complete.\n", - "* Steps must be unique to across the pipeline step list and all condition step if/else lists." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "331e72f7", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:11.453724Z", - "iopub.status.busy": "2022-04-18T00:27:11.452894Z", - "iopub.status.idle": "2022-04-18T00:27:11.548515Z", - "shell.execute_reply": "2022-04-18T00:27:11.548065Z" - }, - "papermill": { - "duration": 0.208191, - "end_time": "2022-04-18T00:27:11.548633", - "exception": false, - "start_time": "2022-04-18T00:27:11.340442", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.workflow.pipeline import Pipeline\n", - "\n", - "\n", - "pipeline_name = f\"AbalonePipeline\"\n", - "pipeline = Pipeline(\n", - " name=pipeline_name,\n", - " parameters=[\n", - " processing_instance_type,\n", - " processing_instance_count,\n", - " instance_type,\n", - " model_approval_status,\n", - " input_data,\n", - " batch_data,\n", - " mse_threshold,\n", - " ],\n", - " steps=[step_process, step_train, step_eval, step_cond],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "c12b9f87", - "metadata": { - "papermill": { - "duration": 0.107549, - "end_time": "2022-04-18T00:27:11.763791", - "exception": false, - "start_time": "2022-04-18T00:27:11.656242", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![Define a Pipeline of Parameters, Steps, and Conditions](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-7.png)" - ] - }, - { - "cell_type": "markdown", - "id": "f395b678", - "metadata": { - "papermill": { - "duration": 0.107734, - "end_time": "2022-04-18T00:27:12.042259", - "exception": false, - "start_time": "2022-04-18T00:27:11.934525", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "### (Optional) Examining the pipeline definition\n", - "\n", - "The JSON of the pipeline definition can be examined to confirm the pipeline is well-defined and the parameters and 
step properties resolve correctly." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "a5ed87c3", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:12.264134Z", - "iopub.status.busy": "2022-04-18T00:27:12.263335Z", - "iopub.status.idle": "2022-04-18T00:27:12.933150Z", - "shell.execute_reply": "2022-04-18T00:27:12.932730Z" - }, - "papermill": { - "duration": 0.783967, - "end_time": "2022-04-18T00:27:12.933271", - "exception": false, - "start_time": "2022-04-18T00:27:12.149304", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config\n" - ] - }, - { - "data": { - "text/plain": [ - "{'Version': '2020-12-01',\n", - " 'Metadata': {},\n", - " 'Parameters': [{'Name': 'ProcessingInstanceType',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 'ml.m5.xlarge'},\n", - " {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},\n", - " {'Name': 'TrainingInstanceType',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 'ml.m5.xlarge'},\n", - " {'Name': 'ModelApprovalStatus',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 'PendingManualApproval'},\n", - " {'Name': 'InputData',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 's3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset.csv'},\n", - " {'Name': 'BatchData',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 's3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset-batch'},\n", - " {'Name': 'MseThreshold', 'Type': 'Float', 'DefaultValue': 6.0}],\n", - " 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},\n", - " 'TrialName': {'Get': 'Execution.PipelineExecutionId'}},\n", - " 'Steps': [{'Name': 'AbaloneProcess',\n", - " 'Type': 'Processing',\n", - " 'Arguments': {'ProcessingResources': {'ClusterConfig': 
{'InstanceType': {'Get': 'Parameters.ProcessingInstanceType'},\n", - " 'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},\n", - " 'VolumeSizeInGB': 30}},\n", - " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:1.0-1-cpu-py3',\n", - " 'ContainerEntrypoint': ['python3',\n", - " '/opt/ml/processing/input/code/preprocessing.py']},\n", - " 'RoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'ProcessingInputs': [{'InputName': 'input-1',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': {'Get': 'Parameters.InputData'},\n", - " 'LocalPath': '/opt/ml/processing/input',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}},\n", - " {'InputName': 'code',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/AbaloneProcess-a55d50a0c87783b887401dc1ff1d9bf9/input/code/preprocessing.py',\n", - " 'LocalPath': '/opt/ml/processing/input/code',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}}],\n", - " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'train',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/AbaloneProcess-a55d50a0c87783b887401dc1ff1d9bf9/output/train',\n", - " 'LocalPath': '/opt/ml/processing/train',\n", - " 'S3UploadMode': 'EndOfJob'}},\n", - " {'OutputName': 'validation',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/AbaloneProcess-a55d50a0c87783b887401dc1ff1d9bf9/output/validation',\n", - " 'LocalPath': '/opt/ml/processing/validation',\n", - " 'S3UploadMode': 'EndOfJob'}},\n", - " {'OutputName': 'test',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 
's3://sagemaker-us-west-2-000000000000/AbaloneProcess-a55d50a0c87783b887401dc1ff1d9bf9/output/test',\n", - " 'LocalPath': '/opt/ml/processing/test',\n", - " 'S3UploadMode': 'EndOfJob'}}]}}},\n", - " {'Name': 'AbaloneTrain',\n", - " 'Type': 'Training',\n", - " 'Arguments': {'AlgorithmSpecification': {'TrainingInputMode': 'File',\n", - " 'TrainingImage': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'},\n", - " 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/AbaloneTrain'},\n", - " 'StoppingCondition': {'MaxRuntimeInSeconds': 86400},\n", - " 'ResourceConfig': {'InstanceCount': 1,\n", - " 'InstanceType': {'Get': 'Parameters.TrainingInstanceType'},\n", - " 'VolumeSizeInGB': 30},\n", - " 'RoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", - " 'S3Uri': {'Get': \"Steps.AbaloneProcess.ProcessingOutputConfig.Outputs['train'].S3Output.S3Uri\"},\n", - " 'S3DataDistributionType': 'FullyReplicated'}},\n", - " 'ContentType': 'text/csv',\n", - " 'ChannelName': 'train'},\n", - " {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", - " 'S3Uri': {'Get': \"Steps.AbaloneProcess.ProcessingOutputConfig.Outputs['validation'].S3Output.S3Uri\"},\n", - " 'S3DataDistributionType': 'FullyReplicated'}},\n", - " 'ContentType': 'text/csv',\n", - " 'ChannelName': 'validation'}],\n", - " 'HyperParameters': {'objective': 'reg:linear',\n", - " 'num_round': '50',\n", - " 'max_depth': '5',\n", - " 'eta': '0.2',\n", - " 'gamma': '4',\n", - " 'min_child_weight': '6',\n", - " 'subsample': '0.7'},\n", - " 'ProfilerRuleConfigurations': [{'RuleConfigurationName': 'ProfilerReport-1650241632',\n", - " 'RuleEvaluatorImage': '895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest',\n", - " 'RuleParameters': {'rule_to_invoke': 'ProfilerReport'}}],\n", - " 'ProfilerConfig': 
{'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/AbaloneTrain'}}},\n", - " {'Name': 'AbaloneEval',\n", - " 'Type': 'Processing',\n", - " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': {'Get': 'Parameters.ProcessingInstanceType'},\n", - " 'InstanceCount': 1,\n", - " 'VolumeSizeInGB': 30}},\n", - " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", - " 'ContainerEntrypoint': ['python3',\n", - " '/opt/ml/processing/input/code/evaluation.py']},\n", - " 'RoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'ProcessingInputs': [{'InputName': 'input-1',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': {'Get': 'Steps.AbaloneTrain.ModelArtifacts.S3ModelArtifacts'},\n", - " 'LocalPath': '/opt/ml/processing/model',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}},\n", - " {'InputName': 'input-2',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': {'Get': \"Steps.AbaloneProcess.ProcessingOutputConfig.Outputs['test'].S3Output.S3Uri\"},\n", - " 'LocalPath': '/opt/ml/processing/test',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}},\n", - " {'InputName': 'code',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/AbaloneEval-3419a950c93b74404394e09ce2edc51e/input/code/evaluation.py',\n", - " 'LocalPath': '/opt/ml/processing/input/code',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}}],\n", - " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'evaluation',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 
's3://sagemaker-us-west-2-000000000000/AbaloneEval-3419a950c93b74404394e09ce2edc51e/output/evaluation',\n", - " 'LocalPath': '/opt/ml/processing/evaluation',\n", - " 'S3UploadMode': 'EndOfJob'}}]}},\n", - " 'PropertyFiles': [{'PropertyFileName': 'EvaluationReport',\n", - " 'OutputName': 'evaluation',\n", - " 'FilePath': 'evaluation.json'}]},\n", - " {'Name': 'AbaloneMSECond',\n", - " 'Type': 'Condition',\n", - " 'Arguments': {'Conditions': [{'Type': 'LessThanOrEqualTo',\n", - " 'LeftValue': {'Std:JsonGet': {'PropertyFile': {'Get': 'Steps.AbaloneEval.PropertyFiles.EvaluationReport'},\n", - " 'Path': 'regression_metrics.mse.value'}},\n", - " 'RightValue': {'Get': 'Parameters.MseThreshold'}}],\n", - " 'IfSteps': [{'Name': 'AbaloneRegisterModel',\n", - " 'Type': 'RegisterModel',\n", - " 'Arguments': {'ModelPackageGroupName': 'AbaloneModelPackageGroupName',\n", - " 'ModelMetrics': {'ModelQuality': {'Statistics': {'ContentType': 'application/json',\n", - " 'S3Uri': 's3://sagemaker-us-west-2-000000000000/AbaloneEval-3419a950c93b74404394e09ce2edc51e/output/evaluation/evaluation.json'}},\n", - " 'Bias': {},\n", - " 'Explainability': {}},\n", - " 'InferenceSpecification': {'Containers': [{'Image': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", - " 'ModelDataUrl': {'Get': 'Steps.AbaloneTrain.ModelArtifacts.S3ModelArtifacts'}}],\n", - " 'SupportedContentTypes': ['text/csv'],\n", - " 'SupportedResponseMIMETypes': ['text/csv'],\n", - " 'SupportedRealtimeInferenceInstanceTypes': ['ml.t2.medium',\n", - " 'ml.m5.xlarge'],\n", - " 'SupportedTransformInstanceTypes': ['ml.m5.xlarge']},\n", - " 'ModelApprovalStatus': {'Get': 'Parameters.ModelApprovalStatus'}}},\n", - " {'Name': 'AbaloneCreateModel',\n", - " 'Type': 'Model',\n", - " 'Arguments': {'ExecutionRoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'PrimaryContainer': {'Image': 
'246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", - " 'Environment': {},\n", - " 'ModelDataUrl': {'Get': 'Steps.AbaloneTrain.ModelArtifacts.S3ModelArtifacts'}}}},\n", - " {'Name': 'AbaloneTransform',\n", - " 'Type': 'Transform',\n", - " 'Arguments': {'ModelName': {'Get': 'Steps.AbaloneCreateModel.ModelName'},\n", - " 'TransformInput': {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", - " 'S3Uri': {'Get': 'Parameters.BatchData'}}}},\n", - " 'TransformOutput': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/AbaloneTransform'},\n", - " 'TransformResources': {'InstanceCount': 1,\n", - " 'InstanceType': 'ml.m5.xlarge'}}}],\n", - " 'ElseSteps': [{'Name': 'AbaloneMSEFail',\n", - " 'Type': 'Fail',\n", - " 'Arguments': {'ErrorMessage': {'Std:Join': {'On': ' ',\n", - " 'Values': ['Execution failed due to MSE >',\n", - " {'Get': 'Parameters.MseThreshold'}]}}}}]}}]}" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import json\n", - "\n", - "\n", - "definition = json.loads(pipeline.definition())\n", - "definition" - ] - }, - { - "cell_type": "markdown", - "id": "73ff3cee", - "metadata": { - "papermill": { - "duration": 0.108145, - "end_time": "2022-04-18T00:27:13.148804", - "exception": false, - "start_time": "2022-04-18T00:27:13.040659", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Submit the pipeline to SageMaker and start execution\n", - "\n", - "Submit the pipeline definition to the Pipeline service. The Pipeline service uses the role that is passed in to create all the jobs defined in the steps." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "1cb0aae9", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:13.358032Z", - "iopub.status.busy": "2022-04-18T00:27:13.357539Z", - "iopub.status.idle": "2022-04-18T00:27:14.732229Z", - "shell.execute_reply": "2022-04-18T00:27:14.732881Z" - }, - "papermill": { - "duration": 1.476645, - "end_time": "2022-04-18T00:27:14.733040", - "exception": false, - "start_time": "2022-04-18T00:27:13.256395", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config\n" - ] - }, - { - "data": { - "text/plain": [ - "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/abalonepipeline',\n", - " 'ResponseMetadata': {'RequestId': '0dafe696-f7b9-497d-883f-46dd6db4d900',\n", - " 'HTTPStatusCode': 200,\n", - " 'HTTPHeaders': {'x-amzn-requestid': '0dafe696-f7b9-497d-883f-46dd6db4d900',\n", - " 'content-type': 'application/x-amz-json-1.1',\n", - " 'content-length': '83',\n", - " 'date': 'Mon, 18 Apr 2022 00:27:13 GMT'},\n", - " 'RetryAttempts': 0}}" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline.upsert(role_arn=role)" - ] - }, - { - "cell_type": "markdown", - "id": "cd4f5795", - "metadata": { - "papermill": { - "duration": 0.072846, - "end_time": "2022-04-18T00:27:14.864017", - "exception": false, - "start_time": "2022-04-18T00:27:14.791171", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Start the pipeline and accept all the default parameters." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "a6c8241e", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:14.979950Z", - "iopub.status.busy": "2022-04-18T00:27:14.978878Z", - "iopub.status.idle": "2022-04-18T00:27:15.221654Z", - "shell.execute_reply": "2022-04-18T00:27:15.221006Z" - }, - "papermill": { - "duration": 0.304951, - "end_time": "2022-04-18T00:27:15.221800", - "exception": false, - "start_time": "2022-04-18T00:27:14.916849", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution = pipeline.start()" - ] - }, - { - "cell_type": "markdown", - "id": "2870497c", - "metadata": { - "papermill": { - "duration": 0.050942, - "end_time": "2022-04-18T00:27:15.328982", - "exception": false, - "start_time": "2022-04-18T00:27:15.278040", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Pipeline Operations: Examining and Waiting for Pipeline Execution\n", - "\n", - "Describe the pipeline execution." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "22643764", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:15.466510Z", - "iopub.status.busy": "2022-04-18T00:27:15.465830Z", - "iopub.status.idle": "2022-04-18T00:27:15.526441Z", - "shell.execute_reply": "2022-04-18T00:27:15.526885Z" - }, - "papermill": { - "duration": 0.145472, - "end_time": "2022-04-18T00:27:15.527029", - "exception": false, - "start_time": "2022-04-18T00:27:15.381557", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/abalonepipeline',\n", - " 'PipelineExecutionArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/abalonepipeline/execution/3a9dqd5mmh4u',\n", - " 'PipelineExecutionDisplayName': 'execution-1650241635185',\n", - " 'PipelineExecutionStatus': 'Executing',\n", - " 'CreationTime': datetime.datetime(2022, 4, 18, 0, 27, 15, 69000, tzinfo=tzlocal()),\n", - " 'LastModifiedTime': datetime.datetime(2022, 4, 18, 0, 27, 15, 69000, tzinfo=tzlocal()),\n", - " 'CreatedBy': {},\n", - " 'LastModifiedBy': {},\n", - " 'ResponseMetadata': {'RequestId': '59d80668-c513-47e4-b703-d12d838faafb',\n", - " 'HTTPStatusCode': 200,\n", - " 'HTTPHeaders': {'x-amzn-requestid': '59d80668-c513-47e4-b703-d12d838faafb',\n", - " 'content-type': 'application/x-amz-json-1.1',\n", - " 'content-length': '395',\n", - " 'date': 'Mon, 18 Apr 2022 00:27:14 GMT'},\n", - " 'RetryAttempts': 0}}" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "execution.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "fb01f646", - "metadata": { - "papermill": { - "duration": 0.045423, - "end_time": "2022-04-18T00:27:15.619744", - "exception": false, - "start_time": "2022-04-18T00:27:15.574321", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Wait for the execution to complete." 
- ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "7fc56591", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:27:15.716189Z", - "iopub.status.busy": "2022-04-18T00:27:15.715544Z", - "iopub.status.idle": "2022-04-18T00:43:19.136668Z", - "shell.execute_reply": "2022-04-18T00:43:19.135601Z" - }, - "papermill": { - "duration": 963.471787, - "end_time": "2022-04-18T00:43:19.136818", - "exception": false, - "start_time": "2022-04-18T00:27:15.665031", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution.wait()" - ] - }, - { - "cell_type": "markdown", - "id": "9245d00b", - "metadata": { - "papermill": { - "duration": 0.062785, - "end_time": "2022-04-18T00:43:19.260264", - "exception": false, - "start_time": "2022-04-18T00:43:19.197479", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "List the steps in the execution. These are the steps in the pipeline that have been resolved by the step executor service." 
- ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "bc47938e", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:43:19.392592Z", - "iopub.status.busy": "2022-04-18T00:43:19.390278Z", - "iopub.status.idle": "2022-04-18T00:43:19.469776Z", - "shell.execute_reply": "2022-04-18T00:43:19.470241Z" - }, - "papermill": { - "duration": 0.148627, - "end_time": "2022-04-18T00:43:19.470452", - "exception": false, - "start_time": "2022-04-18T00:43:19.321825", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'StepName': 'AbaloneTransform',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 21, 710000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 43, 5, 175000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:transform-job/pipelines-3a9dqd5mmh4u-abalonetransform-shmqcpjkhl'}}},\n", - " {'StepName': 'AbaloneCreateModel',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 20, 252000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 21, 188000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model/pipelines-3a9dqd5mmh4u-abalonecreatemodel-nr28ooxmjn'}}},\n", - " {'StepName': 'AbaloneRegisterModel',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 20, 252000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 22, 18000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model-package/abalonemodelpackagegroupname/691'}}},\n", - " {'StepName': 'AbaloneMSECond',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 19, 251000, 
tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 19, 670000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'Condition': {'Outcome': 'True'}}},\n", - " {'StepName': 'AbaloneEval',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 34, 22, 219000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 18, 302000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-3a9dqd5mmh4u-abaloneeval-rqz0ph05st'}}},\n", - " {'StepName': 'AbaloneTrain',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 31, 35, 376000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 34, 21, 590000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-3a9dqd5mmh4u-abalonetrain-slip45cf16'}}},\n", - " {'StepName': 'AbaloneProcess',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 27, 16, 184000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 31, 34, 404000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-3a9dqd5mmh4u-abaloneprocess-ck1tj9s93u'}}}]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "execution.list_steps()" - ] - }, - { - "cell_type": "markdown", - "id": "516f36f1", - "metadata": { - "papermill": { - "duration": 0.059518, - "end_time": "2022-04-18T00:43:19.587283", - "exception": false, - "start_time": "2022-04-18T00:43:19.527765", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "### Examining the Evaluation\n", - "\n", - 
"Examine the resulting model evaluation after the pipeline completes. Download the resulting `evaluation.json` file from S3 and print the report." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "544fba16", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:43:19.792493Z", - "iopub.status.busy": "2022-04-18T00:43:19.790673Z", - "iopub.status.idle": "2022-04-18T00:43:19.963299Z", - "shell.execute_reply": "2022-04-18T00:43:19.962889Z" - }, - "papermill": { - "duration": 0.30999, - "end_time": "2022-04-18T00:43:19.963417", - "exception": false, - "start_time": "2022-04-18T00:43:19.653427", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'regression_metrics': {'mse': {'standard_deviation': 2.1549187830947223,\n", - " 'value': 4.64537577872224}}}\n" - ] - } - ], - "source": [ - "from pprint import pprint\n", - "\n", - "\n", - "evaluation_json = sagemaker.s3.S3Downloader.read_file(\n", - " \"{}/evaluation.json\".format(\n", - " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", - " )\n", - ")\n", - "pprint(json.loads(evaluation_json))" - ] - }, - { - "cell_type": "markdown", - "id": "f2a7eace", - "metadata": { - "papermill": { - "duration": 0.047263, - "end_time": "2022-04-18T00:43:20.057839", - "exception": false, - "start_time": "2022-04-18T00:43:20.010576", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "### Lineage\n", - "\n", - "Review the lineage of the artifacts generated by the pipeline." 
- ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "6cac91a1", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:43:20.159788Z", - "iopub.status.busy": "2022-04-18T00:43:20.159020Z", - "iopub.status.idle": "2022-04-18T00:43:56.906826Z", - "shell.execute_reply": "2022-04-18T00:43:56.907212Z" - }, - "papermill": { - "duration": 36.799704, - "end_time": "2022-04-18T00:43:56.907470", - "exception": false, - "start_time": "2022-04-18T00:43:20.107766", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneProcess', 'StartTime': datetime.datetime(2022, 4, 18, 0, 27, 16, 184000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 31, 34, 404000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-3a9dqd5mmh4u-abaloneprocess-ck1tj9s93u'}}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...1dc1ff1d9bf9/input/code/preprocessing.pyInputDataSetContributedToartifact
1s3://...000000000000/abalone/abalone-dataset.csvInputDataSetContributedToartifact
224661...om/sagemaker-scikit-learn:1.0-1-cpu-py3InputImageContributedToartifact
3s3://...50a0c87783b887401dc1ff1d9bf9/output/testOutputDataSetProducedartifact
4s3://...7783b887401dc1ff1d9bf9/output/validationOutputDataSetProducedartifact
5s3://...0a0c87783b887401dc1ff1d9bf9/output/trainOutputDataSetProducedartifact
\n", - "
" - ], - "text/plain": [ - " Name/Source Direction Type \\\n", - "0 s3://...1dc1ff1d9bf9/input/code/preprocessing.py Input DataSet \n", - "1 s3://...000000000000/abalone/abalone-dataset.csv Input DataSet \n", - "2 24661...om/sagemaker-scikit-learn:1.0-1-cpu-py3 Input Image \n", - "3 s3://...50a0c87783b887401dc1ff1d9bf9/output/test Output DataSet \n", - "4 s3://...7783b887401dc1ff1d9bf9/output/validation Output DataSet \n", - "5 s3://...0a0c87783b887401dc1ff1d9bf9/output/train Output DataSet \n", - "\n", - " Association Type Lineage Type \n", - "0 ContributedTo artifact \n", - "1 ContributedTo artifact \n", - "2 ContributedTo artifact \n", - "3 Produced artifact \n", - "4 Produced artifact \n", - "5 Produced artifact " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneTrain', 'StartTime': datetime.datetime(2022, 4, 18, 0, 31, 35, 376000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 34, 21, 590000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-3a9dqd5mmh4u-abalonetrain-slip45cf16'}}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...7783b887401dc1ff1d9bf9/output/validationInputDataSetContributedToartifact
1s3://...0a0c87783b887401dc1ff1d9bf9/output/trainInputDataSetContributedToartifact
224661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
3s3://...loneTrain-SLIp45cF16/output/model.tar.gzOutputModelProducedartifact
\n", - "
" - ], - "text/plain": [ - " Name/Source Direction Type \\\n", - "0 s3://...7783b887401dc1ff1d9bf9/output/validation Input DataSet \n", - "1 s3://...0a0c87783b887401dc1ff1d9bf9/output/train Input DataSet \n", - "2 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", - "3 s3://...loneTrain-SLIp45cF16/output/model.tar.gz Output Model \n", - "\n", - " Association Type Lineage Type \n", - "0 ContributedTo artifact \n", - "1 ContributedTo artifact \n", - "2 ContributedTo artifact \n", - "3 Produced artifact " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneEval', 'StartTime': datetime.datetime(2022, 4, 18, 0, 34, 22, 219000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 18, 302000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-3a9dqd5mmh4u-abaloneeval-rqz0ph05st'}}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...394e09ce2edc51e/input/code/evaluation.pyInputDataSetContributedToartifact
1s3://...50a0c87783b887401dc1ff1d9bf9/output/testInputDataSetContributedToartifact
2s3://...loneTrain-SLIp45cF16/output/model.tar.gzInputModelContributedToartifact
324661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
4s3://...3b74404394e09ce2edc51e/output/evaluationOutputDataSetProducedartifact
\n", - "
" - ], - "text/plain": [ - " Name/Source Direction Type \\\n", - "0 s3://...394e09ce2edc51e/input/code/evaluation.py Input DataSet \n", - "1 s3://...50a0c87783b887401dc1ff1d9bf9/output/test Input DataSet \n", - "2 s3://...loneTrain-SLIp45cF16/output/model.tar.gz Input Model \n", - "3 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", - "4 s3://...3b74404394e09ce2edc51e/output/evaluation Output DataSet \n", - "\n", - " Association Type Lineage Type \n", - "0 ContributedTo artifact \n", - "1 ContributedTo artifact \n", - "2 ContributedTo artifact \n", - "3 ContributedTo artifact \n", - "4 Produced artifact " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneMSECond', 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 19, 251000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 19, 670000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'Condition': {'Outcome': 'True'}}}\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneRegisterModel', 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 20, 252000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 22, 18000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model-package/abalonemodelpackagegroupname/691'}}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...loneTrain-SLIp45cF16/output/model.tar.gzInputModelContributedToartifact
124661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
2abalonemodelpackagegroupname-691-PendingManual...InputApprovalContributedToaction
3AbaloneModelPackageGroupName-1607556271-aws-mo...OutputModelGroupAssociatedWithcontext
\n", - "
" - ], - "text/plain": [ - " Name/Source Direction Type \\\n", - "0 s3://...loneTrain-SLIp45cF16/output/model.tar.gz Input Model \n", - "1 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", - "2 abalonemodelpackagegroupname-691-PendingManual... Input Approval \n", - "3 AbaloneModelPackageGroupName-1607556271-aws-mo... Output ModelGroup \n", - "\n", - " Association Type Lineage Type \n", - "0 ContributedTo artifact \n", - "1 ContributedTo artifact \n", - "2 ContributedTo action \n", - "3 AssociatedWith context " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneCreateModel', 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 20, 252000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 38, 21, 188000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model/pipelines-3a9dqd5mmh4u-abalonecreatemodel-nr28ooxmjn'}}}\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'StepName': 'AbaloneTransform', 'StartTime': datetime.datetime(2022, 4, 18, 0, 38, 21, 710000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 4, 18, 0, 43, 5, 175000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:transform-job/pipelines-3a9dqd5mmh4u-abalonetransform-shmqcpjkhl'}}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...loneTrain-SLIp45cF16/output/model.tar.gzInputModelContributedToartifact
124661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
2s3://...1695447989/abalone/abalone-dataset-batchInputDataSetContributedToartifact
3s3://...-us-west-2-000000000000/AbaloneTransformOutputDataSetProducedartifact
\n", - "
" - ], - "text/plain": [ - " Name/Source Direction Type \\\n", - "0 s3://...loneTrain-SLIp45cF16/output/model.tar.gz Input Model \n", - "1 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", - "2 s3://...1695447989/abalone/abalone-dataset-batch Input DataSet \n", - "3 s3://...-us-west-2-000000000000/AbaloneTransform Output DataSet \n", - "\n", - " Association Type Lineage Type \n", - "0 ContributedTo artifact \n", - "1 ContributedTo artifact \n", - "2 ContributedTo artifact \n", - "3 Produced artifact " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import time\n", - "from sagemaker.lineage.visualizer import LineageTableVisualizer\n", - "\n", - "\n", - "viz = LineageTableVisualizer(sagemaker.session.Session())\n", - "for execution_step in reversed(execution.list_steps()):\n", - " print(execution_step)\n", - " display(viz.show(pipeline_execution_step=execution_step))\n", - " time.sleep(5)" - ] - }, - { - "cell_type": "markdown", - "id": "b0f2959e", - "metadata": { - "papermill": { - "duration": 0.085763, - "end_time": "2022-04-18T00:43:57.076519", - "exception": false, - "start_time": "2022-04-18T00:43:56.990756", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "### Parametrized Executions\n", - "\n", - "You can run additional executions of the pipeline and specify different pipeline parameters. The `parameters` argument is a dictionary containing parameter names, and where the values are used to override the defaults values.\n", - "\n", - "Based on the performance of the model, you might want to kick off another pipeline execution on a compute-optimized instance type and set the model approval status to \"Approved\" automatically. This means that the model package version generated by the `RegisterModel` step is automatically ready for deployment through CI/CD pipelines, such as with SageMaker Projects." 
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "7900a64c", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:43:57.215340Z", - "iopub.status.busy": "2022-04-18T00:43:57.214645Z", - "iopub.status.idle": "2022-04-18T00:43:57.436693Z", - "shell.execute_reply": "2022-04-18T00:43:57.435572Z" - }, - "papermill": { - "duration": 0.289411, - "end_time": "2022-04-18T00:43:57.436836", - "exception": false, - "start_time": "2022-04-18T00:43:57.147425", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution = pipeline.start(\n", - " parameters=dict(\n", - " ProcessingInstanceType=\"ml.c5.xlarge\",\n", - " ModelApprovalStatus=\"Approved\",\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "ac422422", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:43:57.740793Z", - "iopub.status.busy": "2022-04-18T00:43:57.737704Z", - "iopub.status.idle": "2022-04-18T00:59:31.046838Z", - "shell.execute_reply": "2022-04-18T00:59:31.044279Z" - }, - "papermill": { - "duration": 933.469555, - "end_time": "2022-04-18T00:59:31.046992", - "exception": false, - "start_time": "2022-04-18T00:43:57.577437", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution.wait()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "7d9dec71", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:59:31.185427Z", - "iopub.status.busy": "2022-04-18T00:59:31.184766Z", - "iopub.status.idle": "2022-04-18T00:59:31.296309Z", - "shell.execute_reply": "2022-04-18T00:59:31.296777Z" - }, - "papermill": { - "duration": 0.178516, - "end_time": "2022-04-18T00:59:31.296922", - "exception": false, - "start_time": "2022-04-18T00:59:31.118406", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'StepName': 'AbaloneTransform',\n", - " 'StartTime': datetime.datetime(2022, 4, 
18, 0, 55, 17, 609000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 59, 24, 521000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:transform-job/pipelines-9275502uvwe2-abalonetransform-sdqf868czf'}}},\n", - " {'StepName': 'AbaloneCreateModel',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 55, 16, 365000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 55, 17, 124000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model/pipelines-9275502uvwe2-abalonecreatemodel-qbuz4uirnh'}}},\n", - " {'StepName': 'AbaloneRegisterModel',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 55, 16, 365000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 55, 16, 953000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model-package/abalonemodelpackagegroupname/692'}}},\n", - " {'StepName': 'AbaloneMSECond',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 55, 15, 608000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 55, 15, 837000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'Condition': {'Outcome': 'True'}}},\n", - " {'StepName': 'AbaloneEval',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 51, 6, 271000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 55, 14, 745000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-9275502uvwe2-abaloneeval-v43er0jak2'}}},\n", - " {'StepName': 
'AbaloneTrain',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 48, 20, 426000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 51, 5, 564000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-9275502uvwe2-abalonetrain-6rtveegego'}}},\n", - " {'StepName': 'AbaloneProcess',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 43, 58, 687000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 0, 48, 18, 976000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-9275502uvwe2-abaloneprocess-vynjsz7vps'}}}]" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "execution.list_steps()" - ] - }, - { - "cell_type": "markdown", - "id": "bcda6312", - "metadata": { - "papermill": { - "duration": 0.116815, - "end_time": "2022-04-18T00:59:31.473495", - "exception": false, - "start_time": "2022-04-18T00:59:31.356680", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Apart from that, you might also want to adjust the MSE threshold to a smaller value and raise the bar for the accuracy of the registered model. 
In this case you can override the MSE threshold like the following:" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "a844f423", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:59:31.674910Z", - "iopub.status.busy": "2022-04-18T00:59:31.674308Z", - "iopub.status.idle": "2022-04-18T00:59:31.880748Z", - "shell.execute_reply": "2022-04-18T00:59:31.881195Z" - }, - "jupyter": { - "outputs_hidden": false - }, - "papermill": { - "duration": 0.283862, - "end_time": "2022-04-18T00:59:31.881345", - "exception": false, - "start_time": "2022-04-18T00:59:31.597483", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution = pipeline.start(parameters=dict(MseThreshold=3.0))" - ] - }, - { - "cell_type": "markdown", - "id": "ff01c5f8", - "metadata": { - "papermill": { - "duration": 0.053964, - "end_time": "2022-04-18T00:59:32.002325", - "exception": false, - "start_time": "2022-04-18T00:59:31.948361", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "If the MSE threshold is not satisfied, the pipeline execution enters the `FailStep` and is marked as failed." 
- ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "9cc025f7", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:59:32.111415Z", - "iopub.status.busy": "2022-04-18T00:59:32.110753Z", - "iopub.status.idle": "2022-04-18T01:11:34.809867Z", - "shell.execute_reply": "2022-04-18T01:11:34.810467Z" - }, - "jupyter": { - "outputs_hidden": false - }, - "papermill": { - "duration": 722.756876, - "end_time": "2022-04-18T01:11:34.810765", - "exception": false, - "start_time": "2022-04-18T00:59:32.053889", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiter PipelineExecutionComplete failed: Waiter encountered a terminal failure state: For expression \"PipelineExecutionStatus\" we matched expected path: \"Failed\"\n" - ] - } - ], - "source": [ - "try:\n", - " execution.wait()\n", - "except Exception as error:\n", - " print(error)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "8dd35ff3", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T01:11:34.938869Z", - "iopub.status.busy": "2022-04-18T01:11:34.938199Z", - "iopub.status.idle": "2022-04-18T01:11:35.018383Z", - "shell.execute_reply": "2022-04-18T01:11:35.019866Z" - }, - "jupyter": { - "outputs_hidden": false - }, - "papermill": { - "duration": 0.147713, - "end_time": "2022-04-18T01:11:35.020129", - "exception": false, - "start_time": "2022-04-18T01:11:34.872416", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'StepName': 'AbaloneMSEFail',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 1, 11, 4, 749000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 1, 11, 5, 139000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Failed',\n", - " 'AttemptCount': 0,\n", - " 'FailureReason': 'Execution failed due to MSE > 3.0',\n", - 
" 'Metadata': {'Fail': {'ErrorMessage': 'Execution failed due to MSE > 3.0'}}},\n", - " {'StepName': 'AbaloneMSECond',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 1, 11, 3, 792000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 1, 11, 4, 204000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'Condition': {'Outcome': 'False'}}},\n", - " {'StepName': 'AbaloneEval',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 1, 6, 43, 271000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 1, 11, 2, 665000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-0xj80rygp8xr-abaloneeval-ffrpzxrakl'}}},\n", - " {'StepName': 'AbaloneTrain',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 1, 3, 53, 900000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 1, 6, 42, 35000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-0xj80rygp8xr-abalonetrain-u6uyl1wglo'}}},\n", - " {'StepName': 'AbaloneProcess',\n", - " 'StartTime': datetime.datetime(2022, 4, 18, 0, 59, 32, 429000, tzinfo=tzlocal()),\n", - " 'EndTime': datetime.datetime(2022, 4, 18, 1, 3, 53, 361000, tzinfo=tzlocal()),\n", - " 'StepStatus': 'Succeeded',\n", - " 'AttemptCount': 0,\n", - " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-0xj80rygp8xr-abaloneprocess-cwlzrt0ero'}}}]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "execution.list_steps()" - ] - } - ], - "metadata": { - "instance_type": "ml.t3.medium", - "kernelspec": { - "display_name": "Python 3 (Data Science)", - "language": "python", - 
"name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:000000000000:image/datascience-1.0" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.10" - }, - "papermill": { - "default_parameters": {}, - "duration": 2698.146273, - "end_time": "2022-04-18T01:11:36.131062", - "environment_variables": {}, - "exception": null, - "input_path": "sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb", - "output_path": "/opt/ml/processing/output/sagemaker-pipelines-preprocess-train-evaluate-batch-transform-2022-04-18-00-13-04.ipynb", - "parameters": { - "kms_key": "arn:aws:kms:us-west-2:000000000000:1234abcd-12ab-34cd-56ef-1234567890ab" - }, - "start_time": "2022-04-18T00:26:37.984789", - "version": "2.3.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py index 51ff704ff0..ec0717e582 100644 --- a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py +++ b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py @@ -1,42 +1,42 @@ import boto3 import json -iam = boto3.client('iam') +iam = boto3.client("iam") + def create_lambda_role(role_name): try: response = iam.create_role( - RoleName = role_name, - AssumeRolePolicyDocument = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Service": "lambda.amazonaws.com" - }, - "Action": "sts:AssumeRole" - } - ] - }), - Description='Role for Lambda to call SageMaker functions' + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": "sts:AssumeRole", + } + ], + } + ), + 
Description="Role for Lambda to call SageMaker functions", ) - role_arn = response['Role']['Arn'] + role_arn = response["Role"]["Arn"] response = iam.attach_role_policy( RoleName=role_name, - PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole' + PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", ) response = iam.attach_role_policy( - PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess', - RoleName=role_name + PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess", RoleName=role_name ) return role_arn except iam.exceptions.EntityAlreadyExistsException: - print(f'Using ARN from existing role: {role_name}') + print(f"Using ARN from existing role: {role_name}") response = iam.get_role(RoleName=role_name) - return response['Role']['Arn'] \ No newline at end of file + return response["Role"]["Arn"] diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 9466e321c3..695995a096 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -6,7 +6,7 @@ "source": [ "# SageMaker Pipelines Lambda Step\n", "\n", - "This notebook illustrates how a Lambda function can be run as a step in a SageMaker Pipeline. \n", + "This notebook illustrates how a Lambda function can be run as a step in a SageMaker Pipeline.\n", "\n", "The steps in this pipeline include:\n", "* Preprocess the Abalone dataset\n", @@ -94,13 +94,17 @@ " }\n", " ]\n", "}\n", - " \n", + "\n", "```" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Let's start by importing necessary packages and installing the SageMaker Python SDK." 
] @@ -108,7 +112,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import os\n", @@ -142,10 +150,11 @@ " LambdaOutput,\n", " LambdaOutputTypeEnum,\n", ")\n", - "from sagemaker.workflow.step_collections import CreateModelStep\n", + "from sagemaker.workflow.model_step import ModelStep\n", "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", "from sagemaker.workflow.condition_step import ConditionStep\n", "from sagemaker.workflow.functions import JsonGet\n", + "from sagemaker.workflow.pipeline_context import PipelineSession\n", "\n", "from sagemaker.lambda_helper import Lambda\n", "import sys" @@ -154,10 +163,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "!{sys.executable} -m pip install \"sagemaker==2.91.1\"" + "!{sys.executable} -m pip install \"sagemaker>=2.99.0\"" ] }, { @@ -177,12 +190,17 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the SageMaker Session\n", "\n", "sagemaker_session = sagemaker.Session()\n", + "pipeline_session = PipelineSession()\n", "sm_client = sagemaker_session.sagemaker_client\n", "region = sagemaker_session.boto_region_name\n", "prefix = \"lambda-step-pipeline\"\n", @@ -193,7 +211,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Define variables and parameters needed for the Pipeline steps\n", @@ -226,7 +248,7 @@ "source": [ "## Data Preparation\n", "\n", - "An SKLearn processor is used to prepare the dataset for the Hyperparameter Tuning job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets. 
\n", + "An SKLearn processor is used to prepare the dataset for the Hyperparameter Tuning job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets.\n", "\n", "The output of this step is used as the input to the TrainingStep." ] @@ -234,10 +256,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!mkdir -p code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile preprocess.py\n", + "%%writefile code/preprocess.py\n", "\n", "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", "import argparse\n", @@ -367,7 +406,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Process the training data step using a python script.\n", @@ -378,19 +421,23 @@ " instance_type=\"ml.m5.xlarge\",\n", " instance_count=processing_instance_count,\n", " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", - "step_process = ProcessingStep(\n", - " name=\"PreprocessAbaloneData\",\n", - " processor=sklearn_processor,\n", + "\n", + "processor_args = sklearn_processor.run(\n", " outputs=[\n", " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", " ],\n", - " code=\"preprocess.py\",\n", - " job_arguments=[\"--input-data\", input_data],\n", + " code=\"code/preprocess.py\",\n", + " arguments=[\"--input-data\", input_data],\n", + ")\n", + "\n", + "step_process = ProcessingStep(\n", + " 
name=\"PreprocessAbaloneData\",\n", + " step_args=processor_args,\n", " cache_config=cache_config,\n", ")" ] @@ -414,7 +461,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Define the output path for the model artifacts from the Hyperparameter Tuning Job\n", @@ -434,7 +485,7 @@ " instance_count=1,\n", " output_path=model_path,\n", " base_job_name=f\"{prefix}/{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", @@ -449,9 +500,7 @@ " silent=0,\n", ")\n", "\n", - "step_train = TrainingStep(\n", - " name=\"TrainAbaloneModel\",\n", - " estimator=xgb_train,\n", + "train_args = xgb_train.fit(\n", " inputs={\n", " \"train\": TrainingInput(\n", " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", @@ -464,6 +513,11 @@ " content_type=\"text/csv\",\n", " ),\n", " },\n", + ")\n", + "\n", + "step_train = TrainingStep(\n", + " name=\"TrainAbaloneModel\",\n", + " step_args=train_args,\n", " cache_config=cache_config,\n", ")" ] @@ -474,16 +528,20 @@ "source": [ "#### Evaluating the model\n", "\n", - "Use a processing job to evaluate the model from the TrainingStep. If the output of the evaluation is True, a model is created and a Lambda function is invoked to deploy the model to a SageMaker Endpoint. " + "Use a processing job to evaluate the model from the TrainingStep. If the output of the evaluation is True, a model is created and a Lambda function is invoked to deploy the model to a SageMaker Endpoint." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile evaluate.py\n", + "%%writefile code/evaluate.py\n", "\n", "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", "import json\n", @@ -545,7 +603,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# A ProcessingStep is used to evaluate the performance of the trained model.\n", @@ -557,7 +619,7 @@ " instance_type=\"ml.m5.xlarge\",\n", " instance_count=1,\n", " base_job_name=f\"{prefix}/{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", @@ -567,9 +629,7 @@ " path=\"evaluation.json\",\n", ")\n", "\n", - "step_eval = ProcessingStep(\n", - " name=\"EvaluateAbaloneModel\",\n", - " processor=script_eval,\n", + "eval_args = script_eval.run(\n", " inputs=[\n", " ProcessingInput(\n", " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", @@ -587,7 +647,11 @@ " destination=f\"s3://{default_bucket}/{s3_prefix}/evaluation_report\",\n", " ),\n", " ],\n", - " code=\"evaluate.py\",\n", + " code=\"code/evaluate.py\",\n", + ")\n", + "step_eval = ProcessingStep(\n", + " name=\"EvaluateAbaloneModel\",\n", + " step_args=eval_args,\n", " property_files=[evaluation_report],\n", " cache_config=cache_config,\n", ")" @@ -605,22 +669,25 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create Model\n", "model = Model(\n", " image_uri=image_uri,\n", " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", " predictor_cls=XGBoostPredictor,\n", ")\n", 
"\n", - "step_create_model = CreateModelStep(\n", + "step_create_model = ModelStep(\n", " name=\"CreateModel\",\n", - " model=model,\n", - " inputs=sagemaker.inputs.CreateModelInput(instance_type=\"ml.m4.large\"),\n", + " step_args=model.create(\"ml.m4.large\"),\n", ")" ] }, @@ -630,11 +697,11 @@ "source": [ "## Setting up Lambda\n", "\n", - "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providing a script, function name and role for the Lambda function. \n", + "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providing a script, function name and role for the Lambda function.\n", "\n", "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n", "\n", - "The dictionary response from the Lambda function is parsed through the `LambdaOutput` objects provided to the `outputs` argument. The `output_name` in `LambdaOutput` corresponds to the dictionary key in the Lambda's return dictionary. " + "The dictionary response from the Lambda function is parsed through the `LambdaOutput` objects provided to the `outputs` argument. The `output_name` in `LambdaOutput` corresponds to the dictionary key in the Lambda's return dictionary." ] }, { @@ -643,19 +710,23 @@ "source": [ "#### Define the Lambda function\n", "\n", - "Users can choose the leverage the Lambda helper class to create a Lambda function and provide that function object to the LambdaStep. 
Alternatively, users can use a pre-deployed Lambda function and provide the function ARN to the `Lambda` helper class in the Lambda step. " + "Users can choose the leverage the Lambda helper class to create a Lambda function and provide that function object to the LambdaStep. Alternatively, users can use a pre-deployed Lambda function and provide the function ARN to the `Lambda` helper class in the Lambda step." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile lambda_helper.py\n", + "%%writefile code/lambda_helper.py\n", "\n", "\"\"\"\n", - "This Lambda function creates an Endpoint Configuration and deploys a model to an Endpoint. \n", + "This Lambda function creates an Endpoint Configuration and deploys a model to an Endpoint.\n", "The name of the model to deploy is provided via the `event` argument\n", "\"\"\"\n", "\n", @@ -703,9 +774,9 @@ "source": [ "#### Setting up the custom IAM Role\n", "\n", - "The Lambda function needs an IAM role that allows it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", + "The Lambda function needs an IAM role that allows it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep.\n", "\n", - "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies.\n", "\n", "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. 
This should be replaced with an IAM policy with least privileges as per AWS IAM best practices." ] @@ -713,7 +784,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from iam_helper import create_lambda_role\n", @@ -724,7 +799,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Custom Lambda Step\n", @@ -740,7 +819,7 @@ "func = Lambda(\n", " function_name=function_name,\n", " execution_role_arn=lambda_role,\n", - " script=\"lambda_helper.py\",\n", + " script=\"code/lambda_helper.py\",\n", " handler=\"lambda_helper.lambda_handler\",\n", ")\n", "\n", @@ -763,7 +842,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# ConditionStep for evaluating model quality and branching execution.\n", @@ -789,7 +872,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Use the same pipeline name across executions for cache usage.\n", @@ -805,7 +892,7 @@ " model_approval_status,\n", " ],\n", " steps=[step_process, step_train, step_eval, step_cond],\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", ")" ] }, @@ -819,7 +906,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import json\n", @@ -831,7 +922,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline.upsert(role_arn=role)" @@ -840,7 +935,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": 
"#%%\n" + } + }, "outputs": [], "source": [ "execution = pipeline.start()" @@ -849,7 +948,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.wait()" @@ -858,7 +961,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create a SageMaker client\n", @@ -886,7 +993,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Get the model name from the EndpointCofig. The CreateModelStep properties are not available\n", @@ -916,9 +1027,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -930,7 +1041,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.6.14" }, "metadata": { "interpreter": { @@ -940,4 +1051,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb deleted file mode 100644 index d816d2f766..0000000000 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb +++ /dev/null @@ -1,1724 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6d1e3a0f", - "metadata": { - "papermill": { - "duration": 0.015373, - "end_time": "2022-04-18T00:17:45.715541", - "exception": false, - "start_time": "2022-04-18T00:17:45.700168", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# 
SageMaker Pipelines Lambda Step\n", - "\n", - "This notebook illustrates how a Lambda function can be run as a step in a SageMaker Pipeline. \n", - "\n", - "The steps in this pipeline include:\n", - "* Preprocess the Abalone dataset\n", - "* Train an XGBoost Model\n", - "* Evaluate the model performance\n", - "* Create a model\n", - "* Deploy the model to a SageMaker Hosted Endpoint using a Lambda Function, through SageMaker Pipelines\n", - "\n", - "A step to register the model into a Model Registry can be added to the pipeline using the `RegisterModel` step." - ] - }, - { - "cell_type": "markdown", - "id": "9789db0e", - "metadata": { - "papermill": { - "duration": 0.015114, - "end_time": "2022-04-18T00:17:45.745929", - "exception": false, - "start_time": "2022-04-18T00:17:45.730815", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Runtime\n", - "\n", - "This notebook takes approximately 15 minutes to run.\n", - "\n", - "## Contents\n", - "\n", - "1. [Prerequisites](#Prerequisites)\n", - "1. [Configuration Setup](#Configuration-Setup)\n", - "1. [Data Preparation](#Data-Preparation)\n", - "1. [Model Training and Evaluation](#Model-Training-and-Evaluation)\n", - "1. [Setting up Lambda](#Setting-up-Lambda)\n", - "1. [Execute the Pipeline](#Execute-the-Pipeline)\n", - "1. [Clean up resources](#Clean-up-resources)" - ] - }, - { - "cell_type": "markdown", - "id": "29ba2743", - "metadata": { - "papermill": { - "duration": 0.015126, - "end_time": "2022-04-18T00:17:45.776211", - "exception": false, - "start_time": "2022-04-18T00:17:45.761085", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Prerequisites\n", - "\n", - "The notebook execution role should have policies which enable the notebook to create a Lambda function. 
The Amazon managed policy `AmazonSageMakerPipelinesIntegrations` can be added to the notebook execution role to achieve the same effect.\n", - "\n", - "The policy description is as follows:\n", - "\n", - "```\n", - "\n", - "{\n", - " \"Version\": \"2012-10-17\",\n", - " \"Statement\": [\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": [\n", - " \"lambda:CreateFunction\",\n", - " \"lambda:DeleteFunction\",\n", - " \"lambda:InvokeFunction\",\n", - " \"lambda:UpdateFunctionCode\"\n", - " ],\n", - " \"Resource\": [\n", - " \"arn:aws:lambda:*:*:function:*sagemaker*\",\n", - " \"arn:aws:lambda:*:*:function:*sageMaker*\",\n", - " \"arn:aws:lambda:*:*:function:*SageMaker*\"\n", - " ]\n", - " },\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": [\n", - " \"sqs:CreateQueue\",\n", - " \"sqs:SendMessage\"\n", - " ],\n", - " \"Resource\": [\n", - " \"arn:aws:sqs:*:*:*sagemaker*\",\n", - " \"arn:aws:sqs:*:*:*sageMaker*\",\n", - " \"arn:aws:sqs:*:*:*SageMaker*\"\n", - " ]\n", - " },\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": [\n", - " \"iam:PassRole\"\n", - " ],\n", - " \"Resource\": \"arn:aws:iam::*:role/*\",\n", - " \"Condition\": {\n", - " \"StringEquals\": {\n", - " \"iam:PassedToService\": [\n", - " \"lambda.amazonaws.com\"\n", - " ]\n", - " }\n", - " }\n", - " }\n", - " ]\n", - "}\n", - " \n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "c5b8a07e", - "metadata": { - "papermill": { - "duration": 0.01508, - "end_time": "2022-04-18T00:17:45.806414", - "exception": false, - "start_time": "2022-04-18T00:17:45.791334", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Let's start by importing necessary packages and installing the SageMaker Python SDK." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "9e80d1ce", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:45.843391Z", - "iopub.status.busy": "2022-04-18T00:17:45.842642Z", - "iopub.status.idle": "2022-04-18T00:17:46.319287Z", - "shell.execute_reply": "2022-04-18T00:17:46.318882Z" - }, - "papermill": { - "duration": 0.497958, - "end_time": "2022-04-18T00:17:46.319400", - "exception": false, - "start_time": "2022-04-18T00:17:45.821442", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "import os\n", - "import time\n", - "import boto3\n", - "import sagemaker\n", - "\n", - "from sagemaker.estimator import Estimator\n", - "from sagemaker.inputs import TrainingInput\n", - "\n", - "from sagemaker.processing import (\n", - " ProcessingInput,\n", - " ProcessingOutput,\n", - " Processor,\n", - " ScriptProcessor,\n", - ")\n", - "\n", - "from sagemaker import Model\n", - "from sagemaker.xgboost import XGBoostPredictor\n", - "from sagemaker.sklearn.processing import SKLearnProcessor\n", - "\n", - "from sagemaker.workflow.parameters import (\n", - " ParameterInteger,\n", - " ParameterString,\n", - ")\n", - "from sagemaker.workflow.pipeline import Pipeline\n", - "from sagemaker.workflow.properties import PropertyFile\n", - "from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CacheConfig\n", - "from sagemaker.workflow.lambda_step import (\n", - " LambdaStep,\n", - " LambdaOutput,\n", - " LambdaOutputTypeEnum,\n", - ")\n", - "from sagemaker.workflow.step_collections import CreateModelStep\n", - "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", - "from sagemaker.workflow.condition_step import ConditionStep\n", - "from sagemaker.workflow.functions import JsonGet\n", - "\n", - "from sagemaker.lambda_helper import Lambda\n", - "import sys" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ac873be6", - "metadata": { - "execution": { - 
"iopub.execute_input": "2022-04-18T00:17:46.354877Z", - "iopub.status.busy": "2022-04-18T00:17:46.354174Z", - "iopub.status.idle": "2022-04-18T00:17:48.093814Z", - "shell.execute_reply": "2022-04-18T00:17:48.094221Z" - }, - "papermill": { - "duration": 1.759493, - "end_time": "2022-04-18T00:17:48.094394", - "exception": false, - "start_time": "2022-04-18T00:17:46.334901", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n", - " from cryptography.utils import int_from_bytes\n", - "/opt/conda/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n", - " from cryptography.utils import int_from_bytes\n", - "Requirement already satisfied: sagemaker>=2.51.0 in /opt/conda/lib/python3.7/site-packages (2.69.1.dev0)\n", - "Requirement already satisfied: numpy>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (1.21.1)\n", - "Requirement already satisfied: protobuf>=3.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (3.17.3)\n", - "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (0.2.0)\n", - "Requirement already satisfied: protobuf3-to-dict>=0.1.5 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (0.1.5)\n", - "Requirement already satisfied: importlib-metadata>=1.4.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (1.5.0)\n", - "Requirement already satisfied: attrs in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (19.3.0)\n", - "Requirement already satisfied: boto3>=1.16.32 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (1.20.7)\n", - "Requirement already satisfied: 
smdebug-rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (1.0.1)\n", - "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (0.2.8)\n", - "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (20.1)\n", - "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.51.0) (1.0.1)\n", - "Requirement already satisfied: botocore<1.24.0,>=1.23.7 in /opt/conda/lib/python3.7/site-packages (from boto3>=1.16.32->sagemaker>=2.51.0) (1.23.7)\n", - "Requirement already satisfied: s3transfer<0.6.0,>=0.5.0 in /opt/conda/lib/python3.7/site-packages (from boto3>=1.16.32->sagemaker>=2.51.0) (0.5.0)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3>=1.16.32->sagemaker>=2.51.0) (0.10.0)\n", - "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.7/site-packages (from botocore<1.24.0,>=1.23.7->boto3>=1.16.32->sagemaker>=2.51.0) (1.26.6)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.7/site-packages (from botocore<1.24.0,>=1.23.7->boto3>=1.16.32->sagemaker>=2.51.0) (2.8.1)\n", - "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=1.4.0->sagemaker>=2.51.0) (2.2.0)\n", - "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker>=2.51.0) (1.14.0)\n", - "Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker>=2.51.0) (2.4.6)\n", - "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker>=2.51.0) (2019.3)\n", - "Requirement already satisfied: pox>=0.3.0 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.51.0) (0.3.0)\n", - 
"Requirement already satisfied: dill>=0.3.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.51.0) (0.3.4)\n", - "Requirement already satisfied: multiprocess>=0.70.12 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.51.0) (0.70.12.2)\n", - "Requirement already satisfied: ppft>=1.6.6.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.51.0) (1.6.6.4)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n", - "\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.0.4 is available.\n", - "You should consider upgrading via the '/opt/conda/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n" - ] - } - ], - "source": [ - "!{sys.executable} -m pip install \"sagemaker>=2.51.0\"" - ] - }, - { - "cell_type": "markdown", - "id": "322f8d23", - "metadata": { - "papermill": { - "duration": 0.016932, - "end_time": "2022-04-18T00:17:48.128714", - "exception": false, - "start_time": "2022-04-18T00:17:48.111782", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Configuration Setup" - ] - }, - { - "cell_type": "markdown", - "id": "deffe595", - "metadata": { - "papermill": { - "duration": 0.016766, - "end_time": "2022-04-18T00:17:48.162442", - "exception": false, - "start_time": "2022-04-18T00:17:48.145676", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Let's now configure the setup we need, which includes the session object from the SageMaker Python SDK, and neccessary configurations for the pipelines, such as object types, input and output buckets and so on." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a8115972", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:48.209712Z", - "iopub.status.busy": "2022-04-18T00:17:48.209176Z", - "iopub.status.idle": "2022-04-18T00:17:48.361714Z", - "shell.execute_reply": "2022-04-18T00:17:48.361286Z" - }, - "papermill": { - "duration": 0.182419, - "end_time": "2022-04-18T00:17:48.361819", - "exception": false, - "start_time": "2022-04-18T00:17:48.179400", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Create the SageMaker Session\n", - "\n", - "sagemaker_session = sagemaker.Session()\n", - "sm_client = sagemaker_session.sagemaker_client\n", - "region = sagemaker_session.boto_region_name\n", - "prefix = \"lambda-step-pipeline\"\n", - "\n", - "account_id = sagemaker_session.account_id()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "0ff78481", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:48.410227Z", - "iopub.status.busy": "2022-04-18T00:17:48.409395Z", - "iopub.status.idle": "2022-04-18T00:17:49.042133Z", - "shell.execute_reply": "2022-04-18T00:17:49.041700Z" - }, - "papermill": { - "duration": 0.663019, - "end_time": "2022-04-18T00:17:49.042285", - "exception": false, - "start_time": "2022-04-18T00:17:48.379266", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Define variables and parameters needed for the Pipeline steps\n", - "\n", - "role = sagemaker.get_execution_role()\n", - "default_bucket = sagemaker_session.default_bucket()\n", - "base_job_prefix = \"lambda-step-example\"\n", - "s3_prefix = \"lambda-step-pipeline\"\n", - "\n", - "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", - "processing_instance_type = ParameterString(\n", - " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", - ")\n", - "training_instance_type = 
ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", - "model_approval_status = ParameterString(\n", - " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", - ")\n", - "input_data = ParameterString(\n", - " name=\"InputDataUrl\",\n", - " default_value=f\"s3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv\",\n", - ")\n", - "model_approval_status = ParameterString(\n", - " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", - ")\n", - "\n", - "# Cache Pipeline steps to reduce execution time on subsequent executions\n", - "cache_config = CacheConfig(enable_caching=True, expire_after=\"30d\")" - ] - }, - { - "cell_type": "markdown", - "id": "5f243888", - "metadata": { - "papermill": { - "duration": 0.017327, - "end_time": "2022-04-18T00:17:49.077018", - "exception": false, - "start_time": "2022-04-18T00:17:49.059691", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Data Preparation\n", - "\n", - "An SKLearn processor is used to prepare the dataset for the Hyperparameter Tuning job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets. \n", - "\n", - "The output of this step is used as the input to the TrainingStep." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0cb15d01", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.116655Z", - "iopub.status.busy": "2022-04-18T00:17:49.116031Z", - "iopub.status.idle": "2022-04-18T00:17:49.118696Z", - "shell.execute_reply": "2022-04-18T00:17:49.119062Z" - }, - "papermill": { - "duration": 0.024982, - "end_time": "2022-04-18T00:17:49.119206", - "exception": false, - "start_time": "2022-04-18T00:17:49.094224", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Writing preprocess.py\n" - ] - } - ], - "source": [ - "%%writefile preprocess.py\n", - "\n", - "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", - "import argparse\n", - "import logging\n", - "import os\n", - "import pathlib\n", - "import requests\n", - "import tempfile\n", - "\n", - "import boto3\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.impute import SimpleImputer\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", - "\n", - "logger = logging.getLogger()\n", - "logger.setLevel(logging.INFO)\n", - "logger.addHandler(logging.StreamHandler())\n", - "\n", - "\n", - "# Since we get a headerless CSV file we specify the column names here.\n", - "feature_columns_names = [\n", - " \"sex\",\n", - " \"length\",\n", - " \"diameter\",\n", - " \"height\",\n", - " \"whole_weight\",\n", - " \"shucked_weight\",\n", - " \"viscera_weight\",\n", - " \"shell_weight\",\n", - "]\n", - "label_column = \"rings\"\n", - "\n", - "feature_columns_dtype = {\n", - " \"sex\": str,\n", - " \"length\": np.float64,\n", - " \"diameter\": np.float64,\n", - " \"height\": np.float64,\n", - " \"whole_weight\": np.float64,\n", - " \"shucked_weight\": np.float64,\n", - " \"viscera_weight\": np.float64,\n", - " \"shell_weight\": 
np.float64,\n", - "}\n", - "label_column_dtype = {\"rings\": np.float64}\n", - "\n", - "\n", - "def merge_two_dicts(x, y):\n", - " \"\"\"Merges two dicts, returning a new copy.\"\"\"\n", - " z = x.copy()\n", - " z.update(y)\n", - " return z\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " logger.debug(\"Starting preprocessing.\")\n", - " parser = argparse.ArgumentParser()\n", - " parser.add_argument(\"--input-data\", type=str, required=True)\n", - " args = parser.parse_args()\n", - "\n", - " base_dir = \"/opt/ml/processing\"\n", - " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", - " input_data = args.input_data\n", - " bucket = input_data.split(\"/\")[2]\n", - " key = \"/\".join(input_data.split(\"/\")[3:])\n", - "\n", - " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", - " fn = f\"{base_dir}/data/abalone-dataset.csv\"\n", - " s3 = boto3.resource(\"s3\")\n", - " s3.Bucket(bucket).download_file(key, fn)\n", - "\n", - " logger.debug(\"Reading downloaded data.\")\n", - " df = pd.read_csv(\n", - " fn,\n", - " header=None,\n", - " names=feature_columns_names + [label_column],\n", - " dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),\n", - " )\n", - " os.unlink(fn)\n", - "\n", - " logger.debug(\"Defining transformers.\")\n", - " numeric_features = list(feature_columns_names)\n", - " numeric_features.remove(\"sex\")\n", - " numeric_transformer = Pipeline(\n", - " steps=[\n", - " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", - " (\"scaler\", StandardScaler()),\n", - " ]\n", - " )\n", - "\n", - " categorical_features = [\"sex\"]\n", - " categorical_transformer = Pipeline(\n", - " steps=[\n", - " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", - " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n", - " ]\n", - " )\n", - "\n", - " preprocess = ColumnTransformer(\n", - " transformers=[\n", - " (\"num\", numeric_transformer, numeric_features),\n", 
- " (\"cat\", categorical_transformer, categorical_features),\n", - " ]\n", - " )\n", - "\n", - " logger.info(\"Applying transforms.\")\n", - " y = df.pop(\"rings\")\n", - " X_pre = preprocess.fit_transform(df)\n", - " y_pre = y.to_numpy().reshape(len(y), 1)\n", - "\n", - " X = np.concatenate((y_pre, X_pre), axis=1)\n", - "\n", - " logger.info(\"Splitting %d rows of data into train, validation, test datasets.\", len(X))\n", - " np.random.shuffle(X)\n", - " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", - "\n", - " logger.info(\"Writing out datasets to %s.\", base_dir)\n", - " pd.DataFrame(train).to_csv(f\"{base_dir}/train/train.csv\", header=False, index=False)\n", - " pd.DataFrame(validation).to_csv(\n", - " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", - " )\n", - " pd.DataFrame(test).to_csv(f\"{base_dir}/test/test.csv\", header=False, index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cd0f71fb", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.163730Z", - "iopub.status.busy": "2022-04-18T00:17:49.162913Z", - "iopub.status.idle": "2022-04-18T00:17:49.179630Z", - "shell.execute_reply": "2022-04-18T00:17:49.179988Z" - }, - "papermill": { - "duration": 0.043225, - "end_time": "2022-04-18T00:17:49.180127", - "exception": false, - "start_time": "2022-04-18T00:17:49.136902", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Process the training data step using a python script.\n", - "# Split the training data set into train, test, and validation datasets\n", - "\n", - "sklearn_processor = SKLearnProcessor(\n", - " framework_version=\"1.0-1\",\n", - " instance_type=processing_instance_type,\n", - " instance_count=processing_instance_count,\n", - " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")\n", - "step_process = 
ProcessingStep(\n", - " name=\"PreprocessAbaloneData\",\n", - " processor=sklearn_processor,\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", - " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", - " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", - " ],\n", - " code=\"preprocess.py\",\n", - " job_arguments=[\"--input-data\", input_data],\n", - " cache_config=cache_config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "01f476dc", - "metadata": { - "papermill": { - "duration": 0.017547, - "end_time": "2022-04-18T00:17:49.215444", - "exception": false, - "start_time": "2022-04-18T00:17:49.197897", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Model Training and Evaluation\n", - "\n", - "We will now train an XGBoost model using the SageMaker Python SDK and the output of the ProcessingStep." - ] - }, - { - "cell_type": "markdown", - "id": "d949b96d", - "metadata": { - "papermill": { - "duration": 0.017417, - "end_time": "2022-04-18T00:17:49.250440", - "exception": false, - "start_time": "2022-04-18T00:17:49.233023", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "#### Training the Model" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "a683373c", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.292700Z", - "iopub.status.busy": "2022-04-18T00:17:49.292202Z", - "iopub.status.idle": "2022-04-18T00:17:49.306149Z", - "shell.execute_reply": "2022-04-18T00:17:49.306566Z" - }, - "papermill": { - "duration": 0.038713, - "end_time": "2022-04-18T00:17:49.306706", - "exception": false, - "start_time": "2022-04-18T00:17:49.267993", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Define the output path for the model artifacts from the Hyperparameter Tuning Job\n", - "model_path = 
f\"s3://{default_bucket}/{base_job_prefix}/AbaloneTrain\"\n", - "\n", - "image_uri = sagemaker.image_uris.retrieve(\n", - " framework=\"xgboost\",\n", - " region=region,\n", - " version=\"1.0-1\",\n", - " py_version=\"py3\",\n", - " instance_type=training_instance_type,\n", - ")\n", - "\n", - "xgb_train = Estimator(\n", - " image_uri=image_uri,\n", - " instance_type=training_instance_type,\n", - " instance_count=1,\n", - " output_path=model_path,\n", - " base_job_name=f\"{prefix}/{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")\n", - "\n", - "xgb_train.set_hyperparameters(\n", - " objective=\"reg:linear\",\n", - " num_round=50,\n", - " max_depth=5,\n", - " eta=0.2,\n", - " gamma=4,\n", - " min_child_weight=6,\n", - " subsample=0.7,\n", - " silent=0,\n", - ")\n", - "\n", - "step_train = TrainingStep(\n", - " name=\"TrainAbaloneModel\",\n", - " estimator=xgb_train,\n", - " inputs={\n", - " \"train\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " \"validation\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"validation\"\n", - " ].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " },\n", - " cache_config=cache_config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "28b11fc1", - "metadata": { - "papermill": { - "duration": 0.017655, - "end_time": "2022-04-18T00:17:49.342172", - "exception": false, - "start_time": "2022-04-18T00:17:49.324517", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "#### Evaluating the model\n", - "\n", - "Use a processing job to evaluate the model from the TrainingStep. If the output of the evaluation is True, a model is created and a Lambda function is invoked to deploy the model to a SageMaker Endpoint. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "e2c5f52a", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.381974Z", - "iopub.status.busy": "2022-04-18T00:17:49.381353Z", - "iopub.status.idle": "2022-04-18T00:17:49.384458Z", - "shell.execute_reply": "2022-04-18T00:17:49.384016Z" - }, - "papermill": { - "duration": 0.024593, - "end_time": "2022-04-18T00:17:49.384562", - "exception": false, - "start_time": "2022-04-18T00:17:49.359969", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Writing evaluate.py\n" - ] - } - ], - "source": [ - "%%writefile evaluate.py\n", - "\n", - "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", - "import json\n", - "import logging\n", - "import pathlib\n", - "import pickle\n", - "import tarfile\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import xgboost\n", - "\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "logger = logging.getLogger()\n", - "logger.setLevel(logging.INFO)\n", - "logger.addHandler(logging.StreamHandler())\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " logger.debug(\"Starting evaluation.\")\n", - " model_path = \"/opt/ml/processing/model/model.tar.gz\"\n", - " with tarfile.open(model_path) as tar:\n", - " tar.extractall(path=\".\")\n", - "\n", - " logger.debug(\"Loading xgboost model.\")\n", - " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", - "\n", - " logger.debug(\"Reading test data.\")\n", - " test_path = \"/opt/ml/processing/test/test.csv\"\n", - " df = pd.read_csv(test_path, header=None)\n", - "\n", - " logger.debug(\"Reading test data.\")\n", - " y_test = df.iloc[:, 0].to_numpy()\n", - " df.drop(df.columns[0], axis=1, inplace=True)\n", - " X_test = xgboost.DMatrix(df.values)\n", - "\n", - " logger.info(\"Performing predictions against test data.\")\n", - " predictions = model.predict(X_test)\n", - 
"\n", - " logger.debug(\"Calculating mean squared error.\")\n", - " mse = mean_squared_error(y_test, predictions)\n", - " std = np.std(y_test - predictions)\n", - " report_dict = {\n", - " \"regression_metrics\": {\n", - " \"mse\": {\"value\": mse, \"standard_deviation\": std},\n", - " },\n", - " }\n", - "\n", - " output_dir = \"/opt/ml/processing/evaluation\"\n", - " pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)\n", - "\n", - " logger.info(\"Writing out evaluation report with mse: %f\", mse)\n", - " evaluation_path = f\"{output_dir}/evaluation.json\"\n", - " with open(evaluation_path, \"w\") as f:\n", - " f.write(json.dumps(report_dict))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "5112c1e7", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.428306Z", - "iopub.status.busy": "2022-04-18T00:17:49.427644Z", - "iopub.status.idle": "2022-04-18T00:17:49.429594Z", - "shell.execute_reply": "2022-04-18T00:17:49.429983Z" - }, - "papermill": { - "duration": 0.027021, - "end_time": "2022-04-18T00:17:49.430118", - "exception": false, - "start_time": "2022-04-18T00:17:49.403097", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# A ProcessingStep is used to evaluate the performance of the trained model.\n", - "# Based on the results of the evaluation, the model is created and deployed.\n", - "\n", - "script_eval = ScriptProcessor(\n", - " image_uri=image_uri,\n", - " command=[\"python3\"],\n", - " instance_type=processing_instance_type,\n", - " instance_count=1,\n", - " base_job_name=f\"{prefix}/{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")\n", - "\n", - "evaluation_report = PropertyFile(\n", - " name=\"AbaloneEvaluationReport\",\n", - " output_name=\"evaluation\",\n", - " path=\"evaluation.json\",\n", - ")\n", - "\n", - "step_eval = ProcessingStep(\n", - " name=\"EvaluateAbaloneModel\",\n", - " 
processor=script_eval,\n", - " inputs=[\n", - " ProcessingInput(\n", - " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " destination=\"/opt/ml/processing/model\",\n", - " ),\n", - " ProcessingInput(\n", - " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", - " destination=\"/opt/ml/processing/test\",\n", - " ),\n", - " ],\n", - " outputs=[\n", - " ProcessingOutput(\n", - " output_name=\"evaluation\",\n", - " source=\"/opt/ml/processing/evaluation\",\n", - " destination=f\"s3://{default_bucket}/{s3_prefix}/evaluation_report\",\n", - " ),\n", - " ],\n", - " code=\"evaluate.py\",\n", - " property_files=[evaluation_report],\n", - " cache_config=cache_config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6057e873", - "metadata": { - "papermill": { - "duration": 0.018122, - "end_time": "2022-04-18T00:17:49.466653", - "exception": false, - "start_time": "2022-04-18T00:17:49.448531", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "#### Creating the final model object\n", - "\n", - "The model is created and the name of the model is provided to the Lambda function for deployment. The `CreateModelStep` dynamically assigns a name to the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "f3cb5cf6", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.507558Z", - "iopub.status.busy": "2022-04-18T00:17:49.506946Z", - "iopub.status.idle": "2022-04-18T00:17:49.508786Z", - "shell.execute_reply": "2022-04-18T00:17:49.509197Z" - }, - "papermill": { - "duration": 0.024605, - "end_time": "2022-04-18T00:17:49.509328", - "exception": false, - "start_time": "2022-04-18T00:17:49.484723", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Create Model\n", - "model = Model(\n", - " image_uri=image_uri,\n", - " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - " predictor_cls=XGBoostPredictor,\n", - ")\n", - "\n", - "step_create_model = CreateModelStep(\n", - " name=\"CreateModel\",\n", - " model=model,\n", - " inputs=sagemaker.inputs.CreateModelInput(instance_type=\"ml.m4.large\"),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "f97f9e65", - "metadata": { - "papermill": { - "duration": 0.018099, - "end_time": "2022-04-18T00:17:49.545716", - "exception": false, - "start_time": "2022-04-18T00:17:49.527617", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Setting up Lambda\n", - "\n", - "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providing a script, function name and role for the Lambda function. 
\n", - "\n", - "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n", - "\n", - "The dictionary response from the Lambda function is parsed through the `LambdaOutput` objects provided to the `outputs` argument. The `output_name` in `LambdaOutput` corresponds to the dictionary key in the Lambda's return dictionary. " - ] - }, - { - "cell_type": "markdown", - "id": "562dd374", - "metadata": { - "papermill": { - "duration": 0.018179, - "end_time": "2022-04-18T00:17:49.582321", - "exception": false, - "start_time": "2022-04-18T00:17:49.564142", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "#### Define the Lambda function\n", - "\n", - "Users can choose the leverage the Lambda helper class to create a Lambda function and provide that function object to the LambdaStep. Alternatively, users can use a pre-deployed Lambda function and provide the function ARN to the `Lambda` helper class in the Lambda step. " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "21b0ff38", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.622985Z", - "iopub.status.busy": "2022-04-18T00:17:49.622243Z", - "iopub.status.idle": "2022-04-18T00:17:49.624984Z", - "shell.execute_reply": "2022-04-18T00:17:49.625347Z" - }, - "papermill": { - "duration": 0.024985, - "end_time": "2022-04-18T00:17:49.625476", - "exception": false, - "start_time": "2022-04-18T00:17:49.600491", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Writing lambda_helper.py\n" - ] - } - ], - "source": [ - "%%writefile lambda_helper.py\n", - "\n", - "\"\"\"\n", - "This Lambda function creates an Endpoint Configuration and deploys a model to an Endpoint. 
\n", - "The name of the model to deploy is provided via the `event` argument\n", - "\"\"\"\n", - "\n", - "import json\n", - "import boto3\n", - "\n", - "\n", - "def lambda_handler(event, context):\n", - " \"\"\" \"\"\"\n", - " sm_client = boto3.client(\"sagemaker\")\n", - "\n", - " # The name of the model created in the Pipeline CreateModelStep\n", - " model_name = event[\"model_name\"]\n", - "\n", - " endpoint_config_name = event[\"endpoint_config_name\"]\n", - " endpoint_name = event[\"endpoint_name\"]\n", - "\n", - " create_endpoint_config_response = sm_client.create_endpoint_config(\n", - " EndpointConfigName=endpoint_config_name,\n", - " ProductionVariants=[\n", - " {\n", - " \"InstanceType\": \"ml.m4.xlarge\",\n", - " \"InitialVariantWeight\": 1,\n", - " \"InitialInstanceCount\": 1,\n", - " \"ModelName\": model_name,\n", - " \"VariantName\": \"AllTraffic\",\n", - " }\n", - " ],\n", - " )\n", - "\n", - " create_endpoint_response = sm_client.create_endpoint(\n", - " EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name\n", - " )\n", - "\n", - " return {\n", - " \"statusCode\": 200,\n", - " \"body\": json.dumps(\"Created Endpoint!\"),\n", - " \"other_key\": \"example_value\",\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "37c86090", - "metadata": { - "papermill": { - "duration": 0.018627, - "end_time": "2022-04-18T00:17:49.662825", - "exception": false, - "start_time": "2022-04-18T00:17:49.644198", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "#### Setting up the custom IAM Role\n", - "\n", - "The Lambda function needs an IAM role that allows it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", - "\n", - "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
\n", - "\n", - "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy with least privileges as per AWS IAM best practices." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "7f302df2", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:49.703722Z", - "iopub.status.busy": "2022-04-18T00:17:49.702924Z", - "iopub.status.idle": "2022-04-18T00:17:50.206467Z", - "shell.execute_reply": "2022-04-18T00:17:50.206847Z" - }, - "papermill": { - "duration": 0.525683, - "end_time": "2022-04-18T00:17:50.206984", - "exception": false, - "start_time": "2022-04-18T00:17:49.681301", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using ARN from existing role: lambda-deployment-role\n" - ] - } - ], - "source": [ - "from iam_helper import create_lambda_role\n", - "\n", - "lambda_role = create_lambda_role(\"lambda-deployment-role\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "22bcd1f5", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:50.251245Z", - "iopub.status.busy": "2022-04-18T00:17:50.250734Z", - "iopub.status.idle": "2022-04-18T00:17:50.279900Z", - "shell.execute_reply": "2022-04-18T00:17:50.279400Z" - }, - "papermill": { - "duration": 0.054153, - "end_time": "2022-04-18T00:17:50.280008", - "exception": false, - "start_time": "2022-04-18T00:17:50.225855", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Custom Lambda Step\n", - "\n", - "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", - "model_name = \"demo-lambda-model\" + current_time\n", - "endpoint_config_name = \"demo-lambda-deploy-endpoint-config-\" + current_time\n", - "endpoint_name = \"demo-lambda-deploy-endpoint-\" + current_time\n", - 
"\n", - "function_name = \"sagemaker-lambda-step-endpoint-deploy-\" + current_time\n", - "\n", - "# Lambda helper class can be used to create the Lambda function\n", - "func = Lambda(\n", - " function_name=function_name,\n", - " execution_role_arn=lambda_role,\n", - " script=\"lambda_helper.py\",\n", - " handler=\"lambda_helper.lambda_handler\",\n", - ")\n", - "\n", - "output_param_1 = LambdaOutput(output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String)\n", - "output_param_2 = LambdaOutput(output_name=\"body\", output_type=LambdaOutputTypeEnum.String)\n", - "output_param_3 = LambdaOutput(output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String)\n", - "\n", - "step_deploy_lambda = LambdaStep(\n", - " name=\"LambdaStep\",\n", - " lambda_func=func,\n", - " inputs={\n", - " \"model_name\": step_create_model.properties.ModelName,\n", - " \"endpoint_config_name\": endpoint_config_name,\n", - " \"endpoint_name\": endpoint_name,\n", - " },\n", - " outputs=[output_param_1, output_param_2, output_param_3],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "1f96dce6", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:50.323116Z", - "iopub.status.busy": "2022-04-18T00:17:50.322583Z", - "iopub.status.idle": "2022-04-18T00:17:50.324359Z", - "shell.execute_reply": "2022-04-18T00:17:50.324730Z" - }, - "papermill": { - "duration": 0.025749, - "end_time": "2022-04-18T00:17:50.324861", - "exception": false, - "start_time": "2022-04-18T00:17:50.299112", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# ConditionStep for evaluating model quality and branching execution.\n", - "# The `json_path` value is based on the `report_dict` variable in `evaluate.py`\n", - "\n", - "cond_lte = ConditionLessThanOrEqualTo(\n", - " left=JsonGet(\n", - " step_name=step_eval.name,\n", - " property_file=evaluation_report,\n", - " json_path=\"regression_metrics.mse.value\",\n", - " ),\n", - " 
right=6.0,\n", - ")\n", - "\n", - "step_cond = ConditionStep(\n", - " name=\"CheckMSEAbaloneEvaluation\",\n", - " conditions=[cond_lte],\n", - " if_steps=[step_create_model, step_deploy_lambda],\n", - " else_steps=[],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "e59c0049", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:50.367772Z", - "iopub.status.busy": "2022-04-18T00:17:50.367212Z", - "iopub.status.idle": "2022-04-18T00:17:50.369378Z", - "shell.execute_reply": "2022-04-18T00:17:50.368975Z" - }, - "papermill": { - "duration": 0.025431, - "end_time": "2022-04-18T00:17:50.369486", - "exception": false, - "start_time": "2022-04-18T00:17:50.344055", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Use the same pipeline name across executions for cache usage.\n", - "\n", - "pipeline_name = \"lambda-step-pipeline\" + current_time\n", - "\n", - "pipeline = Pipeline(\n", - " name=pipeline_name,\n", - " parameters=[\n", - " processing_instance_type,\n", - " processing_instance_count,\n", - " training_instance_type,\n", - " input_data,\n", - " model_approval_status,\n", - " ],\n", - " steps=[step_process, step_train, step_eval, step_cond],\n", - " sagemaker_session=sagemaker_session,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b19ba0c7", - "metadata": { - "papermill": { - "duration": 0.019137, - "end_time": "2022-04-18T00:17:50.407773", - "exception": false, - "start_time": "2022-04-18T00:17:50.388636", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Execute the Pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "5ef428da", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:50.451723Z", - "iopub.status.busy": "2022-04-18T00:17:50.449084Z", - "iopub.status.idle": "2022-04-18T00:17:50.875491Z", - "shell.execute_reply": "2022-04-18T00:17:50.875857Z" - }, - "papermill": { - "duration": 0.449276, 
- "end_time": "2022-04-18T00:17:50.875989", - "exception": false, - "start_time": "2022-04-18T00:17:50.426713", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Version': '2020-12-01',\n", - " 'Metadata': {},\n", - " 'Parameters': [{'Name': 'ProcessingInstanceType',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 'ml.m5.xlarge'},\n", - " {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},\n", - " {'Name': 'TrainingInstanceType',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 'ml.m5.xlarge'},\n", - " {'Name': 'InputDataUrl',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 's3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv'},\n", - " {'Name': 'ModelApprovalStatus',\n", - " 'Type': 'String',\n", - " 'DefaultValue': 'PendingManualApproval'}],\n", - " 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},\n", - " 'TrialName': {'Get': 'Execution.PipelineExecutionId'}},\n", - " 'Steps': [{'Name': 'PreprocessAbaloneData',\n", - " 'Type': 'Processing',\n", - " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': {'Get': 'Parameters.ProcessingInstanceType'},\n", - " 'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},\n", - " 'VolumeSizeInGB': 30}},\n", - " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:1.0-1-cpu-py3',\n", - " 'ContainerArguments': ['--input-data',\n", - " {'Get': 'Parameters.InputDataUrl'}],\n", - " 'ContainerEntrypoint': ['python3',\n", - " '/opt/ml/processing/input/code/preprocess.py']},\n", - " 'RoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'ProcessingInputs': [{'InputName': 'code',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-04-18-00-17-50-447/input/code/preprocess.py',\n", - " 
'LocalPath': '/opt/ml/processing/input/code',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}}],\n", - " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'train',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-04-18-00-17-50-447/output/train',\n", - " 'LocalPath': '/opt/ml/processing/train',\n", - " 'S3UploadMode': 'EndOfJob'}},\n", - " {'OutputName': 'validation',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-04-18-00-17-50-447/output/validation',\n", - " 'LocalPath': '/opt/ml/processing/validation',\n", - " 'S3UploadMode': 'EndOfJob'}},\n", - " {'OutputName': 'test',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-04-18-00-17-50-447/output/test',\n", - " 'LocalPath': '/opt/ml/processing/test',\n", - " 'S3UploadMode': 'EndOfJob'}}]}},\n", - " 'CacheConfig': {'Enabled': True, 'ExpireAfter': '30d'}},\n", - " {'Name': 'TrainAbaloneModel',\n", - " 'Type': 'Training',\n", - " 'Arguments': {'AlgorithmSpecification': {'TrainingInputMode': 'File',\n", - " 'TrainingImage': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'},\n", - " 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/AbaloneTrain'},\n", - " 'StoppingCondition': {'MaxRuntimeInSeconds': 86400},\n", - " 'ResourceConfig': {'InstanceCount': 1,\n", - " 'InstanceType': {'Get': 'Parameters.TrainingInstanceType'},\n", - " 'VolumeSizeInGB': 30},\n", - " 'RoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", - " 
'S3Uri': {'Get': \"Steps.PreprocessAbaloneData.ProcessingOutputConfig.Outputs['train'].S3Output.S3Uri\"},\n", - " 'S3DataDistributionType': 'FullyReplicated'}},\n", - " 'ContentType': 'text/csv',\n", - " 'ChannelName': 'train'},\n", - " {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", - " 'S3Uri': {'Get': \"Steps.PreprocessAbaloneData.ProcessingOutputConfig.Outputs['validation'].S3Output.S3Uri\"},\n", - " 'S3DataDistributionType': 'FullyReplicated'}},\n", - " 'ContentType': 'text/csv',\n", - " 'ChannelName': 'validation'}],\n", - " 'HyperParameters': {'objective': 'reg:linear',\n", - " 'num_round': '50',\n", - " 'max_depth': '5',\n", - " 'eta': '0.2',\n", - " 'gamma': '4',\n", - " 'min_child_weight': '6',\n", - " 'subsample': '0.7',\n", - " 'silent': '0'},\n", - " 'ProfilerRuleConfigurations': [{'RuleConfigurationName': 'ProfilerReport-1650241070',\n", - " 'RuleEvaluatorImage': '895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest',\n", - " 'RuleParameters': {'rule_to_invoke': 'ProfilerReport'}}],\n", - " 'ProfilerConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/AbaloneTrain'}},\n", - " 'CacheConfig': {'Enabled': True, 'ExpireAfter': '30d'}},\n", - " {'Name': 'EvaluateAbaloneModel',\n", - " 'Type': 'Processing',\n", - " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': {'Get': 'Parameters.ProcessingInstanceType'},\n", - " 'InstanceCount': 1,\n", - " 'VolumeSizeInGB': 30}},\n", - " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", - " 'ContainerEntrypoint': ['python3',\n", - " '/opt/ml/processing/input/code/evaluate.py']},\n", - " 'RoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'ProcessingInputs': [{'InputName': 'input-1',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': {'Get': 
'Steps.TrainAbaloneModel.ModelArtifacts.S3ModelArtifacts'},\n", - " 'LocalPath': '/opt/ml/processing/model',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}},\n", - " {'InputName': 'input-2',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': {'Get': \"Steps.PreprocessAbaloneData.ProcessingOutputConfig.Outputs['test'].S3Output.S3Uri\"},\n", - " 'LocalPath': '/opt/ml/processing/test',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}},\n", - " {'InputName': 'code',\n", - " 'AppManaged': False,\n", - " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-pipeline/lambda-step-exampl-2022-04-18-00-17-50-536/input/code/evaluate.py',\n", - " 'LocalPath': '/opt/ml/processing/input/code',\n", - " 'S3DataType': 'S3Prefix',\n", - " 'S3InputMode': 'File',\n", - " 'S3DataDistributionType': 'FullyReplicated',\n", - " 'S3CompressionType': 'None'}}],\n", - " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'evaluation',\n", - " 'AppManaged': False,\n", - " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-pipeline/evaluation_report',\n", - " 'LocalPath': '/opt/ml/processing/evaluation',\n", - " 'S3UploadMode': 'EndOfJob'}}]}},\n", - " 'CacheConfig': {'Enabled': True, 'ExpireAfter': '30d'},\n", - " 'PropertyFiles': [{'PropertyFileName': 'AbaloneEvaluationReport',\n", - " 'OutputName': 'evaluation',\n", - " 'FilePath': 'evaluation.json'}]},\n", - " {'Name': 'CheckMSEAbaloneEvaluation',\n", - " 'Type': 'Condition',\n", - " 'Arguments': {'Conditions': [{'Type': 'LessThanOrEqualTo',\n", - " 'LeftValue': {'Std:JsonGet': {'PropertyFile': {'Get': 'Steps.EvaluateAbaloneModel.PropertyFiles.AbaloneEvaluationReport'},\n", - " 'Path': 'regression_metrics.mse.value'}},\n", - " 'RightValue': 6.0}],\n", - " 'IfSteps': [{'Name': 
'CreateModel',\n", - " 'Type': 'Model',\n", - " 'Arguments': {'ExecutionRoleArn': 'arn:aws:iam::000000000000:role/ProdBuildSystemStack-ReleaseBuildRoleFB326D49-QK8LUA2UI1IC',\n", - " 'PrimaryContainer': {'Image': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", - " 'Environment': {},\n", - " 'ModelDataUrl': {'Get': 'Steps.TrainAbaloneModel.ModelArtifacts.S3ModelArtifacts'}}}},\n", - " {'Name': 'LambdaStep',\n", - " 'Type': 'Lambda',\n", - " 'Arguments': {'model_name': {'Get': 'Steps.CreateModel.ModelName'},\n", - " 'endpoint_config_name': 'demo-lambda-deploy-endpoint-config-04-18-00-17-50',\n", - " 'endpoint_name': 'demo-lambda-deploy-endpoint-04-18-00-17-50'},\n", - " 'FunctionArn': 'arn:aws:lambda:us-west-2:000000000000:function:sagemaker-lambda-step-endpoint-deploy-04-18-00-17-50',\n", - " 'OutputParameters': [{'OutputName': 'statusCode',\n", - " 'OutputType': 'String'},\n", - " {'OutputName': 'body', 'OutputType': 'String'},\n", - " {'OutputName': 'other_key', 'OutputType': 'String'}]}],\n", - " 'ElseSteps': []}}]}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import json\n", - "\n", - "definition = json.loads(pipeline.definition())\n", - "definition" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "fa70bdb2", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:50.919107Z", - "iopub.status.busy": "2022-04-18T00:17:50.918503Z", - "iopub.status.idle": "2022-04-18T00:17:51.361206Z", - "shell.execute_reply": "2022-04-18T00:17:51.360785Z" - }, - "papermill": { - "duration": 0.465591, - "end_time": "2022-04-18T00:17:51.361313", - "exception": false, - "start_time": "2022-04-18T00:17:50.895722", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/lambda-step-pipeline04-18-00-17-50',\n", - " 'ResponseMetadata': 
{'RequestId': '405aaf34-c135-4efc-a44b-7804be51583a',\n", - " 'HTTPStatusCode': 200,\n", - " 'HTTPHeaders': {'x-amzn-requestid': '405aaf34-c135-4efc-a44b-7804be51583a',\n", - " 'content-type': 'application/x-amz-json-1.1',\n", - " 'content-length': '102',\n", - " 'date': 'Mon, 18 Apr 2022 00:17:51 GMT'},\n", - " 'RetryAttempts': 0}}" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline.upsert(role_arn=role)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c3ff5590", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:51.406344Z", - "iopub.status.busy": "2022-04-18T00:17:51.405776Z", - "iopub.status.idle": "2022-04-18T00:17:51.638250Z", - "shell.execute_reply": "2022-04-18T00:17:51.637817Z" - }, - "papermill": { - "duration": 0.257072, - "end_time": "2022-04-18T00:17:51.638388", - "exception": false, - "start_time": "2022-04-18T00:17:51.381316", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution = pipeline.start()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "1891de27", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:17:51.683933Z", - "iopub.status.busy": "2022-04-18T00:17:51.683371Z", - "iopub.status.idle": "2022-04-18T00:29:54.052358Z", - "shell.execute_reply": "2022-04-18T00:29:54.052761Z" - }, - "papermill": { - "duration": 722.394302, - "end_time": "2022-04-18T00:29:54.052900", - "exception": false, - "start_time": "2022-04-18T00:17:51.658598", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "execution.wait()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "3d747238", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:29:54.103947Z", - "iopub.status.busy": "2022-04-18T00:29:54.101545Z", - "iopub.status.idle": "2022-04-18T00:33:24.717122Z", - "shell.execute_reply": 
"2022-04-18T00:33:24.717530Z" - }, - "papermill": { - "duration": 210.644301, - "end_time": "2022-04-18T00:33:24.717669", - "exception": false, - "start_time": "2022-04-18T00:29:54.073368", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# Create a SageMaker client\n", - "sm_client = sagemaker.Session().sagemaker_client\n", - "\n", - "# Wait for the endpoint to be in service\n", - "waiter = sm_client.get_waiter(\"endpoint_in_service\")\n", - "waiter.wait(EndpointName=endpoint_name)" - ] - }, - { - "cell_type": "markdown", - "id": "9b4a173b", - "metadata": { - "papermill": { - "duration": 0.020528, - "end_time": "2022-04-18T00:33:24.758623", - "exception": false, - "start_time": "2022-04-18T00:33:24.738095", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Clean up resources\n", - "\n", - "Running the following cell will delete the following resources created in this notebook -\n", - "* SageMaker Model\n", - "* SageMaker Endpoint Configuration\n", - "* SageMaker Endpoint\n", - "* SageMaker Pipeline\n", - "* Lambda Function" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "e6305fbe", - "metadata": { - "execution": { - "iopub.execute_input": "2022-04-18T00:33:24.842207Z", - "iopub.status.busy": "2022-04-18T00:33:24.841656Z", - "iopub.status.idle": "2022-04-18T00:33:25.560327Z", - "shell.execute_reply": "2022-04-18T00:33:25.560701Z" - }, - "papermill": { - "duration": 0.782103, - "end_time": "2022-04-18T00:33:25.560838", - "exception": false, - "start_time": "2022-04-18T00:33:24.778735", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/lambda-step-pipeline04-18-00-17-50',\n", - " 'ResponseMetadata': {'RequestId': '48e9d128-0f40-4c52-92dd-93b057e06e12',\n", - " 'HTTPStatusCode': 200,\n", - " 'HTTPHeaders': {'x-amzn-requestid': '48e9d128-0f40-4c52-92dd-93b057e06e12',\n", - " 
'content-type': 'application/x-amz-json-1.1',\n", - " 'content-length': '102',\n", - " 'date': 'Mon, 18 Apr 2022 00:33:24 GMT'},\n", - " 'RetryAttempts': 0}}" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get the model name from the EndpointCofig. The CreateModelStep properties are not available\n", - "# outside the Pipeline execution context so `step_create_model.properties.ModelName`\n", - "# cannot be used while deleting the model.\n", - "\n", - "model_name = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)[\n", - " \"ProductionVariants\"\n", - "][0][\"ModelName\"]\n", - "\n", - "# Delete the Model\n", - "sm_client.delete_model(ModelName=model_name)\n", - "\n", - "# Delete the EndpointConfig\n", - "sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)\n", - "\n", - "# Delete the Endpoint\n", - "sm_client.delete_endpoint(EndpointName=endpoint_name)\n", - "\n", - "# Delete the Lambda function\n", - "func.delete()\n", - "\n", - "# Delete the Pipeline\n", - "sm_client.delete_pipeline(PipelineName=pipeline_name)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.9" - }, - "metadata": { - "interpreter": { - "hash": "ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" - } - }, - "papermill": { - "default_parameters": {}, - "duration": 941.276516, - "end_time": "2022-04-18T00:33:26.098307", - "environment_variables": {}, - "exception": null, - "input_path": "sagemaker-pipelines-lambda-step.ipynb", - "output_path": "/opt/ml/processing/output/sagemaker-pipelines-lambda-step-2022-04-18-00-13-01.ipynb", - "parameters": 
{ - "kms_key": "arn:aws:kms:us-west-2:000000000000:1234abcd-12ab-34cd-56ef-1234567890ab" - }, - "start_time": "2022-04-18T00:17:44.821791", - "version": "2.3.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/sagemaker-pipelines/tabular/model-monitor-clarify-pipelines/sagemaker-pipeline-model-monitor-clarify-steps.ipynb b/sagemaker-pipelines/tabular/model-monitor-clarify-pipelines/sagemaker-pipeline-model-monitor-clarify-steps.ipynb index 73bbebf015..53f2958a51 100644 --- a/sagemaker-pipelines/tabular/model-monitor-clarify-pipelines/sagemaker-pipeline-model-monitor-clarify-steps.ipynb +++ b/sagemaker-pipelines/tabular/model-monitor-clarify-pipelines/sagemaker-pipeline-model-monitor-clarify-steps.ipynb @@ -7,7 +7,7 @@ "## SageMaker Pipelines integration with Model Monitor and Clarify\n", "\n", "This notebook showcases how Model Monitor and Clarify steps can be integrated with SageMaker Pipelines. This allows users to calculate\n", - "baselines for data quality and model quality checks by running the underlying Model Monitor and Clarify containers. " + "baselines for data quality and model quality checks by running the underlying Model Monitor and Clarify containers." ] }, { @@ -16,7 +16,7 @@ "source": [ "## Data/Model Quality, Bias, and Model Explainability Checks in SageMaker Pipelines\n", "\n", - "This notebook introduces two new step types in SageMaker Pipelines - \n", + "This notebook introduces two new step types in SageMaker Pipelines -\n", "* `QualityCheckStep`\n", "* `ClarifyCheckStep`\n", "\n", @@ -29,7 +29,7 @@ "\n", "The training dataset that you used to train the model is usually a good baseline dataset. The training dataset data schema and the inference dataset schema should exactly match (the number and order of the features). Note that the prediction/output columns are assumed to be the first columns in the training dataset. 
From the training dataset, you can ask SageMaker to suggest a set of baseline constraints and generate descriptive statistics to explore the data.\n", "\n", - "These two new steps will always calculate new baselines using the dataset provided. " + "These two new steps will always calculate new baselines using the dataset provided." ] }, { @@ -38,20 +38,20 @@ "source": [ "### Drift Check Baselines in the Model Registry\n", "\n", - "The `RegisterStep` has a new parameter called `drift_check_baselines`. This refers to the baseline files associated with the model. When deployed, these baseline files are used by Model Monitor for Model Quality/Data Quality checks. In addition, these baselines can be used in `QualityCheckStep` and `ClarifyCheckStep` to compare newly trained models against models that have already been registered in the Model Registry. \n", + "The `RegisterStep` has a new parameter called `drift_check_baselines`. This refers to the baseline files associated with the model. When deployed, these baseline files are used by Model Monitor for Model Quality/Data Quality checks. In addition, these baselines can be used in `QualityCheckStep` and `ClarifyCheckStep` to compare newly trained models against models that have already been registered in the Model Registry.\n", "\n", "### Step Properties\n", "\n", "The `QualityCheckStep` has the following properties -\n", "\n", "* `CalculatedBaselineStatistics` : The baseline statistics file calculated by the underlying Model Monitor container.\n", - "* `CalculatedBaselineConstraints` : The baseline constraints file calculated by the underlying Model Monitor container. \n", - "* `BaselineUsedForDriftCheckStatistics` and `BaselineUsedForDriftCheckConstraints` : These are the two properties used to set `drift_check_baseline` in the Model Registry. The values set in these properties vary depending on the parameters passed to the step. The different behaviors are described in the table below. 
\n", - " \n", - "The `ClarifyCheckStep` has the following properties - \n", + "* `CalculatedBaselineConstraints` : The baseline constraints file calculated by the underlying Model Monitor container.\n", + "* `BaselineUsedForDriftCheckStatistics` and `BaselineUsedForDriftCheckConstraints` : These are the two properties used to set `drift_check_baseline` in the Model Registry. The values set in these properties vary depending on the parameters passed to the step. The different behaviors are described in the table below.\n", "\n", - "* `CalculatedBaselineConstraints` : The baseline constraints file calculated by the underlying Clarify container. \n", - "* `BaselineUsedForDriftCheckConstraints` : This property is used to set `drift_check_baseline` in the Model Registry. The values set in this property will vary depending on the parameters passed to the step. The different behaviors are described in the table below. " + "The `ClarifyCheckStep` has the following properties -\n", + "\n", + "* `CalculatedBaselineConstraints` : The baseline constraints file calculated by the underlying Clarify container.\n", + "* `BaselineUsedForDriftCheckConstraints` : This property is used to set `drift_check_baseline` in the Model Registry. The values set in this property will vary depending on the parameters passed to the step. The different behaviors are described in the table below." ] }, { @@ -60,7 +60,7 @@ "source": [ "### Notebook Overview\n", "\n", - "This notebook should be run with `Python 3.9` using the SageMaker Studio `Python3 (Data Science)` kernel. The `sagemaker` sdk version required for this notebook is `>2.70.0`." + "This notebook should be run with `Python 3.9` using the SageMaker Studio `Python3 (Data Science)` kernel." ] }, { @@ -73,17 +73,25 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "! pip install botocore boto3 awscli --upgrade\n", - "! 
pip install \"sagemaker==2.91.1\"" + "! pip install \"sagemaker>=2.99.0\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import os\n", @@ -123,7 +131,8 @@ " CreateModelStep,\n", " TransformStep,\n", ")\n", - "from sagemaker.workflow.step_collections import RegisterModel\n", + "from sagemaker.workflow.model_step import ModelStep\n", + "from sagemaker.workflow.pipeline_context import PipelineSession\n", "\n", "# Importing new steps and helper functions\n", "\n", @@ -157,13 +166,18 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "region = sagemaker.Session().boto_region_name\n", "sm_client = boto3.client(\"sagemaker\")\n", "boto_session = boto3.Session(region_name=region)\n", "sagemaker_session = sagemaker.session.Session(boto_session=boto_session, sagemaker_client=sm_client)\n", + "pipeline_session = PipelineSession()\n", "prefix = \"model-monitor-clarify-step-pipeline\"" ] }, @@ -177,7 +191,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "role = sagemaker.get_execution_role()\n", @@ -193,11 +211,11 @@ "source": [ "### Define pipeline parameters\n", "\n", - "Both `QualityCheckStep` and `ClarifyCheckStep` use two boolean flags `skip_check` and `register_new_baseline` to control their behavior. 
\n", + "Both `QualityCheckStep` and `ClarifyCheckStep` use two boolean flags `skip_check` and `register_new_baseline` to control their behavior.\n", "\n", "* `skip_check` : This determines if a drift check is executed or not.\n", "* `register_new_baseline` : This determines if the newly calculated baselines (in the step property `CalculatedBaselines`) should be set in the step property `BaselineUsedForDriftCheck`.\n", - "* `supplied_baseline_statistics` and `supplied_baseline_constraints` : If `skip_check` is set to False, baselines can be provided to this step through this parameter. If provided, the step will compare the newly calculated baselines (`CalculatedBaselines`) against those provided here instead of finding the latest baselines from the Model Registry. In the case of `ClarifyCheckStep`, only `supplied_baseline_constraints` is a valid parameter, for `QualityCheckStep`, both parameters are used. \n", + "* `supplied_baseline_statistics` and `supplied_baseline_constraints` : If `skip_check` is set to False, baselines can be provided to this step through this parameter. If provided, the step will compare the newly calculated baselines (`CalculatedBaselines`) against those provided here instead of finding the latest baselines from the Model Registry. In the case of `ClarifyCheckStep`, only `supplied_baseline_constraints` is a valid parameter, for `QualityCheckStep`, both parameters are used.\n", "* `model_package_group_name` : The step will use the `drift_check_baselines` from the latest approved model in the model package group for the drift check. If `supplied_baseline_*` is provided, this field will be ignored.\n", "\n", "The first time the pipeline is run, the `skip_check` value should be set to True using the pipeline execution parameters so that new baselines are registered and no drift check is executed." 
@@ -209,13 +227,13 @@ "source": [ "### Combining Pipeline parameters\n", "\n", - "This table summarizes how the pipeline parameters work when combined. \n", + "This table summarizes how the pipeline parameters work when combined.\n", "\n", "The parameter `drift_check_baselines` is used to supply baselines to the `RegisterStep` that will be used for all drift checks involving the model.\n", "\n", - "Newly calculated baselines can be reference by the properties `CalculatedBaselineStatistics` and `CalculatedBaselineConstraints` on the `QualityCheckStep` and `CalculatedBaselineConstraints` on the `ClarifyCheckStep`. \n", + "Newly calculated baselines can be reference by the properties `CalculatedBaselineStatistics` and `CalculatedBaselineConstraints` on the `QualityCheckStep` and `CalculatedBaselineConstraints` on the `ClarifyCheckStep`.\n", "\n", - "For example, `data_quality_check_step.properties.CalculatedBaselineStatistics` and `data_quality_check_step.properties.CalculatedBaselineConstraints`. This property refers to the baseline that is calculated when the data quality check step is executed. " + "For example, `data_quality_check_step.properties.CalculatedBaselineStatistics` and `data_quality_check_step.properties.CalculatedBaselineConstraints`. This property refers to the baseline that is calculated when the data quality check step is executed." ] }, { @@ -224,7 +242,7 @@ "source": [ "\n", "| `skip_check` / `register_new_baseline` | Does step do a drift check? 
| Value of step property `CalculatedBaseline` | Value of step property `BaselineUsedForDriftCheck` | Possible Circumstances for this parameter combination|\n", - "| -------------------------------------- | ---------------------------------------------------------|------------------------------------------------------------ |------------------------------------------------- | -----------------------------------------------------| \n", + "| -------------------------------------- | ---------------------------------------------------------|------------------------------------------------------------ |------------------------------------------------- | -----------------------------------------------------|\n", "| F / F | Drift Check executed against existing baselines. | New baselines calculated by step execution | Baseline from latest approved model in Model Registry or baseline supplied as step parameter | Regular re-training with checks enabled to get a new model version, but carry over previous baselines as DriftCheckBaselines in Registry for new model version. |\n", "| F / T | Drift Check executed against existing baselines. | New baselines calculated by step execution | Newly calculated baseline by step execution (value of property `CalculatedBaseline`) | Regular re-training with checks enabled to get a new model version, but refresh DriftCheckBaselines in Registry with newly calculated baselines for the new model version. |\n", "| T / F | No Drift Check. | New baselines calculated by step execution | Baseline from latest approved model in Model Registry or baseline supplied as step parameter | Violation detected by the model monitor on endpoint for a particular type of check and the pipeline is triggered for retraining a new model. Skip the check against previous baselines, but carry over previous baselines as DriftCheckBaselines in Registry for new model version. 
|\n", @@ -234,7 +252,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", @@ -313,10 +335,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile preprocess.py\n", + "!mkdir -p code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%writefile code/preprocess.py\n", "\n", "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", "import argparse\n", @@ -446,7 +485,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "sklearn_processor = SKLearnProcessor(\n", @@ -454,21 +497,19 @@ " instance_type=\"ml.m5.xlarge\",\n", " instance_count=processing_instance_count,\n", " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", - "\n", - "step_process = ProcessingStep(\n", - " name=\"PreprocessAbaloneData\",\n", - " processor=sklearn_processor,\n", + "processor_args = sklearn_processor.run(\n", " outputs=[\n", " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", " ],\n", - " code=\"preprocess.py\",\n", - " job_arguments=[\"--input-data\", input_data],\n", - ")" + " code=\"code/preprocess.py\",\n", + " arguments=[\"--input-data\", input_data],\n", + ")\n", + "step_process = ProcessingStep(name=\"PreprocessAbaloneData\", 
step_args=processor_args)" ] }, { @@ -477,15 +518,19 @@ "source": [ "### Calculating the Data Quality\n", "\n", - "`CheckJobConfig` is a helper function that's used to define the job configurations used by the `QualityCheckStep`. By separating the job configuration from the step parameters, the same `CheckJobConfig` can be used across multiple steps for quality checks. \n", + "`CheckJobConfig` is a helper function that's used to define the job configurations used by the `QualityCheckStep`. By separating the job configuration from the step parameters, the same `CheckJobConfig` can be used across multiple steps for quality checks.\n", "\n", - "The `DataQualityCheckConfig` is used to define the Quality Check job by specifying the dataset used to calculate the baseline, in this case, the training dataset from the data processing step, the dataset format, in this case, a csv file with no headers, and the output path for the results of the data quality check. " + "The `DataQualityCheckConfig` is used to define the Quality Check job by specifying the dataset used to calculate the baseline, in this case, the training dataset from the data processing step, the dataset format, in this case, a csv file with no headers, and the output path for the results of the data quality check." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "check_job_config = CheckJobConfig(\n", @@ -529,9 +574,9 @@ "source": [ "### Calculating the Data Bias\n", "\n", - "The job configuration from the previous step is used here and the `DataConfig` class is used to define how the `ClarifyCheckStep` should compute the data bias. The training dataset is used again for the bias evaluation, the column representing the label is specified through the `label` parameter, and a `BiasConfig` is provided. 
\n", + "The job configuration from the previous step is used here and the `DataConfig` class is used to define how the `ClarifyCheckStep` should compute the data bias. The training dataset is used again for the bias evaluation, the column representing the label is specified through the `label` parameter, and a `BiasConfig` is provided.\n", "\n", - "In the `BiasConfig`, we specify a facet name (the column that is the focal point of the bias calculation), the value of the facet that determines the range of values it can hold, and the threshold value for the label. \n", + "In the `BiasConfig`, we specify a facet name (the column that is the focal point of the bias calculation), the value of the facet that determines the range of values it can hold, and the threshold value for the label.\n", "\n", "More details on `BiasConfig` can be found [here](https://sagemaker.readthedocs.io/en/stable/api/training/processing.html#sagemaker.clarify.BiasConfig)." ] @@ -539,7 +584,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "data_bias_analysis_cfg_output_path = (\n", @@ -596,7 +645,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "model_path = f\"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/AbaloneTrain\"\n", @@ -614,7 +667,7 @@ " instance_count=1,\n", " output_path=model_path,\n", " base_job_name=f\"{base_job_prefix}/abalone-train\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", @@ -629,9 +682,7 @@ " silent=0,\n", ")\n", "\n", - "step_train = TrainingStep(\n", - " name=\"TrainAbaloneModel\",\n", - " estimator=xgb_train,\n", + "train_args = xgb_train.fit(\n", " inputs={\n", " \"train\": TrainingInput(\n", " 
s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", @@ -644,6 +695,10 @@ " content_type=\"text/csv\",\n", " ),\n", " },\n", + ")\n", + "step_train = TrainingStep(\n", + " name=\"TrainAbaloneModel\",\n", + " step_args=train_args,\n", " depends_on=[data_bias_check_step.name, data_quality_check_step.name],\n", ")" ] @@ -660,25 +715,23 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "model = Model(\n", " image_uri=image_uri,\n", " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", - "inputs = CreateModelInput(\n", - " instance_type=\"ml.m5.large\",\n", - " accelerator_type=\"ml.eia1.medium\",\n", - ")\n", - "\n", - "step_create_model = CreateModelStep(\n", + "step_create_model = ModelStep(\n", " name=\"AbaloneCreateModel\",\n", - " model=model,\n", - " inputs=inputs,\n", + " step_args=model.create(instance_type=\"ml.m5.large\", accelerator_type=\"ml.eia1.medium\"),\n", ")" ] }, @@ -695,7 +748,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "transformer = Transformer(\n", @@ -727,13 +784,17 @@ "source": [ "### Check the Model Quality\n", "\n", - "In this `QualityCheckStep` we calculate the baselines for statistics and constraints using the predictions that the model generates from the test dataset (output from the TransformStep). We define the problem type as 'Regression' in the `ModelQualityCheckConfig` along with specifying the columns which represent the input and output. Since the dataset has no headers, `_c0`, `_c1` are auto-generated header names that should be used in the `ModelQualityCheckConfig`. 
" + "In this `QualityCheckStep` we calculate the baselines for statistics and constraints using the predictions that the model generates from the test dataset (output from the TransformStep). We define the problem type as 'Regression' in the `ModelQualityCheckConfig` along with specifying the columns which represent the input and output. Since the dataset has no headers, `_c0`, `_c1` are auto-generated header names that should be used in the `ModelQualityCheckConfig`." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "model_quality_check_config = ModelQualityCheckConfig(\n", @@ -772,13 +833,17 @@ "source": [ "### Check for Model Bias\n", "\n", - "Similar to the Data Bias check step, a `BiasConfig` is defined and Clarify is used to calculate the model bias using the training dataset and the model. " + "Similar to the Data Bias check step, a `BiasConfig` is defined and Clarify is used to calculate the model bias using the training dataset and the model." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "model_bias_analysis_cfg_output_path = (\n", @@ -839,15 +904,19 @@ "source": [ "### Check Model Explainability\n", "\n", - "SageMaker Clarify uses a model-agnostic feature attribution approach, which you can use to understand why a model made a prediction after training and to provide per-instance explanation during inference. The implementation includes a scalable and efficient implementation of SHAP, based on the concept of a Shapley value from the field of cooperative game theory that assigns each feature an importance value for a particular prediction. 
\n", + "SageMaker Clarify uses a model-agnostic feature attribution approach, which you can use to understand why a model made a prediction after training and to provide per-instance explanation during inference. The implementation includes a scalable and efficient implementation of SHAP, based on the concept of a Shapley value from the field of cooperative game theory that assigns each feature an importance value for a particular prediction.\n", "\n", - "For Model Explainability, Clarify requires an explainability configuration to be provided. In this example, we use `SHAPConfig`. For more information of `explainability_config`, visit the [Clarify documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-model-explainability.html). " + "For Model Explainability, Clarify requires an explainability configuration to be provided. In this example, we use `SHAPConfig`. For more information on `explainability_config`, visit the [Clarify documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-model-explainability.html)." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "model_explainability_analysis_cfg_output_path = \"s3://{}/{}/{}/{}\".format(\n", @@ -895,16 +964,20 @@ "source": [ "### Evaluate the performance of the model\n", "\n", - "Using a processing job, evaluate the performance of the model. The performance is used in the Condition Step to determine if the model should be registered or not. " + "Using a processing job, evaluate the performance of the model. The performance is used in the Condition Step to determine if the model should be registered or not."
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile evaluate.py\n", + "%%writefile code/evaluate.py\n", "\n", "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", "import json\n", @@ -966,7 +1039,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "script_eval = ScriptProcessor(\n", @@ -975,7 +1052,7 @@ " instance_type=\"ml.m5.xlarge\",\n", " instance_count=1,\n", " base_job_name=f\"{base_job_prefix}/script-abalone-eval\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "evaluation_report = PropertyFile(\n", @@ -983,9 +1060,8 @@ " output_name=\"evaluation\",\n", " path=\"evaluation.json\",\n", ")\n", - "step_eval = ProcessingStep(\n", - " name=\"EvaluateAbaloneModel\",\n", - " processor=script_eval,\n", + "\n", + "eval_args = script_eval.run(\n", " inputs=[\n", " ProcessingInput(\n", " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", @@ -999,7 +1075,11 @@ " outputs=[\n", " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", " ],\n", - " code=\"evaluate.py\",\n", + " code=\"code/evaluate.py\",\n", + ")\n", + "step_eval = ProcessingStep(\n", + " name=\"EvaluateAbaloneModel\",\n", + " step_args=eval_args,\n", " property_files=[evaluation_report],\n", ")" ] @@ -1014,7 +1094,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "model_metrics = ModelMetrics(\n", @@ -1094,7 +1178,7 @@ "source": [ "### Register the model\n", "\n", - "The two parameters in `RegisterModel` that hold the metrics calculated by the `ClarifyCheckStep` and `QualityCheckStep` are `model_metrics` and `drift_check_baselines`. 
\n", + "The two parameters in `model.register()` that hold the metrics calculated by the `ClarifyCheckStep` and `QualityCheckStep` are `model_metrics` and `drift_check_baselines`.\n", "\n", "`drift_check_baselines` - these are the baseline files that will be used for drift checks in `QualityCheckStep` or `ClarifyCheckStep` and model monitoring jobs that are set up on endpoints hosting this model.\n", "\n", @@ -1102,19 +1186,20 @@ "\n", "The intention behind these parameters is to give users a way to configure the baselines associated with a model so they can be used in drift checks or model monitoring jobs. Each time a pipeline is executed, users can choose to update the `drift_check_baselines` with newly calculated baselines. The `model_metrics` can be used to register the newly calculated baselines or any other metrics associated with the model.\n", "\n", - "Every time a baseline is calculated, it is not necessary that the baselines used for drift checks are updated to the newly calculated baselines. In some cases, users may retain an older version of the baseline file to be used for drift checks and not register new baselines that are calculated in the Pipeline run. " + "Every time a baseline is calculated, it is not necessary that the baselines used for drift checks are updated to the newly calculated baselines. In some cases, users may retain an older version of the baseline file to be used for drift checks and not register new baselines that are calculated in the Pipeline run."
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "step_register = RegisterModel(\n", - " name=\"RegisterAbaloneModel\",\n", - " estimator=xgb_train,\n", - " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + "register_args = model.register(\n", " content_types=[\"text/csv\"],\n", " response_types=[\"text/csv\"],\n", " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\n", @@ -1123,13 +1208,19 @@ " approval_status=model_approval_status,\n", " model_metrics=model_metrics,\n", " drift_check_baselines=drift_check_baselines,\n", - ")" + ")\n", + "\n", + "step_register = ModelStep(name=\"RegisterAbaloneModel\", step_args=register_args)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# condition step for evaluating model quality and branching execution\n", @@ -1159,7 +1250,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# pipeline instance\n", @@ -1201,7 +1296,7 @@ " step_eval,\n", " step_cond,\n", " ],\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", ")" ] }, @@ -1215,7 +1310,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import json\n", @@ -1227,7 +1326,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline.upsert(role_arn=role)" @@ -1245,7 +1348,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution = pipeline.start(\n", @@ -1274,7 +1381,11 @@ { 
"cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.wait()" @@ -1292,7 +1403,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create a SageMaker client\n", @@ -1315,21 +1430,21 @@ "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "conda_amazonei_mxnet_p27", + "display_name": "Python 3", "language": "python", - "name": "conda_amazonei_mxnet_p27" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" + "pygments_lexer": "ipython3", + "version": "3.6.14" } }, "nbformat": 4, diff --git a/sagemaker-pipelines/tabular/train-register-deploy-pipeline-model/train register and deploy a pipeline model.ipynb b/sagemaker-pipelines/tabular/train-register-deploy-pipeline-model/train register and deploy a pipeline model.ipynb index 7b6a7f83fc..2fbf96183d 100644 --- a/sagemaker-pipelines/tabular/train-register-deploy-pipeline-model/train register and deploy a pipeline model.ipynb +++ b/sagemaker-pipelines/tabular/train-register-deploy-pipeline-model/train register and deploy a pipeline model.ipynb @@ -6,13 +6,32 @@ "source": [ "# SageMaker Pipelines\n", "\n", - "The following notebook shows how to create an Amazon SageMaker Pipeline that builds and trains a **PipelineModel** consisting of a preprocessing SKLearn script followed by a TensorFlow model. The pipeline model is then registered to the Model Registry and deployed from there into a real-time endpoint. 
" + "The following notebook shows how to create an Amazon SageMaker Pipeline that builds and trains a **PipelineModel** consisting of a preprocessing SKLearn script followed by a TensorFlow model. The pipeline model is then registered to the Model Registry and deployed from there into a real-time endpoint." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "!{sys.executable} -m pip install \"sagemaker>=2.99.0\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import os\n", @@ -21,13 +40,18 @@ "import numpy as np\n", "import pandas as pd\n", "import sagemaker\n", - "from sagemaker import get_execution_role" + "from sagemaker import get_execution_role\n", + "from sagemaker.workflow.pipeline_context import PipelineSession" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "sess = boto3.Session()\n", @@ -37,9 +61,11 @@ "bucket = sagemaker_session.default_bucket()\n", "region = boto3.Session().region_name\n", "\n", + "pipeline_session = PipelineSession()\n", + "\n", "model_package_group_name = \"PipelineModelPackageGroup\"\n", "prefix = \"pipeline-model-example\"\n", - "pipeline_name = \"TrainingPipelineForModel\" # SageMaker Pipeline name" + "pipeline_name = \"serial-inference-pipeline\" # SageMaker Pipeline name" ] }, { @@ -62,7 +88,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "data_dir = os.path.join(os.getcwd(), \"data\")\n", @@ -75,7 +105,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "!aws s3 cp 
s3://sagemaker-sample-files/datasets/tabular/california_housing/cal_housing.tgz ." @@ -84,7 +118,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "!tar -zxf cal_housing.tgz" @@ -93,7 +131,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "columns = [\n", @@ -135,7 +177,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.parameters import ParameterInteger, ParameterString, ParameterFloat\n", @@ -144,14 +190,16 @@ "input_data = ParameterString(name=\"InputData\", default_value=raw_s3)\n", "\n", "# status of newly trained model in registry\n", - "model_approval_status = ParameterString(\n", - " name=\"ModelApprovalStatus\", default_value=\"Approved\"\n", - ") # PendingManualApproval | Rejected\n", + "model_approval_status = ParameterString(name=\"ModelApprovalStatus\", default_value=\"Approved\")\n", "\n", "# processing step parameters\n", + "processing_instance_type = ParameterString(\n", + " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", + ")\n", "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", "\n", "# training step parameters\n", + "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", "training_epochs = ParameterString(name=\"TrainingEpochs\", default_value=\"100\")\n", "\n", "# model performance step parameters\n", @@ -169,13 +217,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The below preprocessing script, in addition to creating a scaler, contains the necessary functions for it to be deployed as part of a pipeline model. 
" + "The below preprocessing script, in addition to creating a scaler, contains the necessary functions for it to be deployed as part of a pipeline model." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "!mkdir -p code" @@ -184,7 +236,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "%%writefile code/preprocess.py\n", @@ -325,11 +381,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.sklearn.processing import SKLearnProcessor\n", - "\n", + "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", "\n", "sklearn_framework_version = \"0.23-1\"\n", "\n", @@ -339,13 +399,30 @@ " instance_count=processing_instance_count,\n", " base_job_name=\"sklearn-housing-data-process\",\n", " role=role,\n", + " sagemaker_session=pipeline_session,\n", + ")\n", + "\n", + "processor_args = sklearn_processor.run(\n", + " inputs=[\n", + " ProcessingInput(source=input_data, destination=\"/opt/ml/processing/input\"),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"scaler_model\", source=\"/opt/ml/processing/scaler_model\"),\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " code=\"code/preprocess.py\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", @@ -354,16 +431,7 @@ "\n", "step_process = ProcessingStep(\n", " name=\"PreprocessData\",\n", - " processor=sklearn_processor,\n", - " inputs=[\n", - " 
ProcessingInput(source=input_data, destination=\"/opt/ml/processing/input\"),\n", - " ],\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"scaler_model\", source=\"/opt/ml/processing/scaler_model\"),\n", - " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", - " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", - " ],\n", - " code=\"code/preprocess.py\",\n", + " step_args=processor_args,\n", ")" ] }, @@ -377,7 +445,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "%%writefile code/train.py\n", @@ -482,13 +554,17 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.tensorflow import TensorFlow\n", "from sagemaker.inputs import TrainingInput\n", "from sagemaker.workflow.steps import TrainingStep\n", - "from sagemaker.workflow.step_collections import RegisterModel\n", + "from sagemaker.workflow.model_step import ModelStep\n", "import time\n", "\n", "# Where to store the trained model\n", @@ -501,7 +577,7 @@ "tf2_estimator = TensorFlow(\n", " source_dir=\"code\",\n", " entry_point=\"train.py\",\n", - " instance_type=\"ml.m5.xlarge\",\n", + " instance_type=training_instance_type,\n", " instance_count=1,\n", " framework_version=tensorflow_version,\n", " role=role,\n", @@ -509,13 +585,11 @@ " output_path=model_path,\n", " hyperparameters=hyperparameters,\n", " py_version=python_version,\n", + " sagemaker_session=pipeline_session,\n", ")\n", "\n", - "# Use the tf2_estimator in a Sagemaker pipelines ProcessingStep.\n", "# NOTE how the input to the training job directly references the output of the previous step.\n", - "step_train_model = TrainingStep(\n", - " name=\"TrainTensorflowModel\",\n", - " estimator=tf2_estimator,\n", + "train_args = tf2_estimator.fit(\n", " inputs={\n", " 
\"train\": TrainingInput(\n", " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", @@ -525,17 +599,12 @@ " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", " content_type=\"text/csv\",\n", " ),\n", - " },\n", - ")" + " }\n", + ")\n", + "\n", + "step_train_model = TrainingStep(name=\"TrainTensorflowModel\", step_args=train_args)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -546,7 +615,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "%%writefile code/evaluate.py\n", @@ -605,14 +678,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.properties import PropertyFile\n", @@ -632,7 +702,8 @@ " image_uri=tf_eval_image_uri,\n", " command=[\"python3\"],\n", " instance_count=1,\n", - " instance_type=\"ml.m5.xlarge\",\n", + " instance_type=processing_instance_type,\n", + " sagemaker_session=pipeline_session,\n", ")\n", "\n", "# Create a PropertyFile\n", @@ -642,10 +713,7 @@ " name=\"EvaluationReport\", output_name=\"evaluation\", path=\"evaluation.json\"\n", ")\n", "\n", - "# Use the evaluate_model_processor in a Sagemaker pipelines ProcessingStep.\n", - "step_evaluate_model = ProcessingStep(\n", - " name=\"EvaluateModelPerformance\",\n", - " processor=evaluate_model_processor,\n", + "eval_args = evaluate_model_processor.run(\n", " inputs=[\n", " ProcessingInput(\n", " source=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,\n", @@ -660,17 +728,15 @@ " ProcessingOutput(output_name=\"evaluation\", 
source=\"/opt/ml/processing/evaluation\"),\n", " ],\n", " code=\"code/evaluate.py\",\n", + ")\n", + "\n", + "step_evaluate_model = ProcessingStep(\n", + " name=\"EvaluateModelPerformance\",\n", + " step_args=eval_args,\n", " property_files=[evaluation_report],\n", ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -681,7 +747,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.model import Model\n", @@ -696,12 +766,11 @@ "scaler_model = SKLearnModel(\n", " model_data=scaler_model_s3,\n", " role=role,\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " entry_point=\"code/preprocess.py\",\n", " framework_version=sklearn_framework_version,\n", ")\n", "\n", - "\n", "tf_model_image_uri = sagemaker.image_uris.retrieve(\n", " framework=\"tensorflow\",\n", " region=region,\n", @@ -714,19 +783,23 @@ "tf_model = Model(\n", " image_uri=tf_model_image_uri,\n", " model_data=step_train_model.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", "pipeline_model = PipelineModel(\n", - " models=[scaler_model, tf_model], role=role, sagemaker_session=sagemaker_session\n", + " models=[scaler_model, tf_model], role=role, sagemaker_session=pipeline_session\n", ")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.model_metrics import MetricsSource, ModelMetrics\n", @@ -744,9 +817,7 @@ " )\n", ")\n", "\n", - "step_register_pipeline_model = RegisterModel(\n", - " name=\"PipelineModel\",\n", - " model=pipeline_model,\n", + "register_args = pipeline_model.register(\n", " 
content_types=[\"text/csv\"],\n", " response_types=[\"text/csv\"],\n", " inference_instances=[\"ml.m5.large\", \"ml.m5.xlarge\"],\n", @@ -754,6 +825,11 @@ " model_package_group_name=model_package_group_name,\n", " model_metrics=model_metrics,\n", " approval_status=model_approval_status,\n", + ")\n", + "\n", + "step_register_pipeline_model = ModelStep(\n", + " name=\"PipelineModel\",\n", + " step_args=register_args,\n", ")" ] }, @@ -767,7 +843,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", @@ -805,7 +885,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.workflow.pipeline import Pipeline\n", @@ -818,6 +902,8 @@ "pipeline = Pipeline(\n", " name=pipeline_name,\n", " parameters=[\n", + " training_instance_type,\n", + " processing_instance_type,\n", " processing_instance_count,\n", " input_data,\n", " model_approval_status,\n", @@ -831,7 +917,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import json\n", @@ -850,7 +940,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline.upsert(role_arn=role)" @@ -859,7 +953,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution = pipeline.start()" @@ -868,26 +966,16 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "execution.wait()" ] }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -898,7 +986,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "%%writefile utils.py\n", @@ -967,7 +1059,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from utils import get_approved_package\n", @@ -985,7 +1081,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker import ModelPackage\n", @@ -1003,7 +1103,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from sagemaker.predictor import Predictor\n", @@ -1014,7 +1118,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "data = pd.read_csv(\"data/raw/raw_data_all.csv\")\n", @@ -1030,7 +1138,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "blue, stop = \"\\033[94m\", \"\\033[0m\"\n", @@ -1052,7 +1164,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "sm_client = boto3.client(\"sagemaker\")\n", @@ -1069,7 +1185,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "predictor.delete_endpoint()" @@ -1078,26 +1198,23 @@ { "cell_type": "code", "execution_count": null, - 
"metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline.delete()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-1:470317259841:image/datascience-1.0" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1109,9 +1226,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.6.14" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/tuning-step/sagemaker-pipelines-tuning-step.ipynb b/sagemaker-pipelines/tabular/tuning-step/sagemaker-pipelines-tuning-step.ipynb index c279514393..4ea0d6e112 100644 --- a/sagemaker-pipelines/tabular/tuning-step/sagemaker-pipelines-tuning-step.ipynb +++ b/sagemaker-pipelines/tabular/tuning-step/sagemaker-pipelines-tuning-step.ipynb @@ -6,7 +6,7 @@ "source": [ "#### SageMaker Pipelines Tuning Step\n", "\n", - "This notebook illustrates how a Hyperparameter Tuning Job can be run as a step in a SageMaker Pipeline. 
\n", + "This notebook illustrates how a Hyperparameter Tuning Job can be run as a step in a SageMaker Pipeline.\n", "\n", "The steps in this pipeline include -\n", "* Preprocessing the abalone dataset\n", @@ -19,18 +19,26 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import sys\n", "\n", - "!{sys.executable} -m pip install \"sagemaker==2.91.1\"" + "!{sys.executable} -m pip install \"sagemaker>=2.99.0\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import os\n", @@ -61,18 +69,14 @@ ")\n", "from sagemaker.workflow.pipeline import Pipeline\n", "from sagemaker.workflow.properties import PropertyFile\n", - "from sagemaker.workflow.steps import (\n", - " ProcessingStep,\n", - " CacheConfig,\n", - " TuningStep,\n", - ")\n", - "from sagemaker.workflow.step_collections import RegisterModel, CreateModelStep\n", + "from sagemaker.workflow.steps import ProcessingStep, CacheConfig, TuningStep\n", + "from sagemaker.workflow.model_step import ModelStep\n", "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", "from sagemaker.workflow.condition_step import ConditionStep\n", "\n", "from sagemaker.workflow.functions import Join, JsonGet\n", "from sagemaker.workflow.execution_variables import ExecutionVariables\n", - "\n", + "from sagemaker.workflow.pipeline_context import PipelineSession\n", "from sagemaker.tuner import (\n", " ContinuousParameter,\n", " HyperparameterTuner,\n", @@ -84,7 +88,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Create the SageMaker Session\n", @@ -92,13 +100,20 @@ "region = sagemaker.Session().boto_region_name\n", "sm_client = boto3.client(\"sagemaker\")\n", "boto_session = 
boto3.Session(region_name=region)\n", - "sagemaker_session = sagemaker.session.Session(boto_session=boto_session, sagemaker_client=sm_client)" + "sagemaker_session = sagemaker.session.Session(boto_session=boto_session, sagemaker_client=sm_client)\n", + "\n", + "# Create a Pipeline Session\n", + "pipeline_session = PipelineSession()" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Define variables and parameters needed for the Pipeline steps\n", @@ -131,7 +146,7 @@ "source": [ "#### Data Preparation\n", "\n", - "An SKLearn processor is used to prepare the dataset for the Hyperparameter Tuning job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets. \n", + "An SKLearn processor is used to prepare the dataset for the Hyperparameter Tuning job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets.\n", "\n", "The output of this step is used as the input to the TuningStep" ] @@ -139,10 +154,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile preprocess.py\n", + "!mkdir -p code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%writefile code/preprocess.py\n", "\n", "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", "import argparse\n", @@ -272,7 +304,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Process the training data step using a python script.\n", @@ -287,12 +323,11 @@ " instance_type=\"ml.m5.xlarge\",\n", " instance_count=processing_instance_count,\n", " 
base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", - "step_process = ProcessingStep(\n", - " name=\"PreprocessAbaloneDataForHPO\",\n", - " processor=sklearn_processor,\n", + "\n", + "processor_run_args = sklearn_processor.run(\n", " outputs=[\n", " ProcessingOutput(\n", " output_name=\"train\",\n", @@ -337,8 +372,13 @@ " ),\n", " ),\n", " ],\n", - " code=\"preprocess.py\",\n", - " job_arguments=[\"--input-data\", input_data],\n", + " code=\"code/preprocess.py\",\n", + " arguments=[\"--input-data\", input_data],\n", + ")\n", + "\n", + "step_process = ProcessingStep(\n", + " name=\"PreprocessAbaloneDataForHPO\",\n", + " step_args=processor_run_args,\n", ")" ] }, @@ -348,7 +388,7 @@ "source": [ "#### Hyperparameter Tuning\n", "\n", - "Amazon SageMaker automatic model tuning, also known as hyperparameter tuning, finds the best version of a model by running many training jobs on your dataset using the algorithm and ranges of hyperparameters that you specify. It then chooses the hyperparameter values that result in a model that performs the best, as measured by a metric that you choose. \n", + "Amazon SageMaker automatic model tuning, also known as hyperparameter tuning, finds the best version of a model by running many training jobs on your dataset using the algorithm and ranges of hyperparameters that you specify. 
It then chooses the hyperparameter values that result in a model that performs the best, as measured by a metric that you choose.\n", "\n", "[Valid metrics](https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst#learning-task-parameters) for XGBoost Tuning Job\n", "\n", @@ -358,7 +398,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Define the output path for the model artifacts from the Hyperparameter Tuning Job\n", @@ -378,7 +422,7 @@ " instance_count=1,\n", " output_path=model_path,\n", " base_job_name=f\"{base_job_prefix}/abalone-train\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", @@ -411,9 +455,7 @@ " objective_type=\"Minimize\",\n", ")\n", "\n", - "step_tuning = TuningStep(\n", - " name=\"HPTuning\",\n", - " tuner=tuner_log,\n", + "hpo_args = tuner_log.fit(\n", " inputs={\n", " \"train\": TrainingInput(\n", " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", @@ -425,7 +467,12 @@ " ].S3Output.S3Uri,\n", " content_type=\"text/csv\",\n", " ),\n", - " },\n", + " }\n", + ")\n", + "\n", + "step_tuning = TuningStep(\n", + " name=\"HPTuning\",\n", + " step_args=hpo_args,\n", " cache_config=cache_config,\n", ")" ] @@ -440,15 +487,19 @@ "\n", "Find more information on [Warm Starts](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-warm-start.html) in the SageMaker docs.\n", "\n", - "In a training pipeline, the parent tuning job name can be provided as a pipeline parameter if there is an already complete Hyperparameter tuning job that should be used as the basis for the warm start. 
\n", + "In a training pipeline, the parent tuning job name can be provided as a pipeline parameter if there is an already complete Hyperparameter tuning job that should be used as the basis for the warm start.\n", "\n", - "This step is left out of the pipeline steps in this notebook. It can be added into the steps while defining the pipeline and the appropriate parent tuning job should be specified. " + "This step is left out of the pipeline steps in this notebook. It can be added into the steps while defining the pipeline and the appropriate parent tuning job should be specified." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# This is an example to illustrate how a the name of the tuning job from the previous step can be used as the parent tuning job, in practice,\n", @@ -474,9 +525,7 @@ " warm_start_config=warm_start_config,\n", ")\n", "\n", - "step_tuning_warm_start = TuningStep(\n", - " name=\"HPTuningWarmStart\",\n", - " tuner=tuner_log_warm_start,\n", + "tuner_run_args = tuner_log_warm_start.fit(\n", " inputs={\n", " \"train\": TrainingInput(\n", " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", @@ -488,7 +537,12 @@ " ].S3Output.S3Uri,\n", " content_type=\"text/csv\",\n", " ),\n", - " },\n", + " }\n", + ")\n", + "\n", + "step_tuning_warm_start = TuningStep(\n", + " name=\"HPTuningWarmStart\",\n", + " step_args=tuner_run_args,\n", " cache_config=cache_config,\n", ")" ] @@ -499,9 +553,9 @@ "source": [ "#### Creating and Registering the best models\n", "\n", - "After successfully completing the Hyperparameter Tuning job. You can either create SageMaker models from the model artifacts created by the training jobs from the TuningStep or register the models into the Model Registry. \n", + "After successfully completing the Hyperparameter Tuning job. 
You can either create SageMaker models from the model artifacts created by the training jobs from the TuningStep or register the models into the Model Registry.\n", "\n", - "When using the model Registry, if you register multiple models from the TuningStep, they will be registered as versions within the same model package group unless unique model package groups are specified for each RegisterModelStep that is part of the pipeline. \n", + "When using the Model Registry, if you register multiple models from the TuningStep, they will be registered as versions within the same model package group unless unique model package groups are specified for each `ModelStep` that is part of the pipeline.\n", "\n", "In this example, the two best models from the TuningStep are added to the same model package group in the Model Registry as v0 and v1.\n", "\n", @@ -511,38 +565,40 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Creating 2 SageMaker Models\n", - "\n", "model_bucket_key = f\"{default_bucket}/{base_job_prefix}/AbaloneTrain\"\n", + "\n", "best_model = Model(\n", " image_uri=image_uri,\n", " model_data=step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", " predictor_cls=XGBoostPredictor,\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", ")\n", "\n", - "step_create_first = CreateModelStep(\n", - " name=\"CreateTopModel\",\n", - " model=best_model,\n", - " inputs=sagemaker.inputs.CreateModelInput(instance_type=\"ml.m4.large\"),\n", + "step_create_first = ModelStep(\n", + " name=\"CreateBestModel\",\n", + " step_args=best_model.create(instance_type=\"ml.m5.xlarge\"),\n", ")\n", "\n", "second_best_model = Model(\n", " image_uri=image_uri,\n", " model_data=step_tuning.get_top_model_s3_uri(top_k=1, s3_bucket=model_bucket_key),\n", - " 
sagemaker_session=sagemaker_session,\n", - " role=role,\n", " predictor_cls=XGBoostPredictor,\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", ")\n", "\n", - "step_create_second = CreateModelStep(\n", + "step_create_second = ModelStep(\n", " name=\"CreateSecondBestModel\",\n", - " model=second_best_model,\n", - " inputs=sagemaker.inputs.CreateModelInput(instance_type=\"ml.m4.large\"),\n", + " step_args=second_best_model.create(instance_type=\"ml.m5.xlarge\"),\n", ")" ] }, @@ -558,10 +614,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "%%writefile evaluate.py\n", + "%%writefile code/evaluate.py\n", "\n", "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", "import json\n", @@ -623,7 +683,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# A ProcessingStep is used to evaluate the performance of a selected model from the HPO step. 
In this case, the top performing model\n", @@ -635,7 +699,7 @@ " instance_type=\"ml.m5.xlarge\",\n", " instance_count=1,\n", " base_job_name=f\"{base_job_prefix}/script-tuning-step-eval\",\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")\n", "\n", @@ -645,10 +709,7 @@ " path=\"evaluation.json\",\n", ")\n", "\n", - "# This can be extended to evaluate multiple models from the HPO step\n", - "step_eval = ProcessingStep(\n", - " name=\"EvaluateTopModel\",\n", - " processor=script_eval,\n", + "processor_args = script_eval.run(\n", " inputs=[\n", " ProcessingInput(\n", " source=step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),\n", @@ -662,7 +723,13 @@ " outputs=[\n", " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", " ],\n", - " code=\"evaluate.py\",\n", + " code=\"code/evaluate.py\",\n", + ")\n", + "\n", + "# This can be extended to evaluate multiple models from the HPO step\n", + "step_eval = ProcessingStep(\n", + " name=\"EvaluateTopModel\",\n", + " step_args=processor_args,\n", " property_files=[evaluation_report],\n", " cache_config=cache_config,\n", ")\n", @@ -680,30 +747,38 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Register the model in the Model Registry\n", - "# Multiple models can be registered into the Model Registry using multiple RegisterModel steps. These models can either be added to the\n", + "# Multiple models can be registered into the Model Registry using multiple ModelSteps. 
These models can either be added to the\n", "# same model package group as different versions within the group or the models can be added to different model package groups.\n", "\n", - "step_register_best = RegisterModel(\n", - " name=\"RegisterBestAbaloneModel\",\n", - " estimator=xgb_train,\n", - " model_data=step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),\n", + "register_args = best_model.register(\n", " content_types=[\"text/csv\"],\n", " response_types=[\"text/csv\"],\n", " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\n", " transform_instances=[\"ml.m5.large\"],\n", " model_package_group_name=model_package_group_name,\n", " approval_status=model_approval_status,\n", - ")" + ")\n", + "\n", + "\n", + "step_register_best = ModelStep(name=\"RegisterBestAbaloneModel\", step_args=register_args)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# condition step for evaluating model quality and branching execution\n", @@ -727,7 +802,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline = Pipeline(\n", @@ -746,7 +825,7 @@ " step_eval,\n", " step_cond,\n", " ],\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", ")" ] }, @@ -760,7 +839,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import json\n", @@ -772,7 +855,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "pipeline.upsert(role_arn=role)" @@ -781,7 +868,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ 
"pipeline.start()" @@ -799,7 +890,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# # Create a SageMaker client\n", @@ -822,12 +917,21 @@ "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3.9.4 64-bit ('python@3.9')", - "name": "python394jvsc74a57bd0ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + "display_name": "Python 3", + "language": "python", + "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.14" }, "metadata": { "interpreter": { @@ -837,4 +941,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file From 8e7deee6c2dcf139e77a9303d809cde78e7429f6 Mon Sep 17 00:00:00 2001 From: Dewen Qi Date: Wed, 13 Jul 2022 14:32:32 -0700 Subject: [PATCH 2/2] Add updated output notebooks --- ...ain-evaluate-batch-transform_outputs.ipynb | 3444 +++++++++++++++++ ...emaker-pipelines-lambda-step_outputs.ipynb | 1920 +++++++++ 2 files changed, 5364 insertions(+) create mode 100644 sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb create mode 100644 sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb diff --git a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb new file mode 100644 index 0000000000..d20c0f6d83 --- /dev/null +++ 
b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb @@ -0,0 +1,3444 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3905c65f", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:43.979421Z", + "iopub.status.busy": "2022-07-13T16:07:43.978612Z", + "iopub.status.idle": "2022-07-13T16:07:43.980773Z", + "shell.execute_reply": "2022-07-13T16:07:43.980271Z" + }, + "papermill": { + "duration": 0.036422, + "end_time": "2022-07-13T16:07:43.980902", + "exception": false, + "start_time": "2022-07-13T16:07:43.944480", + "status": "completed" + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Parameters\n", + "kms_key = \"arn:aws:kms:us-west-2:000000000000:1234abcd-12ab-34cd-56ef-1234567890ab\"" + ] + }, + { + "cell_type": "markdown", + "id": "f207c2b8", + "metadata": { + "papermill": { + "duration": 0.029403, + "end_time": "2022-07-13T16:07:44.040408", + "exception": false, + "start_time": "2022-07-13T16:07:44.011005", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Orchestrate Jobs to Train and Evaluate Models with Amazon SageMaker Pipelines\n", + "\n", + "Amazon SageMaker Pipelines offers machine learning (ML) application developers and operations engineers the ability to orchestrate SageMaker jobs and author reproducible ML pipelines. It also enables them to deploy custom-built models for inference in real-time with low latency, run offline inferences with Batch Transform, and track lineage of artifacts. 
They can institute sound operational practices in deploying and monitoring production workflows, deploying model artifacts, and tracking artifact lineage through a simple interface, adhering to safety and best practice paradigms for ML application development.\n", + "\n", + "The SageMaker Pipelines service supports a SageMaker Pipeline domain specific language (DSL), which is a declarative JSON specification. This DSL defines a directed acyclic graph (DAG) of pipeline parameters and SageMaker job steps. The SageMaker Python Software Developer Kit (SDK) streamlines the generation of the pipeline DSL using constructs that engineers and scientists are already familiar with.\n", + "\n", + "## Runtime\n", + "\n", + "This notebook takes approximately an hour to run.\n", + "\n", + "## Contents\n", + "\n", + "1. [SageMaker Pipelines](#SageMaker-Pipelines)\n", + "1. [Notebook Overview](#Notebook-Overview)\n", + "1. [A SageMaker Pipeline](#A-SageMaker-Pipeline)\n", + "1. [Dataset](#Dataset)\n", + "1. [Define Parameters to Parametrize Pipeline Execution](#Define-Parameters-to-Parametrize-Pipeline-Execution)\n", + "1. [Define a Processing Step for Feature Engineering](#Define-a-Processing-Step-for-Feature-Engineering)\n", + "1. [Define a Training Step to Train a Model](#Define-a-Training-Step-to-Train-a-Model)\n", + "1. [Define a Model Evaluation Step to Evaluate the Trained Model](#Define-a-Model-Evaluation-Step-to-Evaluate-the-Trained-Model)\n", + "1. [Define a Create Model Step to Create a Model](#Define-a-Create-Model-Step-to-Create-a-Model)\n", + "1. [Define a Transform Step to Perform Batch Transformation](#Define-a-Transform-Step-to-Perform-Batch-Transformation)\n", + "1. [Define a Register Model Step to Create a Model Package](#Define-a-Register-Model-Step-to-Create-a-Model-Package)\n", + "1. [Define a Fail Step to Terminate the Pipeline Execution and Mark it as Failed](#Define-a-Fail-Step-to-Terminate-the-Pipeline-Execution-and-Mark-it-as-Failed)\n", + "1. 
[Define a Condition Step to Check Accuracy and Conditionally Create a Model and Run a Batch Transformation and Register a Model in the Model Registry, Or Terminate the Execution in Failed State](#Define-a-Condition-Step-to-Check-Accuracy-and-Conditionally-Create-a-Model-and-Run-a-Batch-Transformation-and-Register-a-Model-in-the-Model-Registry,-Or-Terminate-the-Execution-in-Failed-State)\n", + "1. [Define a Pipeline of Parameters, Steps, and Conditions](#Define-a-Pipeline-of-Parameters,-Steps,-and-Conditions)\n", + "1. [Submit the pipeline to SageMaker and start execution](#Submit-the-pipeline-to-SageMaker-and-start-execution)\n", + "1. [Pipeline Operations: Examining and Waiting for Pipeline Execution](#Pipeline-Operations:-Examining-and-Waiting-for-Pipeline-Execution)\n", + " 1. [Examining the Evaluation](#Examining-the-Evaluation)\n", + " 1. [Lineage](#Lineage)\n", + " 1. [Parametrized Executions](#Parametrized-Executions)" + ] + }, + { + "cell_type": "markdown", + "id": "57abefb0", + "metadata": { + "papermill": { + "duration": 0.029368, + "end_time": "2022-07-13T16:07:44.099261", + "exception": false, + "start_time": "2022-07-13T16:07:44.069893", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## SageMaker Pipelines\n", + "\n", + "SageMaker Pipelines supports the following activities, which are demonstrated in this notebook:\n", + "\n", + "* Pipelines - A DAG of steps and conditions to orchestrate SageMaker jobs and resource creation.\n", + "* Processing job steps - A simplified, managed experience on SageMaker to run data processing workloads, such as feature engineering, data validation, model evaluation, and model interpretation.\n", + "* Training job steps - An iterative process that teaches a model to make predictions by presenting examples from a training dataset.\n", + "* Conditional execution steps - A step that provides conditional execution of branches in a pipeline.\n", + "* Register model steps - A step that creates a model package 
resource in the Model Registry that can be used to create deployable models in Amazon SageMaker.\n", + "* Create model steps - A step that creates a model for use in transform steps or later publication as an endpoint.\n", + "* Transform job steps - A batch transform to preprocess datasets to remove noise or bias that interferes with training or inference from a dataset, get inferences from large datasets, and run inference when a persistent endpoint is not needed.\n", + "* Fail steps - A step that stops a pipeline execution and marks the pipeline execution as failed.\n", + "* Parametrized Pipeline executions - Enables variation in pipeline executions according to specified parameters." + ] + }, + { + "cell_type": "markdown", + "id": "f8fd6f33", + "metadata": { + "papermill": { + "duration": 0.029618, + "end_time": "2022-07-13T16:07:44.158330", + "exception": false, + "start_time": "2022-07-13T16:07:44.128712", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Notebook Overview\n", + "\n", + "This notebook shows how to:\n", + "\n", + "* Define a set of Pipeline parameters that can be used to parametrize a SageMaker Pipeline.\n", + "* Define a Processing step that performs cleaning, feature engineering, and splitting the input data into train and test data sets.\n", + "* Define a Training step that trains a model on the preprocessed train data set.\n", + "* Define a Processing step that evaluates the trained model's performance on the test dataset.\n", + "* Define a Create Model step that creates a model from the model artifacts used in training.\n", + "* Define a Transform step that performs batch transformation based on the model that was created.\n", + "* Define a Register Model step that creates a model package from the estimator and model artifacts used to train the model.\n", + "* Define a Conditional step that measures a condition based on output from prior steps and conditionally executes other steps.\n", + "* Define a Fail step with a 
customized error message indicating the cause of the execution failure.\n", + "* Define and create a Pipeline definition in a DAG, with the defined parameters and steps.\n", + "* Start a Pipeline execution and wait for execution to complete.\n", + "* Download the model evaluation report from the S3 bucket for examination.\n", + "* Start a second Pipeline execution." + ] + }, + { + "cell_type": "markdown", + "id": "38772d5a", + "metadata": { + "papermill": { + "duration": 0.029536, + "end_time": "2022-07-13T16:07:44.217483", + "exception": false, + "start_time": "2022-07-13T16:07:44.187947", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## A SageMaker Pipeline\n", + "\n", + "The pipeline that you create follows a typical machine learning (ML) application pattern of preprocessing, training, evaluation, model creation, batch transformation, and model registration:\n", + "\n", + "![A typical ML Application pipeline](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-full.png)" + ] + }, + { + "cell_type": "markdown", + "id": "9607841c", + "metadata": { + "papermill": { + "duration": 0.029684, + "end_time": "2022-07-13T16:07:44.276589", + "exception": false, + "start_time": "2022-07-13T16:07:44.246905", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Dataset\n", + "\n", + "The dataset you use is the [UCI Machine Learning Abalone Dataset](https://archive.ics.uci.edu/ml/datasets/abalone) [1]. The aim for this task is to determine the age of an abalone snail from its physical measurements. 
At the core, this is a regression problem.\n", + "\n", + "The dataset contains several features: length (the longest shell measurement), diameter (the diameter perpendicular to length), height (the height with meat in the shell), whole_weight (the weight of whole abalone), shucked_weight (the weight of meat), viscera_weight (the gut weight after bleeding), shell_weight (the weight after being dried), sex ('M', 'F', 'I' where 'I' is Infant), and rings (integer).\n", + "\n", + "The number of rings turns out to be a good approximation for age (age is rings + 1.5). However, obtaining this number requires cutting the shell through the cone, staining the section, and counting the number of rings through a microscope, which is a time-consuming task. The other physical measurements, by contrast, are easier to determine. You use the dataset to build a predictive model of the variable rings through these other physical measurements.\n", + "\n", + "Before you upload the data to an S3 bucket, install the SageMaker Python SDK and gather some constants you can use later in this notebook.\n", + "\n", + "> [1] Dua, D. and Graff, C. (2019). [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml). Irvine, CA: University of California, School of Information and Computer Science." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ef441354", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:44.382810Z", + "iopub.status.busy": "2022-07-13T16:07:44.382069Z", + "iopub.status.idle": "2022-07-13T16:07:48.212159Z", + "shell.execute_reply": "2022-07-13T16:07:48.211713Z" + }, + "papermill": { + "duration": 3.905415, + "end_time": "2022-07-13T16:07:48.212278", + "exception": false, + "start_time": "2022-07-13T16:07:44.306863", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\r\n", + " from cryptography.utils import int_from_bytes\r\n", + "/opt/conda/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\r\n", + " from cryptography.utils import int_from_bytes\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sagemaker>=2.99.0 in /opt/conda/lib/python3.7/site-packages (2.99.0)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: protobuf3-to-dict<1.0,>=0.1.5 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (0.1.5)\r\n", + "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.21.1)\r\n", + "Requirement already satisfied: attrs<22,>=20.3.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (21.4.0)\r\n", + "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (0.2.0)\r\n", + "Requirement already satisfied: boto3<2.0,>=1.20.21 in /opt/conda/lib/python3.7/site-packages (from 
sagemaker>=2.99.0) (1.20.47)\r\n", + "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (0.2.8)\r\n", + "Requirement already satisfied: protobuf<4.0,>=3.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (3.17.3)\r\n", + "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (20.1)\r\n", + "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.0.1)\r\n", + "Requirement already satisfied: importlib-metadata<5.0,>=1.4.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.5.0)\r\n", + "Requirement already satisfied: smdebug-rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.0.1)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (0.10.0)\r\n", + "Requirement already satisfied: botocore<1.24.0,>=1.23.47 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (1.23.47)\r\n", + "Requirement already satisfied: s3transfer<0.6.0,>=0.5.0 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (0.5.0)\r\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.7/site-packages (from botocore<1.24.0,>=1.23.47->boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (2.8.1)\r\n", + "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.7/site-packages (from botocore<1.24.0,>=1.23.47->boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (1.26.6)\r\n", + "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<5.0,>=1.4.0->sagemaker>=2.99.0) (2.2.0)\r\n", + "Requirement already satisfied: pyparsing>=2.0.2 in 
/opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker>=2.99.0) (2.4.6)\r\n", + "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker>=2.99.0) (1.14.0)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker>=2.99.0) (2019.3)\r\n", + "Requirement already satisfied: ppft>=1.6.6.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (1.6.6.4)\r\n", + "Requirement already satisfied: pox>=0.3.0 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (0.3.0)\r\n", + "Requirement already satisfied: dill>=0.3.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (0.3.4)\r\n", + "Requirement already satisfied: multiprocess>=0.70.12 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (0.70.12.2)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n", + "\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n", + "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n" + ] + } + ], + "source": [ + "import sys\n", + "\n", + "!{sys.executable} -m pip install \"sagemaker>=2.99.0\"\n", + "\n", + "import boto3\n", + "import sagemaker\n", + "from sagemaker.workflow.pipeline_context import PipelineSession\n", + "\n", + "sagemaker_session = sagemaker.session.Session()\n", + "region = sagemaker_session.boto_region_name\n", + "role = sagemaker.get_execution_role()\n", + "pipeline_session = PipelineSession()\n", + "default_bucket = sagemaker_session.default_bucket()\n", + "model_package_group_name = f\"AbaloneModelPackageGroupName\"" + ] + }, + { + "cell_type": "markdown", + "id": "c03f70f4", + "metadata": { + "papermill": { + "duration": 0.031185, + "end_time": "2022-07-13T16:07:48.274459", + "exception": false, + "start_time": "2022-07-13T16:07:48.243274", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Now, upload the data into the default bucket. You can select your own data set for the `input_data_uri` as is appropriate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f15c8059", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:48.346589Z", + "iopub.status.busy": "2022-07-13T16:07:48.341539Z", + "iopub.status.idle": "2022-07-13T16:07:48.497993Z", + "shell.execute_reply": "2022-07-13T16:07:48.497482Z" + }, + "papermill": { + "duration": 0.19199, + "end_time": "2022-07-13T16:07:48.498111", + "exception": false, + "start_time": "2022-07-13T16:07:48.306121", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!mkdir -p data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8ff00b12", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:48.575181Z", + "iopub.status.busy": "2022-07-13T16:07:48.574347Z", + "iopub.status.idle": "2022-07-13T16:07:49.570833Z", + "shell.execute_reply": "2022-07-13T16:07:49.571230Z" + }, + "papermill": { + "duration": 1.041888, + "end_time": "2022-07-13T16:07:49.571370", + "exception": false, + "start_time": "2022-07-13T16:07:48.529482", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "s3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset.csv\n" + ] + } + ], + "source": [ + "local_path = \"data/abalone-dataset.csv\"\n", + "\n", + "s3 = boto3.resource(\"s3\")\n", + "s3.Bucket(f\"sagemaker-sample-files\").download_file(\n", + " \"datasets/tabular/uci_abalone/abalone.csv\", local_path\n", + ")\n", + "\n", + "base_uri = f\"s3://{default_bucket}/abalone\"\n", + "input_data_uri = sagemaker.s3.S3Uploader.upload(\n", + " local_path=local_path,\n", + " desired_s3_uri=base_uri,\n", + ")\n", + "print(input_data_uri)" + ] + }, + { + "cell_type": "markdown", + "id": "68c6ec51", + "metadata": { + "papermill": { + "duration": 0.033529, + "end_time": "2022-07-13T16:07:49.637761", + "exception": 
false, + "start_time": "2022-07-13T16:07:49.604232", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Download a second dataset for batch transformation after model creation. You can select your own dataset for the `batch_data_uri` as is appropriate." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f671e801", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:49.722364Z", + "iopub.status.busy": "2022-07-13T16:07:49.719824Z", + "iopub.status.idle": "2022-07-13T16:07:50.203802Z", + "shell.execute_reply": "2022-07-13T16:07:50.208095Z" + }, + "papermill": { + "duration": 0.530874, + "end_time": "2022-07-13T16:07:50.208271", + "exception": false, + "start_time": "2022-07-13T16:07:49.677397", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "s3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset-batch\n" + ] + } + ], + "source": [ + "local_path = \"data/abalone-dataset-batch\"\n", + "\n", + "s3 = boto3.resource(\"s3\")\n", + "s3.Bucket(f\"sagemaker-servicecatalog-seedcode-{region}\").download_file(\n", + " \"dataset/abalone-dataset-batch\", local_path\n", + ")\n", + "\n", + "base_uri = f\"s3://{default_bucket}/abalone\"\n", + "batch_data_uri = sagemaker.s3.S3Uploader.upload(\n", + " local_path=local_path,\n", + " desired_s3_uri=base_uri,\n", + ")\n", + "print(batch_data_uri)" + ] + }, + { + "cell_type": "markdown", + "id": "345b427e", + "metadata": { + "papermill": { + "duration": 0.054012, + "end_time": "2022-07-13T16:07:50.311578", + "exception": false, + "start_time": "2022-07-13T16:07:50.257566", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define Parameters to Parametrize Pipeline Execution\n", + "\n", + "Define Pipeline parameters that you can use to parametrize the pipeline. 
Parameters enable custom pipeline executions and schedules without having to modify the Pipeline definition.\n", + "\n", + "The supported parameter types include:\n", + "\n", + "* `ParameterString` - represents a `str` Python type\n", + "* `ParameterInteger` - represents an `int` Python type\n", + "* `ParameterFloat` - represents a `float` Python type\n", + "\n", + "These parameters support providing a default value, which can be overridden on pipeline execution. The default value specified should be an instance of the type of the parameter.\n", + "\n", + "The parameters defined in this workflow include:\n", + "\n", + "* `processing_instance_count` - The instance count of the processing job.\n", + "* `instance_type` - The `ml.*` instance type of the training job.\n", + "* `model_approval_status` - The approval status to register with the trained model for CI/CD purposes (\"PendingManualApproval\" is the default).\n", + "* `input_data` - The S3 bucket URI location of the input data.\n", + "* `batch_data` - The S3 bucket URI location of the batch data.\n", + "* `mse_threshold` - The Mean Squared Error (MSE) threshold used to verify the accuracy of a model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b730dc01", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:50.422392Z", + "iopub.status.busy": "2022-07-13T16:07:50.421597Z", + "iopub.status.idle": "2022-07-13T16:07:50.423907Z", + "shell.execute_reply": "2022-07-13T16:07:50.423467Z" + }, + "papermill": { + "duration": 0.058807, + "end_time": "2022-07-13T16:07:50.424032", + "exception": false, + "start_time": "2022-07-13T16:07:50.365225", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.workflow.parameters import (\n", + " ParameterInteger,\n", + " ParameterString,\n", + " ParameterFloat,\n", + ")\n", + "\n", + "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", + "instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "model_approval_status = ParameterString(\n", + " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", + ")\n", + "input_data = ParameterString(\n", + " name=\"InputData\",\n", + " default_value=input_data_uri,\n", + ")\n", + "batch_data = ParameterString(\n", + " name=\"BatchData\",\n", + " default_value=batch_data_uri,\n", + ")\n", + "mse_threshold = ParameterFloat(name=\"MseThreshold\", default_value=6.0)" + ] + }, + { + "cell_type": "markdown", + "id": "2113cab9", + "metadata": { + "papermill": { + "duration": 0.061565, + "end_time": "2022-07-13T16:07:50.532413", + "exception": false, + "start_time": "2022-07-13T16:07:50.470848", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define Parameters](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-1.png)" + ] + }, + { + "cell_type": "markdown", + "id": "0279c19d", + "metadata": { + "papermill": { + "duration": 0.034918, + "end_time": 
"2022-07-13T16:07:50.601674", + "exception": false, + "start_time": "2022-07-13T16:07:50.566756", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Processing Step for Feature Engineering\n", + "\n", + "First, develop a preprocessing script that is specified in the Processing step.\n", + "\n", + "A notebook cell below writes the file `code/preprocessing.py`, which contains the preprocessing script. You can update the script and rerun that cell to overwrite the file. The preprocessing script uses `scikit-learn` to do the following:\n", + "\n", + "* Fill in missing sex category data and encode it so that it is suitable for training.\n", + "* Scale and normalize all numerical fields, aside from sex and rings numerical data.\n", + "* Split the data into training, validation, and test datasets.\n", + "\n", + "The Processing step executes the script on the input data. The Training step uses the preprocessed training features and labels to train a model. The Evaluation step uses the trained model and preprocessed test features and labels to evaluate the model."
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8b07a322", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:50.679648Z", + "iopub.status.busy": "2022-07-13T16:07:50.678977Z", + "iopub.status.idle": "2022-07-13T16:07:50.827765Z", + "shell.execute_reply": "2022-07-13T16:07:50.827361Z" + }, + "papermill": { + "duration": 0.19357, + "end_time": "2022-07-13T16:07:50.827884", + "exception": false, + "start_time": "2022-07-13T16:07:50.634314", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!mkdir -p code" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3901891e", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:50.900839Z", + "iopub.status.busy": "2022-07-13T16:07:50.899665Z", + "iopub.status.idle": "2022-07-13T16:07:50.902508Z", + "shell.execute_reply": "2022-07-13T16:07:50.902925Z" + }, + "papermill": { + "duration": 0.043198, + "end_time": "2022-07-13T16:07:50.903065", + "exception": false, + "start_time": "2022-07-13T16:07:50.859867", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing code/preprocessing.py\n" + ] + } + ], + "source": [ + "%%writefile code/preprocessing.py\n", + "import argparse\n", + "import os\n", + "import requests\n", + "import tempfile\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "\n", + "\n", + "# Since we get a headerless CSV file, we specify the column names here.\n", + "feature_columns_names = [\n", + " \"sex\",\n", + " \"length\",\n", + " \"diameter\",\n", + " \"height\",\n", + " \"whole_weight\",\n", + " 
\"shucked_weight\",\n", + " \"viscera_weight\",\n", + " \"shell_weight\",\n", + "]\n", + "label_column = \"rings\"\n", + "\n", + "feature_columns_dtype = {\n", + " \"sex\": str,\n", + " \"length\": np.float64,\n", + " \"diameter\": np.float64,\n", + " \"height\": np.float64,\n", + " \"whole_weight\": np.float64,\n", + " \"shucked_weight\": np.float64,\n", + " \"viscera_weight\": np.float64,\n", + " \"shell_weight\": np.float64,\n", + "}\n", + "label_column_dtype = {\"rings\": np.float64}\n", + "\n", + "\n", + "def merge_two_dicts(x, y):\n", + " z = x.copy()\n", + " z.update(y)\n", + " return z\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " base_dir = \"/opt/ml/processing\"\n", + "\n", + " df = pd.read_csv(\n", + " f\"{base_dir}/input/abalone-dataset.csv\",\n", + " header=None,\n", + " names=feature_columns_names + [label_column],\n", + " dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),\n", + " )\n", + " numeric_features = list(feature_columns_names)\n", + " numeric_features.remove(\"sex\")\n", + " numeric_transformer = Pipeline(\n", + " steps=[(\"imputer\", SimpleImputer(strategy=\"median\")), (\"scaler\", StandardScaler())]\n", + " )\n", + "\n", + " categorical_features = [\"sex\"]\n", + " categorical_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", + " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n", + " ]\n", + " )\n", + "\n", + " preprocess = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", numeric_transformer, numeric_features),\n", + " (\"cat\", categorical_transformer, categorical_features),\n", + " ]\n", + " )\n", + "\n", + " y = df.pop(\"rings\")\n", + " X_pre = preprocess.fit_transform(df)\n", + " y_pre = y.to_numpy().reshape(len(y), 1)\n", + "\n", + " X = np.concatenate((y_pre, X_pre), axis=1)\n", + "\n", + " np.random.shuffle(X)\n", + " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", + 
"\n", + " pd.DataFrame(train).to_csv(f\"{base_dir}/train/train.csv\", header=False, index=False)\n", + " pd.DataFrame(validation).to_csv(\n", + " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", + " )\n", + " pd.DataFrame(test).to_csv(f\"{base_dir}/test/test.csv\", header=False, index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "753400dc", + "metadata": { + "papermill": { + "duration": 0.03212, + "end_time": "2022-07-13T16:07:50.967490", + "exception": false, + "start_time": "2022-07-13T16:07:50.935370", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Next, create an instance of a `SKLearnProcessor` processor and use that in our `ProcessingStep`.\n", + "\n", + "You also specify the `framework_version` to use throughout this notebook.\n", + "\n", + "Note the `processing_instance_count` parameter used by the processor instance." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c3563172", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:51.037823Z", + "iopub.status.busy": "2022-07-13T16:07:51.037000Z", + "iopub.status.idle": "2022-07-13T16:07:51.055494Z", + "shell.execute_reply": "2022-07-13T16:07:51.055848Z" + }, + "papermill": { + "duration": 0.056125, + "end_time": "2022-07-13T16:07:51.055993", + "exception": false, + "start_time": "2022-07-13T16:07:50.999868", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.sklearn.processing import SKLearnProcessor\n", + "\n", + "\n", + "framework_version = \"0.23-1\"\n", + "\n", + "sklearn_processor = SKLearnProcessor(\n", + " framework_version=framework_version,\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=processing_instance_count,\n", + " base_job_name=\"sklearn-abalone-process\",\n", + " role=role,\n", + " sagemaker_session=pipeline_session,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "93a1d463", + "metadata": { 
+ "papermill": { + "duration": 0.032682, + "end_time": "2022-07-13T16:07:51.121085", + "exception": false, + "start_time": "2022-07-13T16:07:51.088403", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Finally, we take the output of the processor's `run` method and pass that as arguments to the `ProcessingStep`. By passing the `pipeline_session` to the `sagemaker_session`, calling `.run()` does not launch the processing job; instead, it returns the arguments needed to run the job as a step in the pipeline.\n", + "\n", + "Note the `\"train\"`, `\"validation\"`, and `\"test\"` named outputs specified in the output configuration for the processing job. Step `Properties` can be used in subsequent steps and resolve to their runtime values at execution. Specifically, this usage is called out when you define the training step." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "240281be", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:51.192383Z", + "iopub.status.busy": "2022-07-13T16:07:51.191558Z", + "iopub.status.idle": "2022-07-13T16:07:51.548107Z", + "shell.execute_reply": "2022-07-13T16:07:51.548453Z" + }, + "papermill": { + "duration": 0.39512, + "end_time": "2022-07-13T16:07:51.548594", + "exception": false, + "start_time": "2022-07-13T16:07:51.153474", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/pipeline_context.py:197: UserWarning: Running within a PipelineSession, there will be No Wait, No Logs, and No Job being started.\n", + " UserWarning,\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Job Name: sklearn-abalone-process-2022-07-13-16-07-51-228\n", + "Inputs: [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': ParameterString(name='InputData', parameter_type=,
default_value='s3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset.csv'), 'LocalPath': '/opt/ml/processing/input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/input/code/preprocessing.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", + "Outputs: [{'OutputName': 'train', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/output/train', 'LocalPath': '/opt/ml/processing/train', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'validation', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/output/validation', 'LocalPath': '/opt/ml/processing/validation', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'test', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/output/test', 'LocalPath': '/opt/ml/processing/test', 'S3UploadMode': 'EndOfJob'}}]\n" + ] + } + ], + "source": [ + "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", + "from sagemaker.workflow.steps import ProcessingStep\n", + "\n", + "processor_args = sklearn_processor.run(\n", + " inputs=[\n", + " ProcessingInput(source=input_data, destination=\"/opt/ml/processing/input\"),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " 
code=\"code/preprocessing.py\",\n", + ")\n", + "\n", + "step_process = ProcessingStep(name=\"AbaloneProcess\", step_args=processor_args)" + ] + }, + { + "cell_type": "markdown", + "id": "b42f9bb1", + "metadata": { + "papermill": { + "duration": 0.033806, + "end_time": "2022-07-13T16:07:51.617961", + "exception": false, + "start_time": "2022-07-13T16:07:51.584155", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Processing Step for Feature Engineering](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-2.png)" + ] + }, + { + "cell_type": "markdown", + "id": "dc187225", + "metadata": { + "papermill": { + "duration": 0.040739, + "end_time": "2022-07-13T16:07:51.691562", + "exception": false, + "start_time": "2022-07-13T16:07:51.650823", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Training Step to Train a Model\n", + "\n", + "In this section, use Amazon SageMaker's [XGBoost Algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html) to train on this dataset. Configure an Estimator for the XGBoost algorithm and the input dataset. A typical training script loads data from the input channels, configures training with hyperparameters, trains a model, and saves a model to `model_dir` so that it can be hosted later.\n", + "\n", + "The model path where the models from training are saved is also specified.\n", + "\n", + "Note the `instance_type` parameter may be used in multiple places in the pipeline. In this case, the `instance_type` is passed into the estimator." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7407bef6", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:51.766085Z", + "iopub.status.busy": "2022-07-13T16:07:51.765580Z", + "iopub.status.idle": "2022-07-13T16:07:51.897005Z", + "shell.execute_reply": "2022-07-13T16:07:51.896426Z" + }, + "papermill": { + "duration": 0.171801, + "end_time": "2022-07-13T16:07:51.897137", + "exception": false, + "start_time": "2022-07-13T16:07:51.725336", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.estimator import Estimator\n", + "from sagemaker.inputs import TrainingInput\n", + "\n", + "model_path = f\"s3://{default_bucket}/AbaloneTrain\"\n", + "image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=\"ml.m5.xlarge\",\n", + ")\n", + "xgb_train = Estimator(\n", + " image_uri=image_uri,\n", + " instance_type=instance_type,\n", + " instance_count=1,\n", + " output_path=model_path,\n", + " role=role,\n", + " sagemaker_session=pipeline_session,\n", + ")\n", + "xgb_train.set_hyperparameters(\n", + " objective=\"reg:linear\",\n", + " num_round=50,\n", + " max_depth=5,\n", + " eta=0.2,\n", + " gamma=4,\n", + " min_child_weight=6,\n", + " subsample=0.7,\n", + ")\n", + "\n", + "train_args = xgb_train.fit(\n", + " inputs={\n", + " \"train\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " \"validation\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", + " \"validation\"\n", + " ].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f86374ad", + "metadata": { + "papermill": { + "duration": 0.064705, + 
"end_time": "2022-07-13T16:07:51.995767", + "exception": false, + "start_time": "2022-07-13T16:07:51.931062", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Finally, we use the output of the estimator's `.fit()` method as arguments to the `TrainingStep`. By passing the `pipeline_session` to the `sagemaker_session`, calling `.fit()` does not launch the training job; instead, it returns the arguments needed to run the job as a step in the pipeline.\n", + "\n", + "Pass in the `S3Uri` of the `\"train\"` and `\"validation\"` outputs of the processing step to the `.fit()` method. Also, use the `\"test\"` output for model evaluation in the pipeline. The `properties` attribute of a Pipeline step matches the object model of the corresponding response of a describe call. These properties can be referenced as placeholder values and are resolved at runtime. For example, the `ProcessingStep` `properties` attribute matches the object model of the [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response object."
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "724d8eef", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:52.195314Z", + "iopub.status.busy": "2022-07-13T16:07:52.193872Z", + "iopub.status.idle": "2022-07-13T16:07:52.195919Z", + "shell.execute_reply": "2022-07-13T16:07:52.196336Z" + }, + "papermill": { + "duration": 0.103611, + "end_time": "2022-07-13T16:07:52.196480", + "exception": false, + "start_time": "2022-07-13T16:07:52.092869", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.inputs import TrainingInput\n", + "from sagemaker.workflow.steps import TrainingStep\n", + "\n", + "\n", + "step_train = TrainingStep(\n", + " name=\"AbaloneTrain\",\n", + " step_args=train_args,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "61d7285f", + "metadata": { + "papermill": { + "duration": 0.097979, + "end_time": "2022-07-13T16:07:52.392020", + "exception": false, + "start_time": "2022-07-13T16:07:52.294041", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Training Step to Train a Model](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-3.png)" + ] + }, + { + "cell_type": "markdown", + "id": "fd58bfa7", + "metadata": { + "papermill": { + "duration": 0.097153, + "end_time": "2022-07-13T16:07:52.586494", + "exception": false, + "start_time": "2022-07-13T16:07:52.489341", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Model Evaluation Step to Evaluate the Trained Model\n", + "\n", + "First, develop an evaluation script that is specified in a Processing step that performs the model evaluation.\n", + "\n", + "After pipeline execution, you can examine the resulting `evaluation.json` for analysis.\n", + "\n", + "The evaluation script uses `xgboost` to do the following:\n", + "\n", + "* Load the 
model.\n", + "* Read the test data.\n", + "* Issue predictions against the test data.\n", + "* Build a regression report containing the mean squared error (MSE) and the standard deviation of the prediction errors.\n", + "* Save the evaluation report to the evaluation directory." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a03a42f2", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:52.867379Z", + "iopub.status.busy": "2022-07-13T16:07:52.866257Z", + "iopub.status.idle": "2022-07-13T16:07:52.869025Z", + "shell.execute_reply": "2022-07-13T16:07:52.869384Z" + }, + "papermill": { + "duration": 0.184948, + "end_time": "2022-07-13T16:07:52.869516", + "exception": false, + "start_time": "2022-07-13T16:07:52.684568", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing code/evaluation.py\n" + ] + } + ], + "source": [ + "%%writefile code/evaluation.py\n", + "import json\n", + "import pathlib\n", + "import pickle\n", + "import tarfile\n", + "\n", + "import joblib\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xgboost\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " model_path = f\"/opt/ml/processing/model/model.tar.gz\"\n", + " with tarfile.open(model_path) as tar:\n", + " tar.extractall(path=\".\")\n", + "\n", + " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", + "\n", + " test_path = \"/opt/ml/processing/test/test.csv\"\n", + " df = pd.read_csv(test_path, header=None)\n", + "\n", + " y_test = df.iloc[:, 0].to_numpy()\n", + " df.drop(df.columns[0], axis=1, inplace=True)\n", + "\n", + " X_test = xgboost.DMatrix(df.values)\n", + "\n", + " predictions = model.predict(X_test)\n", + "\n", + " mse = mean_squared_error(y_test, predictions)\n", + " std = np.std(y_test - predictions)\n", + " report_dict = {\n", + " \"regression_metrics\": {\n", + " \"mse\":
{\"value\": mse, \"standard_deviation\": std},\n", + " },\n", + " }\n", + "\n", + " output_dir = \"/opt/ml/processing/evaluation\"\n", + " pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)\n", + "\n", + " evaluation_path = f\"{output_dir}/evaluation.json\"\n", + " with open(evaluation_path, \"w\") as f:\n", + " f.write(json.dumps(report_dict))" + ] + }, + { + "cell_type": "markdown", + "id": "3db80eee", + "metadata": { + "papermill": { + "duration": 0.033789, + "end_time": "2022-07-13T16:07:52.999885", + "exception": false, + "start_time": "2022-07-13T16:07:52.966096", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Next, create an instance of a `ScriptProcessor` processor and use it in the `ProcessingStep`." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "53230930", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:53.265975Z", + "iopub.status.busy": "2022-07-13T16:07:53.265226Z", + "iopub.status.idle": "2022-07-13T16:07:53.339364Z", + "shell.execute_reply": "2022-07-13T16:07:53.338974Z" + }, + "papermill": { + "duration": 0.242851, + "end_time": "2022-07-13T16:07:53.339481", + "exception": false, + "start_time": "2022-07-13T16:07:53.096630", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Job Name: script-abalone-eval-2022-07-13-16-07-53-265\n", + "Inputs: [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': , 'LocalPath': '/opt/ml/processing/model', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'input-2', 'AppManaged': False, 'S3Input': {'S3Uri': , 'LocalPath': '/opt/ml/processing/test', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 
'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/script-abalone-eval-2022-07-13-16-07-53-265/input/code/evaluation.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", + "Outputs: [{'OutputName': 'evaluation', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/script-abalone-eval-2022-07-13-16-07-53-265/output/evaluation', 'LocalPath': '/opt/ml/processing/evaluation', 'S3UploadMode': 'EndOfJob'}}]\n" + ] + } + ], + "source": [ + "from sagemaker.processing import ScriptProcessor\n", + "\n", + "\n", + "script_eval = ScriptProcessor(\n", + " image_uri=image_uri,\n", + " command=[\"python3\"],\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=1,\n", + " base_job_name=\"script-abalone-eval\",\n", + " role=role,\n", + " sagemaker_session=pipeline_session,\n", + ")\n", + "\n", + "eval_args = script_eval.run(\n", + " inputs=[\n", + " ProcessingInput(\n", + " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " destination=\"/opt/ml/processing/model\",\n", + " ),\n", + " ProcessingInput(\n", + " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", + " destination=\"/opt/ml/processing/test\",\n", + " ),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", + " ],\n", + " code=\"code/evaluation.py\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "858206cd", + "metadata": { + "papermill": { + "duration": 0.097754, + "end_time": "2022-07-13T16:07:53.494882", + "exception": false, + "start_time": "2022-07-13T16:07:53.397128", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Use the processor's arguments returned by `.run()` to construct a `ProcessingStep`, along with the input and output channels and the code that will be executed when the 
pipeline runs.\n", + "\n", + "Specifically, the `S3ModelArtifacts` from the `step_train` `properties` and the `S3Uri` of the `\"test\"` output of the `step_process` `properties` are passed as inputs. The `TrainingStep` and `ProcessingStep` `properties` attributes match the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) and [DescribeProcessingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeProcessingJob.html) response objects, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ae643d67", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:53.695523Z", + "iopub.status.busy": "2022-07-13T16:07:53.694111Z", + "iopub.status.idle": "2022-07-13T16:07:53.696160Z", + "shell.execute_reply": "2022-07-13T16:07:53.696534Z" + }, + "papermill": { + "duration": 0.104102, + "end_time": "2022-07-13T16:07:53.696669", + "exception": false, + "start_time": "2022-07-13T16:07:53.592567", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.workflow.properties import PropertyFile\n", + "\n", + "\n", + "evaluation_report = PropertyFile(\n", + " name=\"EvaluationReport\", output_name=\"evaluation\", path=\"evaluation.json\"\n", + ")\n", + "step_eval = ProcessingStep(\n", + " name=\"AbaloneEval\",\n", + " step_args=eval_args,\n", + " property_files=[evaluation_report],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "99f1a3c2", + "metadata": { + "papermill": { + "duration": 0.096611, + "end_time": "2022-07-13T16:07:53.966367", + "exception": false, + "start_time": "2022-07-13T16:07:53.869756", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Model Evaluation Step to Evaluate the Trained
Model](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-4.png)" + ] + }, + { + "cell_type": "markdown", + "id": "1ff0a560", + "metadata": { + "papermill": { + "duration": 0.097005, + "end_time": "2022-07-13T16:07:54.096931", + "exception": false, + "start_time": "2022-07-13T16:07:53.999926", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Create Model Step to Create a Model\n", + "\n", + "In order to perform batch transformation using the example model, create a SageMaker model.\n", + "\n", + "Specifically, pass in the `S3ModelArtifacts` from the `TrainingStep`, `step_train` properties. The `TrainingStep` `properties` attribute matches the object model of the [DescribeTrainingJob](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html) response object." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6aab382f", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:54.292477Z", + "iopub.status.busy": "2022-07-13T16:07:54.291118Z", + "iopub.status.idle": "2022-07-13T16:07:54.293086Z", + "shell.execute_reply": "2022-07-13T16:07:54.293444Z" + }, + "papermill": { + "duration": 0.103791, + "end_time": "2022-07-13T16:07:54.293572", + "exception": false, + "start_time": "2022-07-13T16:07:54.189781", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.model import Model\n", + "\n", + "model = Model(\n", + " image_uri=image_uri,\n", + " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d9be254b", + "metadata": { + "papermill": { + "duration": 0.099816, + "end_time": "2022-07-13T16:07:54.490471", + "exception": false, + "start_time": "2022-07-13T16:07:54.390655", + 
"status": "completed" + }, + "tags": [] + }, + "source": [ + "Define the `ModelStep` by providing the return values from `model.create()` as the step arguments." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7a8dc222", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:54.690071Z", + "iopub.status.busy": "2022-07-13T16:07:54.689317Z", + "iopub.status.idle": "2022-07-13T16:07:54.694056Z", + "shell.execute_reply": "2022-07-13T16:07:54.694467Z" + }, + "papermill": { + "duration": 0.106688, + "end_time": "2022-07-13T16:07:54.694631", + "exception": false, + "start_time": "2022-07-13T16:07:54.587943", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.inputs import CreateModelInput\n", + "from sagemaker.workflow.model_step import ModelStep\n", + "\n", + "step_create_model = ModelStep(\n", + " name=\"AbaloneCreateModel\",\n", + " step_args=model.create(instance_type=\"ml.m5.large\", accelerator_type=\"ml.eia1.medium\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c26c7052", + "metadata": { + "papermill": { + "duration": 0.092854, + "end_time": "2022-07-13T16:07:54.964979", + "exception": false, + "start_time": "2022-07-13T16:07:54.872125", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Transform Step to Perform Batch Transformation\n", + "\n", + "Now that a model instance is defined, create a `Transformer` instance with the appropriate model type, compute instance type, and desired output S3 URI.\n", + "\n", + "Specifically, pass in the `ModelName` from the `CreateModelStep`, `step_create_model` properties. The `CreateModelStep` `properties` attribute matches the object model of the [DescribeModel](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeModel.html) response object." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "f55318d8", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:55.168074Z", + "iopub.status.busy": "2022-07-13T16:07:55.167333Z", + "iopub.status.idle": "2022-07-13T16:07:55.175645Z", + "shell.execute_reply": "2022-07-13T16:07:55.176011Z" + }, + "papermill": { + "duration": 0.177212, + "end_time": "2022-07-13T16:07:55.176173", + "exception": false, + "start_time": "2022-07-13T16:07:54.998961", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.transformer import Transformer\n", + "\n", + "\n", + "transformer = Transformer(\n", + " model_name=step_create_model.properties.ModelName,\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=1,\n", + " output_path=f\"s3://{default_bucket}/AbaloneTransform\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6227a29a", + "metadata": { + "papermill": { + "duration": 0.102855, + "end_time": "2022-07-13T16:07:55.376256", + "exception": false, + "start_time": "2022-07-13T16:07:55.273401", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Pass in the transformer instance and the `TransformInput` with the `batch_data` pipeline parameter defined earlier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6e1aa0ad", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:55.578902Z", + "iopub.status.busy": "2022-07-13T16:07:55.577537Z", + "iopub.status.idle": "2022-07-13T16:07:55.579522Z", + "shell.execute_reply": "2022-07-13T16:07:55.579879Z" + }, + "papermill": { + "duration": 0.110305, + "end_time": "2022-07-13T16:07:55.580011", + "exception": false, + "start_time": "2022-07-13T16:07:55.469706", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.inputs import TransformInput\n", + "from sagemaker.workflow.steps import TransformStep\n", + "\n", + "\n", + "step_transform = TransformStep(\n", + " name=\"AbaloneTransform\", transformer=transformer, inputs=TransformInput(data=batch_data)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e400b0f0", + "metadata": { + "papermill": { + "duration": 0.097293, + "end_time": "2022-07-13T16:07:55.779132", + "exception": false, + "start_time": "2022-07-13T16:07:55.681839", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Register Model Step to Create a Model Package\n", + "\n", + "A model package is an abstraction of reusable model artifacts that packages all ingredients required for inference. Primarily, it consists of an inference specification that defines the inference image to use along with an optional model weights location.\n", + "\n", + "A model package group is a collection of model packages. A model package group can be created for a specific ML business problem, and new versions of the model packages can be added to it. 
Typically, customers are expected to create a ModelPackageGroup for a SageMaker pipeline so that model package versions can be added to the group for every SageMaker Pipeline run.\n", + "\n", + "To register a model in the Model Registry, we take the model created in the previous steps\n", + "```\n", + "model = Model(\n", + " image_uri=image_uri,\n", + " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + ")\n", + "```\n", + "and call the `.register()` function on it while passing all the parameters needed for registering the model.\n", + "\n", + "We take the outputs of the `.register()` call and pass that to the `ModelStep` as step arguments." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "49268979", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:55.990666Z", + "iopub.status.busy": "2022-07-13T16:07:55.989233Z", + "iopub.status.idle": "2022-07-13T16:07:55.991258Z", + "shell.execute_reply": "2022-07-13T16:07:55.991663Z" + }, + "papermill": { + "duration": 0.106521, + "end_time": "2022-07-13T16:07:55.991800", + "exception": false, + "start_time": "2022-07-13T16:07:55.885279", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.model_metrics import MetricsSource, ModelMetrics\n", + "\n", + "model_metrics = ModelMetrics(\n", + " model_statistics=MetricsSource(\n", + " s3_uri=\"{}/evaluation.json\".format(\n", + " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", + " ),\n", + " content_type=\"application/json\",\n", + " )\n", + ")\n", + "\n", + "register_args = model.register(\n", + " content_types=[\"text/csv\"],\n", + " response_types=[\"text/csv\"],\n", + " inference_instances=[\"ml.t2.medium\", \"ml.m5.xlarge\"],\n", + " transform_instances=[\"ml.m5.xlarge\"],\n", + " 
model_package_group_name=model_package_group_name,\n", + " approval_status=model_approval_status,\n", + " model_metrics=model_metrics,\n", + ")\n", + "step_register = ModelStep(name=\"AbaloneRegisterModel\", step_args=register_args)" + ] + }, + { + "cell_type": "markdown", + "id": "d1622708", + "metadata": { + "papermill": { + "duration": 0.093149, + "end_time": "2022-07-13T16:07:56.186199", + "exception": false, + "start_time": "2022-07-13T16:07:56.093050", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Create Model Step and Batch Transform to Process Data in Batch at Scale](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-5.png)" + ] + }, + { + "cell_type": "markdown", + "id": "756157c0", + "metadata": { + "papermill": { + "duration": 0.097569, + "end_time": "2022-07-13T16:07:56.381372", + "exception": false, + "start_time": "2022-07-13T16:07:56.283803", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Fail Step to Terminate the Pipeline Execution and Mark it as Failed\n", + "\n", + "This section walks you through the following steps:\n", + "\n", + "* Define a `FailStep` with customized error message, which indicates the cause of the execution failure.\n", + "* Enter the `FailStep` error message with a `Join` function, which appends a static text string with the dynamic `mse_threshold` parameter to build a more informative error message." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "f041c8a8", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:56.670215Z", + "iopub.status.busy": "2022-07-13T16:07:56.669270Z", + "iopub.status.idle": "2022-07-13T16:07:56.675249Z", + "shell.execute_reply": "2022-07-13T16:07:56.674652Z" + }, + "papermill": { + "duration": 0.193196, + "end_time": "2022-07-13T16:07:56.675378", + "exception": false, + "start_time": "2022-07-13T16:07:56.482182", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.workflow.fail_step import FailStep\n", + "from sagemaker.workflow.functions import Join\n", + "\n", + "step_fail = FailStep(\n", + " name=\"AbaloneMSEFail\",\n", + " error_message=Join(on=\" \", values=[\"Execution failed due to MSE >\", mse_threshold]),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8fcf7e9c", + "metadata": { + "papermill": { + "duration": 0.182821, + "end_time": "2022-07-13T16:07:56.967379", + "exception": false, + "start_time": "2022-07-13T16:07:56.784558", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Fail Step to Terminate the Execution in Failed State](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-8.png)" + ] + }, + { + "cell_type": "markdown", + "id": "fd777542", + "metadata": { + "papermill": { + "duration": 0.034317, + "end_time": "2022-07-13T16:07:57.099302", + "exception": false, + "start_time": "2022-07-13T16:07:57.064985", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Condition Step to Check Accuracy and Conditionally Create a Model and Run a Batch Transformation and Register a Model in the Model Registry, Or Terminate the Execution in Failed State\n", + "\n", + "In this step, the model is registered only if the accuracy of the model, as determined by the 
evaluation step `step_eval`, is acceptable (that is, the MSE is less than or equal to the `mse_threshold` parameter). Otherwise, the pipeline execution fails and terminates. A `ConditionStep` enables pipelines to support conditional execution in the pipeline DAG based on the conditions of the step properties.\n", + "\n", + "In the following section, you:\n", + "\n", + "* Define a `ConditionLessThanOrEqualTo` on the MSE value found in the output of the evaluation step, `step_eval`.\n", + "* Use the condition in the list of conditions in a `ConditionStep`.\n", + "* Pass the create-model `ModelStep`, the `TransformStep`, and the register-model `ModelStep` into the `if_steps` of the `ConditionStep`, which are only executed if the condition evaluates to `True`.\n", + "* Pass the `FailStep` step into the `else_steps` of the `ConditionStep`, which is only executed if the condition evaluates to `False`." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8969e1fc", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:57.300757Z", + "iopub.status.busy": "2022-07-13T16:07:57.299973Z", + "iopub.status.idle": "2022-07-13T16:07:57.367501Z", + "shell.execute_reply": "2022-07-13T16:07:57.367888Z" + }, + "papermill": { + "duration": 0.171087, + "end_time": "2022-07-13T16:07:57.368032", + "exception": false, + "start_time": "2022-07-13T16:07:57.196945", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", + "from sagemaker.workflow.condition_step import ConditionStep\n", + "from sagemaker.workflow.functions import JsonGet\n", + "\n", + "\n", + "cond_lte = ConditionLessThanOrEqualTo(\n", + " left=JsonGet(\n", + " step_name=step_eval.name,\n", + " property_file=evaluation_report,\n", + " json_path=\"regression_metrics.mse.value\",\n", + " ),\n", + " right=mse_threshold,\n", + ")\n", + "\n", + "step_cond = ConditionStep(\n", + " name=\"AbaloneMSECond\",\n", +
" conditions=[cond_lte],\n", + " if_steps=[step_register, step_create_model, step_transform],\n", + " else_steps=[step_fail],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "772154ec", + "metadata": { + "papermill": { + "duration": 0.098709, + "end_time": "2022-07-13T16:07:57.564590", + "exception": false, + "start_time": "2022-07-13T16:07:57.465881", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Condition Step to Check Accuracy and Conditionally Execute Steps](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-6.png)" + ] + }, + { + "cell_type": "markdown", + "id": "5cd8a7d1", + "metadata": { + "papermill": { + "duration": 0.097983, + "end_time": "2022-07-13T16:07:57.697262", + "exception": false, + "start_time": "2022-07-13T16:07:57.599279", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Define a Pipeline of Parameters, Steps, and Conditions\n", + "\n", + "In this section, combine the steps into a Pipeline so it can be executed.\n", + "\n", + "A pipeline requires a `name`, `parameters`, and `steps`. Names must be unique within an `(account, region)` pair.\n", + "\n", + "Note:\n", + "\n", + "* All the parameters used in the definitions must be present.\n", + "* Steps passed into the pipeline do not have to be listed in the order of execution. The SageMaker Pipeline service resolves the data dependency DAG as steps for the execution to complete.\n", + "* Steps must be unique across the pipeline step list and all condition step if/else lists."
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "9ed7ea0f", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:57.977833Z", + "iopub.status.busy": "2022-07-13T16:07:57.977020Z", + "iopub.status.idle": "2022-07-13T16:07:57.996102Z", + "shell.execute_reply": "2022-07-13T16:07:57.995674Z" + }, + "papermill": { + "duration": 0.201248, + "end_time": "2022-07-13T16:07:57.996214", + "exception": false, + "start_time": "2022-07-13T16:07:57.794966", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.workflow.pipeline import Pipeline\n", + "\n", + "\n", + "pipeline_name = f\"AbalonePipeline\"\n", + "pipeline = Pipeline(\n", + " name=pipeline_name,\n", + " parameters=[\n", + " processing_instance_count,\n", + " instance_type,\n", + " model_approval_status,\n", + " input_data,\n", + " batch_data,\n", + " mse_threshold,\n", + " ],\n", + " steps=[step_process, step_train, step_eval, step_cond],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8bd7778c", + "metadata": { + "papermill": { + "duration": 0.098305, + "end_time": "2022-07-13T16:07:58.192290", + "exception": false, + "start_time": "2022-07-13T16:07:58.093985", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![Define a Pipeline of Parameters, Steps, and Conditions](https://raw.githubusercontent.com/aws/amazon-sagemaker-examples/main/sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-7.png)" + ] + }, + { + "cell_type": "markdown", + "id": "c1ae8658", + "metadata": { + "papermill": { + "duration": 0.097728, + "end_time": "2022-07-13T16:07:58.394347", + "exception": false, + "start_time": "2022-07-13T16:07:58.296619", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### (Optional) Examining the pipeline definition\n", + "\n", + "The JSON of the pipeline definition can be examined to confirm the pipeline is well-defined and the parameters 
and step properties resolve correctly." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "39a836b8", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:58.681323Z", + "iopub.status.busy": "2022-07-13T16:07:58.680648Z", + "iopub.status.idle": "2022-07-13T16:07:58.684073Z", + "shell.execute_reply": "2022-07-13T16:07:58.684421Z" + }, + "papermill": { + "duration": 0.18974, + "end_time": "2022-07-13T16:07:58.684552", + "exception": false, + "start_time": "2022-07-13T16:07:58.494812", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Version': '2020-12-01',\n", + " 'Metadata': {},\n", + " 'Parameters': [{'Name': 'ProcessingInstanceCount',\n", + " 'Type': 'Integer',\n", + " 'DefaultValue': 1},\n", + " {'Name': 'TrainingInstanceType',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 'ml.m5.xlarge'},\n", + " {'Name': 'ModelApprovalStatus',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 'PendingManualApproval'},\n", + " {'Name': 'InputData',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 's3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset.csv'},\n", + " {'Name': 'BatchData',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 's3://sagemaker-us-west-2-000000000000/abalone/abalone-dataset-batch'},\n", + " {'Name': 'MseThreshold', 'Type': 'Float', 'DefaultValue': 6.0}],\n", + " 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},\n", + " 'TrialName': {'Get': 'Execution.PipelineExecutionId'}},\n", + " 'Steps': [{'Name': 'AbaloneProcess',\n", + " 'Type': 'Processing',\n", + " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',\n", + " 'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},\n", + " 'VolumeSizeInGB': 30}},\n", + " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3',\n", + " 
'ContainerEntrypoint': ['python3',\n", + " '/opt/ml/processing/input/code/preprocessing.py']},\n", + " 'RoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'ProcessingInputs': [{'InputName': 'input-1',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': {'Get': 'Parameters.InputData'},\n", + " 'LocalPath': '/opt/ml/processing/input',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}},\n", + " {'InputName': 'code',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/input/code/preprocessing.py',\n", + " 'LocalPath': '/opt/ml/processing/input/code',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}}],\n", + " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'train',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/output/train',\n", + " 'LocalPath': '/opt/ml/processing/train',\n", + " 'S3UploadMode': 'EndOfJob'}},\n", + " {'OutputName': 'validation',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/output/validation',\n", + " 'LocalPath': '/opt/ml/processing/validation',\n", + " 'S3UploadMode': 'EndOfJob'}},\n", + " {'OutputName': 'test',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/sklearn-abalone-process-2022-07-13-16-07-51-228/output/test',\n", + " 'LocalPath': '/opt/ml/processing/test',\n", + " 'S3UploadMode': 'EndOfJob'}}]}}},\n", + " {'Name': 'AbaloneTrain',\n", + " 'Type': 'Training',\n", + " 'Arguments': {'AlgorithmSpecification': {'TrainingInputMode': 'File',\n", + " 'TrainingImage': 
'246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'},\n", + " 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/AbaloneTrain'},\n", + " 'StoppingCondition': {'MaxRuntimeInSeconds': 86400},\n", + " 'ResourceConfig': {'VolumeSizeInGB': 30,\n", + " 'InstanceCount': 1,\n", + " 'InstanceType': {'Get': 'Parameters.TrainingInstanceType'}},\n", + " 'RoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", + " 'S3Uri': {'Get': \"Steps.AbaloneProcess.ProcessingOutputConfig.Outputs['train'].S3Output.S3Uri\"},\n", + " 'S3DataDistributionType': 'FullyReplicated'}},\n", + " 'ContentType': 'text/csv',\n", + " 'ChannelName': 'train'},\n", + " {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", + " 'S3Uri': {'Get': \"Steps.AbaloneProcess.ProcessingOutputConfig.Outputs['validation'].S3Output.S3Uri\"},\n", + " 'S3DataDistributionType': 'FullyReplicated'}},\n", + " 'ContentType': 'text/csv',\n", + " 'ChannelName': 'validation'}],\n", + " 'HyperParameters': {'objective': 'reg:linear',\n", + " 'num_round': '50',\n", + " 'max_depth': '5',\n", + " 'eta': '0.2',\n", + " 'gamma': '4',\n", + " 'min_child_weight': '6',\n", + " 'subsample': '0.7'},\n", + " 'ProfilerRuleConfigurations': [{'RuleConfigurationName': 'ProfilerReport-1657728471',\n", + " 'RuleEvaluatorImage': '895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest',\n", + " 'RuleParameters': {'rule_to_invoke': 'ProfilerReport'}}],\n", + " 'ProfilerConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/AbaloneTrain'}}},\n", + " {'Name': 'AbaloneEval',\n", + " 'Type': 'Processing',\n", + " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',\n", + " 'InstanceCount': 1,\n", + " 'VolumeSizeInGB': 30}},\n", + " 'AppSpecification': {'ImageUri': 
'246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", + " 'ContainerEntrypoint': ['python3',\n", + " '/opt/ml/processing/input/code/evaluation.py']},\n", + " 'RoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'ProcessingInputs': [{'InputName': 'input-1',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': {'Get': 'Steps.AbaloneTrain.ModelArtifacts.S3ModelArtifacts'},\n", + " 'LocalPath': '/opt/ml/processing/model',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}},\n", + " {'InputName': 'input-2',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': {'Get': \"Steps.AbaloneProcess.ProcessingOutputConfig.Outputs['test'].S3Output.S3Uri\"},\n", + " 'LocalPath': '/opt/ml/processing/test',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}},\n", + " {'InputName': 'code',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/script-abalone-eval-2022-07-13-16-07-53-265/input/code/evaluation.py',\n", + " 'LocalPath': '/opt/ml/processing/input/code',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}}],\n", + " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'evaluation',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/script-abalone-eval-2022-07-13-16-07-53-265/output/evaluation',\n", + " 'LocalPath': '/opt/ml/processing/evaluation',\n", + " 'S3UploadMode': 'EndOfJob'}}]}},\n", + " 'PropertyFiles': [{'PropertyFileName': 'EvaluationReport',\n", + " 'OutputName': 'evaluation',\n", + " 'FilePath': 'evaluation.json'}]},\n", + " {'Name': 'AbaloneMSECond',\n", + " 'Type': 'Condition',\n", + " 'Arguments': 
{'Conditions': [{'Type': 'LessThanOrEqualTo',\n", + " 'LeftValue': {'Std:JsonGet': {'PropertyFile': {'Get': 'Steps.AbaloneEval.PropertyFiles.EvaluationReport'},\n", + " 'Path': 'regression_metrics.mse.value'}},\n", + " 'RightValue': {'Get': 'Parameters.MseThreshold'}}],\n", + " 'IfSteps': [{'Name': 'AbaloneRegisterModel-RegisterModel',\n", + " 'Type': 'RegisterModel',\n", + " 'Arguments': {'ModelPackageGroupName': 'AbaloneModelPackageGroupName',\n", + " 'ModelMetrics': {'ModelQuality': {'Statistics': {'ContentType': 'application/json',\n", + " 'S3Uri': 's3://sagemaker-us-west-2-000000000000/script-abalone-eval-2022-07-13-16-07-53-265/output/evaluation/evaluation.json'}},\n", + " 'Bias': {},\n", + " 'Explainability': {}},\n", + " 'InferenceSpecification': {'Containers': [{'Image': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", + " 'Environment': {},\n", + " 'ModelDataUrl': {'Get': 'Steps.AbaloneTrain.ModelArtifacts.S3ModelArtifacts'}}],\n", + " 'SupportedContentTypes': ['text/csv'],\n", + " 'SupportedResponseMIMETypes': ['text/csv'],\n", + " 'SupportedRealtimeInferenceInstanceTypes': ['ml.t2.medium',\n", + " 'ml.m5.xlarge'],\n", + " 'SupportedTransformInstanceTypes': ['ml.m5.xlarge']},\n", + " 'ModelApprovalStatus': {'Get': 'Parameters.ModelApprovalStatus'}}},\n", + " {'Name': 'AbaloneCreateModel-CreateModel',\n", + " 'Type': 'Model',\n", + " 'Arguments': {'ExecutionRoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'PrimaryContainer': {'Image': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", + " 'Environment': {},\n", + " 'ModelDataUrl': {'Get': 'Steps.AbaloneTrain.ModelArtifacts.S3ModelArtifacts'}}}},\n", + " {'Name': 'AbaloneTransform',\n", + " 'Type': 'Transform',\n", + " 'Arguments': {'ModelName': {'Get': 'Steps.AbaloneCreateModel-CreateModel.ModelName'},\n", + " 'TransformInput': {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", + " 'S3Uri': {'Get': 
'Parameters.BatchData'}}}},\n", + " 'TransformOutput': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/AbaloneTransform'},\n", + " 'TransformResources': {'InstanceCount': 1,\n", + " 'InstanceType': 'ml.m5.xlarge'}}}],\n", + " 'ElseSteps': [{'Name': 'AbaloneMSEFail',\n", + " 'Type': 'Fail',\n", + " 'Arguments': {'ErrorMessage': {'Std:Join': {'On': ' ',\n", + " 'Values': ['Execution failed due to MSE >',\n", + " {'Get': 'Parameters.MseThreshold'}]}}}}]}}]}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "\n", + "definition = json.loads(pipeline.definition())\n", + "definition" + ] + }, + { + "cell_type": "markdown", + "id": "0b396c63", + "metadata": { + "papermill": { + "duration": 0.093902, + "end_time": "2022-07-13T16:07:58.880484", + "exception": false, + "start_time": "2022-07-13T16:07:58.786582", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Submit the pipeline to SageMaker and start execution\n", + "\n", + "Submit the pipeline definition to the Pipeline service. The Pipeline service uses the role that is passed in to create all the jobs defined in the steps." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "944bcafb", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:07:59.093264Z", + "iopub.status.busy": "2022-07-13T16:07:59.092600Z", + "iopub.status.idle": "2022-07-13T16:08:00.011312Z", + "shell.execute_reply": "2022-07-13T16:08:00.011954Z" + }, + "papermill": { + "duration": 1.022416, + "end_time": "2022-07-13T16:08:00.012097", + "exception": false, + "start_time": "2022-07-13T16:07:58.989681", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/abalonepipeline',\n", + " 'ResponseMetadata': {'RequestId': 'dcee2b6f-839c-4493-bc40-26beda53949e',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': 'dcee2b6f-839c-4493-bc40-26beda53949e',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '83',\n", + " 'date': 'Wed, 13 Jul 2022 16:07:59 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.upsert(role_arn=role)" + ] + }, + { + "cell_type": "markdown", + "id": "7c7133c2", + "metadata": { + "papermill": { + "duration": 0.03559, + "end_time": "2022-07-13T16:08:00.083359", + "exception": false, + "start_time": "2022-07-13T16:08:00.047769", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Start the pipeline and accept all the default parameters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "cde6f323", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:08:00.168396Z", + "iopub.status.busy": "2022-07-13T16:08:00.167738Z", + "iopub.status.idle": "2022-07-13T16:08:00.468960Z", + "shell.execute_reply": "2022-07-13T16:08:00.469840Z" + }, + "papermill": { + "duration": 0.346706, + "end_time": "2022-07-13T16:08:00.470006", + "exception": false, + "start_time": "2022-07-13T16:08:00.123300", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution = pipeline.start()" + ] + }, + { + "cell_type": "markdown", + "id": "2a608977", + "metadata": { + "papermill": { + "duration": 0.040044, + "end_time": "2022-07-13T16:08:00.552015", + "exception": false, + "start_time": "2022-07-13T16:08:00.511971", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Pipeline Operations: Examining and Waiting for Pipeline Execution\n", + "\n", + "Describe the pipeline execution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "7140e583", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:08:00.638590Z", + "iopub.status.busy": "2022-07-13T16:08:00.638043Z", + "iopub.status.idle": "2022-07-13T16:08:00.979910Z", + "shell.execute_reply": "2022-07-13T16:08:00.980329Z" + }, + "papermill": { + "duration": 0.388899, + "end_time": "2022-07-13T16:08:00.980580", + "exception": false, + "start_time": "2022-07-13T16:08:00.591681", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/abalonepipeline',\n", + " 'PipelineExecutionArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/abalonepipeline/execution/d84ewltjmxhe',\n", + " 'PipelineExecutionDisplayName': 'execution-1657728480421',\n", + " 'PipelineExecutionStatus': 'Executing',\n", + " 'CreationTime': datetime.datetime(2022, 7, 13, 16, 8, 0, 354000, tzinfo=tzlocal()),\n", + " 'LastModifiedTime': datetime.datetime(2022, 7, 13, 16, 8, 0, 354000, tzinfo=tzlocal()),\n", + " 'CreatedBy': {},\n", + " 'LastModifiedBy': {},\n", + " 'ResponseMetadata': {'RequestId': '04d9849a-0f5d-40f7-88d9-30e87f51908a',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '04d9849a-0f5d-40f7-88d9-30e87f51908a',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '395',\n", + " 'date': 'Wed, 13 Jul 2022 16:08:00 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execution.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "d548cb82", + "metadata": { + "papermill": { + "duration": 0.058959, + "end_time": "2022-07-13T16:08:01.088218", + "exception": false, + "start_time": "2022-07-13T16:08:01.029259", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Wait for the 
execution to complete." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "97251833", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:08:01.164376Z", + "iopub.status.busy": "2022-07-13T16:08:01.163859Z", + "iopub.status.idle": "2022-07-13T16:27:11.118707Z", + "shell.execute_reply": "2022-07-13T16:27:11.117917Z" + }, + "papermill": { + "duration": 1149.995026, + "end_time": "2022-07-13T16:27:11.118837", + "exception": false, + "start_time": "2022-07-13T16:08:01.123811", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.wait()" + ] + }, + { + "cell_type": "markdown", + "id": "a4dc0b89", + "metadata": { + "papermill": { + "duration": 0.038587, + "end_time": "2022-07-13T16:27:11.202554", + "exception": false, + "start_time": "2022-07-13T16:27:11.163967", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "List the steps in the execution. These are the steps in the pipeline that have been resolved by the step executor service." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "6293013c", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:27:11.287922Z", + "iopub.status.busy": "2022-07-13T16:27:11.287440Z", + "iopub.status.idle": "2022-07-13T16:27:11.552799Z", + "shell.execute_reply": "2022-07-13T16:27:11.553188Z" + }, + "papermill": { + "duration": 0.311444, + "end_time": "2022-07-13T16:27:11.553327", + "exception": false, + "start_time": "2022-07-13T16:27:11.241883", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'StepName': 'AbaloneTransform',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 28, 497000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 26, 41, 240000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:transform-job/pipelines-d84ewltjmxhe-abalonetransform-hjf5dpst8n'}}},\n", + " {'StepName': 'AbaloneRegisterModel-RegisterModel',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 27, 227000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 27, 908000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model-package/abalonemodelpackagegroupname/1'}}},\n", + " {'StepName': 'AbaloneCreateModel-CreateModel',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 27, 227000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 28, 88000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model/pipelines-d84ewltjmxhe-abalonecreatemodel-c-dgvk2pwxwn'}}},\n", + " {'StepName': 'AbaloneMSECond',\n", + " 
'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 25, 972000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 26, 697000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'Condition': {'Outcome': 'True'}}},\n", + " {'StepName': 'AbaloneEval',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 16, 42, 221000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 25, 344000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-d84ewltjmxhe-abaloneeval-gq8u48ijdl'}}},\n", + " {'StepName': 'AbaloneTrain',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 13, 8, 547000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 16, 41, 447000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-d84ewltjmxhe-abalonetrain-3kyzvpyx11'}}},\n", + " {'StepName': 'AbaloneProcess',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 8, 2, 216000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 13, 7, 881000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-d84ewltjmxhe-abaloneprocess-alx6e7utl3'}}}]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execution.list_steps()" + ] + }, + { + "cell_type": "markdown", + "id": "f1b6e6aa", + "metadata": { + "papermill": { + "duration": 0.043169, + "end_time": "2022-07-13T16:27:11.634677", + "exception": false, + "start_time": "2022-07-13T16:27:11.591508", + "status": "completed" + }, + "tags": [] + }, 
+ "source": [ + "### Examining the Evaluation\n", + "\n", + "Examine the resulting model evaluation after the pipeline completes. Download the resulting `evaluation.json` file from S3 and print the report." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "f151e408", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:27:11.740982Z", + "iopub.status.busy": "2022-07-13T16:27:11.740105Z", + "iopub.status.idle": "2022-07-13T16:27:12.193225Z", + "shell.execute_reply": "2022-07-13T16:27:12.192820Z" + }, + "papermill": { + "duration": 0.514239, + "end_time": "2022-07-13T16:27:12.193344", + "exception": false, + "start_time": "2022-07-13T16:27:11.679105", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'regression_metrics': {'mse': {'standard_deviation': 2.2157255951663437,\n", + " 'value': 4.913856830660247}}}\n" + ] + } + ], + "source": [ + "from pprint import pprint\n", + "\n", + "\n", + "evaluation_json = sagemaker.s3.S3Downloader.read_file(\n", + " \"{}/evaluation.json\".format(\n", + " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", + " )\n", + ")\n", + "pprint(json.loads(evaluation_json))" + ] + }, + { + "cell_type": "markdown", + "id": "08eec00d", + "metadata": { + "papermill": { + "duration": 0.036632, + "end_time": "2022-07-13T16:27:12.266645", + "exception": false, + "start_time": "2022-07-13T16:27:12.230013", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Lineage\n", + "\n", + "Review the lineage of the artifacts generated by the pipeline." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ac629990", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:27:12.347880Z", + "iopub.status.busy": "2022-07-13T16:27:12.347015Z", + "iopub.status.idle": "2022-07-13T16:27:49.232675Z", + "shell.execute_reply": "2022-07-13T16:27:49.233495Z" + }, + "papermill": { + "duration": 36.928591, + "end_time": "2022-07-13T16:27:49.233763", + "exception": false, + "start_time": "2022-07-13T16:27:12.305172", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneProcess', 'StartTime': datetime.datetime(2022, 7, 13, 16, 8, 2, 216000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 13, 7, 881000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-d84ewltjmxhe-abaloneprocess-alx6e7utl3'}}}\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...16-07-51-228/input/code/preprocessing.pyInputDataSetContributedToartifact
1s3://...000000000000/abalone/abalone-dataset.csvInputDataSetContributedToartifact
224661...om/sagemaker-scikit-learn:0.23-1-cpu-py3InputImageContributedToartifact
3s3://...cess-2022-07-13-16-07-51-228/output/testOutputDataSetProducedartifact
4s3://...022-07-13-16-07-51-228/output/validationOutputDataSetProducedartifact
5s3://...ess-2022-07-13-16-07-51-228/output/trainOutputDataSetProducedartifact
\n", + "
" + ], + "text/plain": [ + " Name/Source Direction Type \\\n", + "0 s3://...16-07-51-228/input/code/preprocessing.py Input DataSet \n", + "1 s3://...000000000000/abalone/abalone-dataset.csv Input DataSet \n", + "2 24661...om/sagemaker-scikit-learn:0.23-1-cpu-py3 Input Image \n", + "3 s3://...cess-2022-07-13-16-07-51-228/output/test Output DataSet \n", + "4 s3://...022-07-13-16-07-51-228/output/validation Output DataSet \n", + "5 s3://...ess-2022-07-13-16-07-51-228/output/train Output DataSet \n", + "\n", + " Association Type Lineage Type \n", + "0 ContributedTo artifact \n", + "1 ContributedTo artifact \n", + "2 ContributedTo artifact \n", + "3 Produced artifact \n", + "4 Produced artifact \n", + "5 Produced artifact " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneTrain', 'StartTime': datetime.datetime(2022, 7, 13, 16, 13, 8, 547000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 16, 41, 447000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-d84ewltjmxhe-abalonetrain-3kyzvpyx11'}}}\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...022-07-13-16-07-51-228/output/validationInputDataSetContributedToartifact
1s3://...ess-2022-07-13-16-07-51-228/output/trainInputDataSetContributedToartifact
224661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
3s3://...loneTrain-3kYZvpYx11/output/model.tar.gzOutputModelProducedartifact
\n", + "
" + ], + "text/plain": [ + " Name/Source Direction Type \\\n", + "0 s3://...022-07-13-16-07-51-228/output/validation Input DataSet \n", + "1 s3://...ess-2022-07-13-16-07-51-228/output/train Input DataSet \n", + "2 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", + "3 s3://...loneTrain-3kYZvpYx11/output/model.tar.gz Output Model \n", + "\n", + " Association Type Lineage Type \n", + "0 ContributedTo artifact \n", + "1 ContributedTo artifact \n", + "2 ContributedTo artifact \n", + "3 Produced artifact " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneEval', 'StartTime': datetime.datetime(2022, 7, 13, 16, 16, 42, 221000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 25, 344000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-d84ewltjmxhe-abaloneeval-gq8u48ijdl'}}}\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...13-16-07-53-265/input/code/evaluation.pyInputDataSetContributedToartifact
1s3://...cess-2022-07-13-16-07-51-228/output/testInputDataSetContributedToartifact
2s3://...loneTrain-3kYZvpYx11/output/model.tar.gzInputModelContributedToartifact
324661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
4s3://...022-07-13-16-07-53-265/output/evaluationOutputDataSetProducedartifact
\n", + "
" + ], + "text/plain": [ + " Name/Source Direction Type \\\n", + "0 s3://...13-16-07-53-265/input/code/evaluation.py Input DataSet \n", + "1 s3://...cess-2022-07-13-16-07-51-228/output/test Input DataSet \n", + "2 s3://...loneTrain-3kYZvpYx11/output/model.tar.gz Input Model \n", + "3 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", + "4 s3://...022-07-13-16-07-53-265/output/evaluation Output DataSet \n", + "\n", + " Association Type Lineage Type \n", + "0 ContributedTo artifact \n", + "1 ContributedTo artifact \n", + "2 ContributedTo artifact \n", + "3 ContributedTo artifact \n", + "4 Produced artifact " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneMSECond', 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 25, 972000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 26, 697000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'Condition': {'Outcome': 'True'}}}\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneCreateModel-CreateModel', 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 27, 227000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 28, 88000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model/pipelines-d84ewltjmxhe-abalonecreatemodel-c-dgvk2pwxwn'}}}\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneRegisterModel-RegisterModel', 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 27, 227000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 21, 27, 
908000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model-package/abalonemodelpackagegroupname/1'}}}\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...loneTrain-3kYZvpYx11/output/model.tar.gzInputModelContributedToartifact
124661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
2abalonemodelpackagegroupname-1-PendingManualAp...InputApprovalContributedToaction
3AbaloneModelPackageGroupName-1657729287-aws-mo...OutputModelGroupAssociatedWithcontext
\n", + "
" + ], + "text/plain": [ + " Name/Source Direction Type \\\n", + "0 s3://...loneTrain-3kYZvpYx11/output/model.tar.gz Input Model \n", + "1 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", + "2 abalonemodelpackagegroupname-1-PendingManualAp... Input Approval \n", + "3 AbaloneModelPackageGroupName-1657729287-aws-mo... Output ModelGroup \n", + "\n", + " Association Type Lineage Type \n", + "0 ContributedTo artifact \n", + "1 ContributedTo artifact \n", + "2 ContributedTo action \n", + "3 AssociatedWith context " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'StepName': 'AbaloneTransform', 'StartTime': datetime.datetime(2022, 7, 13, 16, 21, 28, 497000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2022, 7, 13, 16, 26, 41, 240000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:transform-job/pipelines-d84ewltjmxhe-abalonetransform-hjf5dpst8n'}}}\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name/SourceDirectionTypeAssociation TypeLineage Type
0s3://...loneTrain-3kYZvpYx11/output/model.tar.gzInputModelContributedToartifact
124661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3InputImageContributedToartifact
2s3://...1695447989/abalone/abalone-dataset-batchInputDataSetContributedToartifact
3s3://...-us-west-2-000000000000/AbaloneTransformOutputDataSetProducedartifact
\n", + "
" + ], + "text/plain": [ + " Name/Source Direction Type \\\n", + "0 s3://...loneTrain-3kYZvpYx11/output/model.tar.gz Input Model \n", + "1 24661...naws.com/sagemaker-xgboost:1.0-1-cpu-py3 Input Image \n", + "2 s3://...1695447989/abalone/abalone-dataset-batch Input DataSet \n", + "3 s3://...-us-west-2-000000000000/AbaloneTransform Output DataSet \n", + "\n", + " Association Type Lineage Type \n", + "0 ContributedTo artifact \n", + "1 ContributedTo artifact \n", + "2 ContributedTo artifact \n", + "3 Produced artifact " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import time\n", + "from sagemaker.lineage.visualizer import LineageTableVisualizer\n", + "\n", + "\n", + "viz = LineageTableVisualizer(sagemaker.session.Session())\n", + "for execution_step in reversed(execution.list_steps()):\n", + " print(execution_step)\n", + " display(viz.show(pipeline_execution_step=execution_step))\n", + " time.sleep(5)" + ] + }, + { + "cell_type": "markdown", + "id": "76c5a401", + "metadata": { + "papermill": { + "duration": 0.045193, + "end_time": "2022-07-13T16:27:49.331609", + "exception": false, + "start_time": "2022-07-13T16:27:49.286416", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Parametrized Executions\n", + "\n", + "You can run additional executions of the pipeline and specify different pipeline parameters. The `parameters` argument is a dictionary that maps parameter names to the values used to override their defaults.\n", + "\n", + "Based on the performance of the model, you might want to kick off another pipeline execution on a compute-optimized instance type and set the model approval status to \"Approved\" automatically. This means that the model package version generated by the `RegisterModel` step is automatically ready for deployment through CI/CD pipelines, such as with SageMaker Projects." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "78b12dc2", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:27:49.433540Z", + "iopub.status.busy": "2022-07-13T16:27:49.432536Z", + "iopub.status.idle": "2022-07-13T16:27:50.039997Z", + "shell.execute_reply": "2022-07-13T16:27:50.040356Z" + }, + "papermill": { + "duration": 0.660009, + "end_time": "2022-07-13T16:27:50.040517", + "exception": false, + "start_time": "2022-07-13T16:27:49.380508", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution = pipeline.start(\n", + " parameters=dict(\n", + " ModelApprovalStatus=\"Approved\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "c053eb92", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:27:50.127383Z", + "iopub.status.busy": "2022-07-13T16:27:50.126899Z", + "iopub.status.idle": "2022-07-13T16:45:59.346748Z", + "shell.execute_reply": "2022-07-13T16:45:59.346269Z" + }, + "papermill": { + "duration": 1089.265129, + "end_time": "2022-07-13T16:45:59.346876", + "exception": false, + "start_time": "2022-07-13T16:27:50.081747", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "e498d8a3", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:45:59.460546Z", + "iopub.status.busy": "2022-07-13T16:45:59.459990Z", + "iopub.status.idle": "2022-07-13T16:45:59.682468Z", + "shell.execute_reply": "2022-07-13T16:45:59.682907Z" + }, + "papermill": { + "duration": 0.290742, + "end_time": "2022-07-13T16:45:59.683052", + "exception": false, + "start_time": "2022-07-13T16:45:59.392310", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'StepName': 
'AbaloneTransform',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 40, 37, 410000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 45, 35, 417000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'TransformJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:transform-job/pipelines-ib5u1ar77ell-abalonetransform-ybfqj1idhf'}}},\n", + " {'StepName': 'AbaloneRegisterModel-RegisterModel',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 40, 35, 659000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 40, 36, 745000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model-package/abalonemodelpackagegroupname/2'}}},\n", + " {'StepName': 'AbaloneCreateModel-CreateModel',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 40, 35, 659000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 40, 36, 840000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:model/pipelines-ib5u1ar77ell-abalonecreatemodel-c-z2inp9am2j'}}},\n", + " {'StepName': 'AbaloneMSECond',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 40, 34, 694000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 40, 34, 978000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'Condition': {'Outcome': 'True'}}},\n", + " {'StepName': 'AbaloneEval',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 35, 44, 913000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 40, 34, 151000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'ProcessingJob': {'Arn': 
'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-ib5u1ar77ell-abaloneeval-d1olj9t5kb'}}},\n", + " {'StepName': 'AbaloneTrain',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 32, 39, 527000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 35, 43, 780000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-ib5u1ar77ell-abalonetrain-hqep8kakhv'}}},\n", + " {'StepName': 'AbaloneProcess',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 27, 50, 852000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 32, 38, 578000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-ib5u1ar77ell-abaloneprocess-pfwnwtmeef'}}}]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execution.list_steps()" + ] + }, + { + "cell_type": "markdown", + "id": "c1f9631d", + "metadata": { + "papermill": { + "duration": 0.043074, + "end_time": "2022-07-13T16:45:59.771340", + "exception": false, + "start_time": "2022-07-13T16:45:59.728266", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Apart from that, you might also want to adjust the MSE threshold to a smaller value and raise the bar for the accuracy of the registered model. 
In this case, you can override the MSE threshold as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "ff7f6820", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:45:59.870874Z", + "iopub.status.busy": "2022-07-13T16:45:59.869936Z", + "iopub.status.idle": "2022-07-13T16:46:00.219548Z", + "shell.execute_reply": "2022-07-13T16:46:00.219917Z" + }, + "papermill": { + "duration": 0.403974, + "end_time": "2022-07-13T16:46:00.220070", + "exception": false, + "start_time": "2022-07-13T16:45:59.816096", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution = pipeline.start(parameters=dict(MseThreshold=3.0))" + ] + }, + { + "cell_type": "markdown", + "id": "c07f95d7", + "metadata": { + "papermill": { + "duration": 0.078875, + "end_time": "2022-07-13T16:46:00.342112", + "exception": false, + "start_time": "2022-07-13T16:46:00.263237", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "If the model's MSE exceeds the threshold, the condition step evaluates to false, and the pipeline execution enters the `FailStep` and is marked as failed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "1f244f42", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:46:00.433733Z", + "iopub.status.busy": "2022-07-13T16:46:00.433177Z", + "iopub.status.idle": "2022-07-13T16:59:07.315832Z", + "shell.execute_reply": "2022-07-13T16:59:07.316625Z" + }, + "papermill": { + "duration": 786.932424, + "end_time": "2022-07-13T16:59:07.316785", + "exception": false, + "start_time": "2022-07-13T16:46:00.384361", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiter PipelineExecutionComplete failed: Waiter encountered a terminal failure state: For expression \"PipelineExecutionStatus\" we matched expected path: \"Failed\"\n" + ] + } + ], + "source": [ + "try:\n", + " execution.wait()\n", + "except Exception as error:\n", + " print(error)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "62081cff", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:59:07.436018Z", + "iopub.status.busy": "2022-07-13T16:59:07.435161Z", + "iopub.status.idle": "2022-07-13T16:59:07.610216Z", + "shell.execute_reply": "2022-07-13T16:59:07.610648Z" + }, + "papermill": { + "duration": 0.238686, + "end_time": "2022-07-13T16:59:07.610792", + "exception": false, + "start_time": "2022-07-13T16:59:07.372106", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'StepName': 'AbaloneMSEFail',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 58, 50, 492000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 58, 51, 63000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Failed',\n", + " 'AttemptCount': 0,\n", + " 'FailureReason': 'Execution failed due to MSE > 3.0',\n", + " 'Metadata': {'Fail': {'ErrorMessage': 'Execution failed due to MSE > 3.0'}}},\n", + " 
{'StepName': 'AbaloneMSECond',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 58, 49, 539000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 58, 49, 926000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'Condition': {'Outcome': 'False'}}},\n", + " {'StepName': 'AbaloneEval',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 54, 6, 136000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 58, 49, 30000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-hswk7n1v1oju-abaloneeval-z6banrjbgd'}}},\n", + " {'StepName': 'AbaloneTrain',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 50, 55, 273000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 54, 5, 654000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:training-job/pipelines-hswk7n1v1oju-abalonetrain-ig59hd6pjy'}}},\n", + " {'StepName': 'AbaloneProcess',\n", + " 'StartTime': datetime.datetime(2022, 7, 13, 16, 46, 1, 213000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2022, 7, 13, 16, 50, 54, 782000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'AttemptCount': 0,\n", + " 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-west-2:000000000000:processing-job/pipelines-hswk7n1v1oju-abaloneprocess-npvk1jznbk'}}}]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execution.list_steps()" + ] + } + ], + "metadata": { + "instance_type": "ml.t3.medium", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "papermill": { + "default_parameters": {}, + "duration": 3085.109473, + "end_time": "2022-07-13T16:59:08.278091", + "environment_variables": {}, + "exception": null, + "input_path": "sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb", + "output_path": "/opt/ml/processing/output/sagemaker-pipelines-preprocess-train-evaluate-batch-transform-2022-07-13-15-54-21.ipynb", + "parameters": { + "kms_key": "arn:aws:kms:us-west-2:000000000000:1234abcd-12ab-34cd-56ef-1234567890ab" + }, + "start_time": "2022-07-13T16:07:43.168618", + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb new file mode 100644 index 0000000000..fa431f8a44 --- /dev/null +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb @@ -0,0 +1,1920 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0425e1c2", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:52.158481Z", + "iopub.status.busy": "2022-07-13T15:58:52.157838Z", + "iopub.status.idle": "2022-07-13T15:58:52.159725Z", + "shell.execute_reply": "2022-07-13T15:58:52.160093Z" + }, + "papermill": { + "duration": 0.021573, + "end_time": "2022-07-13T15:58:52.160240", + "exception": false, + "start_time": "2022-07-13T15:58:52.138667", + "status": "completed" + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Parameters\n", + "kms_key = \"arn:aws:kms:us-west-2:000000000000:1234abcd-12ab-34cd-56ef-1234567890ab\"" + ] + }, + { + "cell_type": "markdown", + "id": "66b60a2b", + "metadata": { + "papermill": { + "duration": 0.015296, + 
"end_time": "2022-07-13T15:58:52.191291", + "exception": false, + "start_time": "2022-07-13T15:58:52.175995", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# SageMaker Pipelines Lambda Step\n", + "\n", + "This notebook illustrates how a Lambda function can be run as a step in a SageMaker Pipeline.\n", + "\n", + "The steps in this pipeline include:\n", + "* Preprocess the Abalone dataset\n", + "* Train an XGBoost Model\n", + "* Evaluate the model performance\n", + "* Create a model\n", + "* Deploy the model to a SageMaker Hosted Endpoint using a Lambda Function, through SageMaker Pipelines\n", + "\n", + "A step to register the model into a Model Registry can be added to the pipeline using the `RegisterModel` step." + ] + }, + { + "cell_type": "markdown", + "id": "3c741e28", + "metadata": { + "papermill": { + "duration": 0.015223, + "end_time": "2022-07-13T15:58:52.221806", + "exception": false, + "start_time": "2022-07-13T15:58:52.206583", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Runtime\n", + "\n", + "This notebook takes approximately 15 minutes to run.\n", + "\n", + "## Contents\n", + "\n", + "1. [Prerequisites](#Prerequisites)\n", + "1. [Configuration Setup](#Configuration-Setup)\n", + "1. [Data Preparation](#Data-Preparation)\n", + "1. [Model Training and Evaluation](#Model-Training-and-Evaluation)\n", + "1. [Setting up Lambda](#Setting-up-Lambda)\n", + "1. [Execute the Pipeline](#Execute-the-Pipeline)\n", + "1. [Clean up resources](#Clean-up-resources)" + ] + }, + { + "cell_type": "markdown", + "id": "c5a68c81", + "metadata": { + "papermill": { + "duration": 0.015125, + "end_time": "2022-07-13T15:58:52.252140", + "exception": false, + "start_time": "2022-07-13T15:58:52.237015", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Prerequisites\n", + "\n", + "The notebook execution role should have policies which enable the notebook to create a Lambda function. 
The Amazon managed policy `AmazonSageMakerPipelinesIntegrations` can be added to the notebook execution role to achieve the same effect.\n", + "\n", + "The policy description is as follows:\n", + "\n", + "```\n", + "\n", + "{\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"lambda:CreateFunction\",\n", + " \"lambda:DeleteFunction\",\n", + " \"lambda:InvokeFunction\",\n", + " \"lambda:UpdateFunctionCode\"\n", + " ],\n", + " \"Resource\": [\n", + " \"arn:aws:lambda:*:*:function:*sagemaker*\",\n", + " \"arn:aws:lambda:*:*:function:*sageMaker*\",\n", + " \"arn:aws:lambda:*:*:function:*SageMaker*\"\n", + " ]\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"sqs:CreateQueue\",\n", + " \"sqs:SendMessage\"\n", + " ],\n", + " \"Resource\": [\n", + " \"arn:aws:sqs:*:*:*sagemaker*\",\n", + " \"arn:aws:sqs:*:*:*sageMaker*\",\n", + " \"arn:aws:sqs:*:*:*SageMaker*\"\n", + " ]\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"iam:PassRole\"\n", + " ],\n", + " \"Resource\": \"arn:aws:iam::*:role/*\",\n", + " \"Condition\": {\n", + " \"StringEquals\": {\n", + " \"iam:PassedToService\": [\n", + " \"lambda.amazonaws.com\"\n", + " ]\n", + " }\n", + " }\n", + " }\n", + " ]\n", + "}\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "09611418", + "metadata": { + "papermill": { + "duration": 0.015163, + "end_time": "2022-07-13T15:58:52.282544", + "exception": false, + "start_time": "2022-07-13T15:58:52.267381", + "status": "completed" + }, + "pycharm": { + "name": "#%% md\n" + }, + "tags": [] + }, + "source": [ + "Let's start by importing necessary packages and installing the SageMaker Python SDK." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b7a871ec", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:52.322697Z", + "iopub.status.busy": "2022-07-13T15:58:52.321906Z", + "iopub.status.idle": "2022-07-13T15:58:52.827711Z", + "shell.execute_reply": "2022-07-13T15:58:52.827193Z" + }, + "papermill": { + "duration": 0.52726, + "end_time": "2022-07-13T15:58:52.827823", + "exception": false, + "start_time": "2022-07-13T15:58:52.300563", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import boto3\n", + "import sagemaker\n", + "\n", + "from sagemaker.estimator import Estimator\n", + "from sagemaker.inputs import TrainingInput\n", + "\n", + "from sagemaker.processing import (\n", + " ProcessingInput,\n", + " ProcessingOutput,\n", + " Processor,\n", + " ScriptProcessor,\n", + ")\n", + "\n", + "from sagemaker import Model\n", + "from sagemaker.xgboost import XGBoostPredictor\n", + "from sagemaker.sklearn.processing import SKLearnProcessor\n", + "\n", + "from sagemaker.workflow.parameters import (\n", + " ParameterInteger,\n", + " ParameterString,\n", + ")\n", + "from sagemaker.workflow.pipeline import Pipeline\n", + "from sagemaker.workflow.properties import PropertyFile\n", + "from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CacheConfig\n", + "from sagemaker.workflow.lambda_step import (\n", + " LambdaStep,\n", + " LambdaOutput,\n", + " LambdaOutputTypeEnum,\n", + ")\n", + "from sagemaker.workflow.model_step import ModelStep\n", + "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", + "from sagemaker.workflow.condition_step import ConditionStep\n", + "from sagemaker.workflow.functions import JsonGet\n", + "from sagemaker.workflow.pipeline_context import PipelineSession\n", + "\n", + "from sagemaker.lambda_helper import Lambda\n", + "import sys" + ] + }, + { + "cell_type": 
"code", + "execution_count": 3, + "id": "481497d6", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:52.862463Z", + "iopub.status.busy": "2022-07-13T15:58:52.861935Z", + "iopub.status.idle": "2022-07-13T15:58:54.581086Z", + "shell.execute_reply": "2022-07-13T15:58:54.581465Z" + }, + "papermill": { + "duration": 1.738298, + "end_time": "2022-07-13T15:58:54.581614", + "exception": false, + "start_time": "2022-07-13T15:58:52.843316", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\r\n", + " from cryptography.utils import int_from_bytes\r\n", + "/opt/conda/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\r\n", + " from cryptography.utils import int_from_bytes\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sagemaker>=2.99.0 in /opt/conda/lib/python3.7/site-packages (2.99.0)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: protobuf3-to-dict<1.0,>=0.1.5 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (0.1.5)\r\n", + "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (0.2.0)\r\n", + "Requirement already satisfied: smdebug-rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.0.1)\r\n", + "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.21.1)\r\n", + "Requirement already satisfied: boto3<2.0,>=1.20.21 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) 
(1.20.47)\r\n", + "Requirement already satisfied: attrs<22,>=20.3.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (21.4.0)\r\n", + "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.0.1)\r\n", + "Requirement already satisfied: importlib-metadata<5.0,>=1.4.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (1.5.0)\r\n", + "Requirement already satisfied: protobuf<4.0,>=3.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (3.17.3)\r\n", + "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (0.2.8)\r\n", + "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker>=2.99.0) (20.1)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: botocore<1.24.0,>=1.23.47 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (1.23.47)\r\n", + "Requirement already satisfied: s3transfer<0.6.0,>=0.5.0 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (0.5.0)\r\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (0.10.0)\r\n", + "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.7/site-packages (from botocore<1.24.0,>=1.23.47->boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (1.26.6)\r\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/conda/lib/python3.7/site-packages (from botocore<1.24.0,>=1.23.47->boto3<2.0,>=1.20.21->sagemaker>=2.99.0) (2.8.1)\r\n", + "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<5.0,>=1.4.0->sagemaker>=2.99.0) (2.2.0)\r\n", + "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from 
packaging>=20.0->sagemaker>=2.99.0) (1.14.0)\r\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging>=20.0->sagemaker>=2.99.0) (2.4.6)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker>=2.99.0) (2019.3)\r\n", + "Requirement already satisfied: ppft>=1.6.6.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (1.6.6.4)\r\n", + "Requirement already satisfied: pox>=0.3.0 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (0.3.0)\r\n", + "Requirement already satisfied: multiprocess>=0.70.12 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (0.70.12.2)\r\n", + "Requirement already satisfied: dill>=0.3.4 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker>=2.99.0) (0.3.4)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n", + "\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n", + "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n" + ] + } + ], + "source": [ + "!{sys.executable} -m pip install \"sagemaker>=2.99.0\"" + ] + }, + { + "cell_type": "markdown", + "id": "dda7b3e5", + "metadata": { + "papermill": { + "duration": 0.017215, + "end_time": "2022-07-13T15:58:54.616067", + "exception": false, + "start_time": "2022-07-13T15:58:54.598852", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Configuration Setup" + ] + }, + { + "cell_type": "markdown", + "id": "2ce0242d", + "metadata": { + "papermill": { + "duration": 0.016806, + "end_time": "2022-07-13T15:58:54.649817", + "exception": false, + "start_time": "2022-07-13T15:58:54.633011", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Let's now configure the setup we need, which includes the session object from the SageMaker Python SDK, and necessary configurations for the pipelines, such as object types, input and output buckets and so on."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a55e1178", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:54.696396Z", + "iopub.status.busy": "2022-07-13T15:58:54.695661Z", + "iopub.status.idle": "2022-07-13T15:58:54.900760Z", + "shell.execute_reply": "2022-07-13T15:58:54.900289Z" + }, + "papermill": { + "duration": 0.234346, + "end_time": "2022-07-13T15:58:54.900874", + "exception": false, + "start_time": "2022-07-13T15:58:54.666528", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Create the SageMaker Session\n", + "\n", + "sagemaker_session = sagemaker.Session()\n", + "pipeline_session = PipelineSession()\n", + "sm_client = sagemaker_session.sagemaker_client\n", + "region = sagemaker_session.boto_region_name\n", + "prefix = \"lambda-step-pipeline\"\n", + "\n", + "account_id = sagemaker_session.account_id()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1832cf2e", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:54.947742Z", + "iopub.status.busy": "2022-07-13T15:58:54.947270Z", + "iopub.status.idle": "2022-07-13T15:58:55.684395Z", + "shell.execute_reply": "2022-07-13T15:58:55.683908Z" + }, + "papermill": { + "duration": 0.766426, + "end_time": "2022-07-13T15:58:55.684513", + "exception": false, + "start_time": "2022-07-13T15:58:54.918087", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Define variables and parameters needed for the Pipeline steps\n", + "\n", + "role = sagemaker.get_execution_role()\n", + "default_bucket = sagemaker_session.default_bucket()\n", + "base_job_prefix = \"lambda-step-example\"\n", + "s3_prefix = \"lambda-step-pipeline\"\n", + "\n", + "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", + "training_instance_type = 
ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "model_approval_status = ParameterString(\n", + " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", + ")\n", + "input_data = ParameterString(\n", + " name=\"InputDataUrl\",\n", + " default_value=f\"s3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv\",\n", + ")\n", + "model_approval_status = ParameterString(\n", + " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", + ")\n", + "\n", + "# Cache Pipeline steps to reduce execution time on subsequent executions\n", + "cache_config = CacheConfig(enable_caching=True, expire_after=\"30d\")" + ] + }, + { + "cell_type": "markdown", + "id": "ba7cd3ba", + "metadata": { + "papermill": { + "duration": 0.016885, + "end_time": "2022-07-13T15:58:55.718428", + "exception": false, + "start_time": "2022-07-13T15:58:55.701543", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Data Preparation\n", + "\n", + "An SKLearn processor is used to prepare the dataset for the Hyperparameter Tuning job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets.\n", + "\n", + "The output of this step is used as the input to the TrainingStep." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6ef53ec4", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:55.759267Z", + "iopub.status.busy": "2022-07-13T15:58:55.755957Z", + "iopub.status.idle": "2022-07-13T15:58:55.907503Z", + "shell.execute_reply": "2022-07-13T15:58:55.907053Z" + }, + "papermill": { + "duration": 0.172373, + "end_time": "2022-07-13T15:58:55.907627", + "exception": false, + "start_time": "2022-07-13T15:58:55.735254", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "!mkdir -p code" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ea68f51c", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:55.948608Z", + "iopub.status.busy": "2022-07-13T15:58:55.947461Z", + "iopub.status.idle": "2022-07-13T15:58:55.951620Z", + "shell.execute_reply": "2022-07-13T15:58:55.951193Z" + }, + "papermill": { + "duration": 0.026484, + "end_time": "2022-07-13T15:58:55.951738", + "exception": false, + "start_time": "2022-07-13T15:58:55.925254", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing code/preprocess.py\n" + ] + } + ], + "source": [ + "%%writefile code/preprocess.py\n", + "\n", + "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", + "import argparse\n", + "import logging\n", + "import os\n", + "import pathlib\n", + "import requests\n", + "import tempfile\n", + "\n", + "import boto3\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + 
"logger.addHandler(logging.StreamHandler())\n", + "\n", + "\n", + "# Since we get a headerless CSV file we specify the column names here.\n", + "feature_columns_names = [\n", + " \"sex\",\n", + " \"length\",\n", + " \"diameter\",\n", + " \"height\",\n", + " \"whole_weight\",\n", + " \"shucked_weight\",\n", + " \"viscera_weight\",\n", + " \"shell_weight\",\n", + "]\n", + "label_column = \"rings\"\n", + "\n", + "feature_columns_dtype = {\n", + " \"sex\": str,\n", + " \"length\": np.float64,\n", + " \"diameter\": np.float64,\n", + " \"height\": np.float64,\n", + " \"whole_weight\": np.float64,\n", + " \"shucked_weight\": np.float64,\n", + " \"viscera_weight\": np.float64,\n", + " \"shell_weight\": np.float64,\n", + "}\n", + "label_column_dtype = {\"rings\": np.float64}\n", + "\n", + "\n", + "def merge_two_dicts(x, y):\n", + " \"\"\"Merges two dicts, returning a new copy.\"\"\"\n", + " z = x.copy()\n", + " z.update(y)\n", + " return z\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " logger.debug(\"Starting preprocessing.\")\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument(\"--input-data\", type=str, required=True)\n", + " args = parser.parse_args()\n", + "\n", + " base_dir = \"/opt/ml/processing\"\n", + " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", + " input_data = args.input_data\n", + " bucket = input_data.split(\"/\")[2]\n", + " key = \"/\".join(input_data.split(\"/\")[3:])\n", + "\n", + " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", + " fn = f\"{base_dir}/data/abalone-dataset.csv\"\n", + " s3 = boto3.resource(\"s3\")\n", + " s3.Bucket(bucket).download_file(key, fn)\n", + "\n", + " logger.debug(\"Reading downloaded data.\")\n", + " df = pd.read_csv(\n", + " fn,\n", + " header=None,\n", + " names=feature_columns_names + [label_column],\n", + " dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),\n", + " )\n", + " os.unlink(fn)\n", + "\n", + " 
logger.debug(\"Defining transformers.\")\n", + " numeric_features = list(feature_columns_names)\n", + " numeric_features.remove(\"sex\")\n", + " numeric_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"scaler\", StandardScaler()),\n", + " ]\n", + " )\n", + "\n", + " categorical_features = [\"sex\"]\n", + " categorical_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", + " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n", + " ]\n", + " )\n", + "\n", + " preprocess = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", numeric_transformer, numeric_features),\n", + " (\"cat\", categorical_transformer, categorical_features),\n", + " ]\n", + " )\n", + "\n", + " logger.info(\"Applying transforms.\")\n", + " y = df.pop(\"rings\")\n", + " X_pre = preprocess.fit_transform(df)\n", + " y_pre = y.to_numpy().reshape(len(y), 1)\n", + "\n", + " X = np.concatenate((y_pre, X_pre), axis=1)\n", + "\n", + " logger.info(\"Splitting %d rows of data into train, validation, test datasets.\", len(X))\n", + " np.random.shuffle(X)\n", + " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", + "\n", + " logger.info(\"Writing out datasets to %s.\", base_dir)\n", + " pd.DataFrame(train).to_csv(f\"{base_dir}/train/train.csv\", header=False, index=False)\n", + " pd.DataFrame(validation).to_csv(\n", + " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", + " )\n", + " pd.DataFrame(test).to_csv(f\"{base_dir}/test/test.csv\", header=False, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5cc7e807", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:55.993872Z", + "iopub.status.busy": "2022-07-13T15:58:55.992990Z", + "iopub.status.idle": "2022-07-13T15:58:56.276627Z", + "shell.execute_reply": "2022-07-13T15:58:56.277057Z" + }, + 
"papermill": { + "duration": 0.307809, + "end_time": "2022-07-13T15:58:56.277208", + "exception": false, + "start_time": "2022-07-13T15:58:55.969399", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/pipeline_context.py:197: UserWarning: Running within a PipelineSession, there will be No Wait, No Logs, and No Job being started.\n", + " UserWarning,\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Job Name: lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005\n", + "Inputs: [{'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/input/code/preprocess.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", + "Outputs: [{'OutputName': 'train', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/output/train', 'LocalPath': '/opt/ml/processing/train', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'validation', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/output/validation', 'LocalPath': '/opt/ml/processing/validation', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'test', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/output/test', 'LocalPath': '/opt/ml/processing/test', 'S3UploadMode': 'EndOfJob'}}]\n" + ] + } + ], + "source": [ + "# Process the training data step using a python script.\n", + "# Split the training data set into 
train, test, and validation datasets\n", + "\n", + "sklearn_processor = SKLearnProcessor(\n", + " framework_version=\"0.23-1\",\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=processing_instance_count,\n", + " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + ")\n", + "\n", + "processor_args = sklearn_processor.run(\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " code=\"code/preprocess.py\",\n", + " arguments=[\"--input-data\", input_data],\n", + ")\n", + "\n", + "step_process = ProcessingStep(\n", + " name=\"PreprocessAbaloneData\",\n", + " step_args=processor_args,\n", + " cache_config=cache_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ff61dc5a", + "metadata": { + "papermill": { + "duration": 0.017909, + "end_time": "2022-07-13T15:58:56.313344", + "exception": false, + "start_time": "2022-07-13T15:58:56.295435", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Model Training and Evaluation\n", + "\n", + "We will now train an XGBoost model using the SageMaker Python SDK and the output of the ProcessingStep." 
+ ] + }, + { + "cell_type": "markdown", + "id": "bda6892e", + "metadata": { + "papermill": { + "duration": 0.01789, + "end_time": "2022-07-13T15:58:56.350320", + "exception": false, + "start_time": "2022-07-13T15:58:56.332430", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Training the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a575d6e3", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:56.394662Z", + "iopub.status.busy": "2022-07-13T15:58:56.394093Z", + "iopub.status.idle": "2022-07-13T15:58:56.421180Z", + "shell.execute_reply": "2022-07-13T15:58:56.421606Z" + }, + "papermill": { + "duration": 0.053298, + "end_time": "2022-07-13T15:58:56.421753", + "exception": false, + "start_time": "2022-07-13T15:58:56.368455", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/steps.py:391: UserWarning: Profiling is enabled on the provided estimator. The default profiler rule includes a timestamp which will change each time the pipeline is upserted, causing cache misses. 
If profiling is not needed, set disable_profiler to True on the estimator.\n", + " warnings.warn(msg)\n" + ] + } + ], + "source": [ + "# Define the output path for the model artifacts from the Hyperparameter Tuning Job\n", + "model_path = f\"s3://{default_bucket}/{base_job_prefix}/AbaloneTrain\"\n", + "\n", + "image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=\"ml.m5.xlarge\",\n", + ")\n", + "\n", + "xgb_train = Estimator(\n", + " image_uri=image_uri,\n", + " instance_type=training_instance_type,\n", + " instance_count=1,\n", + " output_path=model_path,\n", + " base_job_name=f\"{prefix}/{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + ")\n", + "\n", + "xgb_train.set_hyperparameters(\n", + " objective=\"reg:linear\",\n", + " num_round=50,\n", + " max_depth=5,\n", + " eta=0.2,\n", + " gamma=4,\n", + " min_child_weight=6,\n", + " subsample=0.7,\n", + " silent=0,\n", + ")\n", + "\n", + "train_args = xgb_train.fit(\n", + " inputs={\n", + " \"train\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " \"validation\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", + " \"validation\"\n", + " ].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " },\n", + ")\n", + "\n", + "step_train = TrainingStep(\n", + " name=\"TrainAbaloneModel\",\n", + " step_args=train_args,\n", + " cache_config=cache_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9c40566b", + "metadata": { + "papermill": { + "duration": 0.018193, + "end_time": "2022-07-13T15:58:56.458560", + "exception": false, + "start_time": "2022-07-13T15:58:56.440367", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Evaluating 
the model\n", + "\n", + "Use a processing job to evaluate the model from the TrainingStep. If the output of the evaluation is True, a model is created and a Lambda function is invoked to deploy the model to a SageMaker Endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1545156e", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:56.499375Z", + "iopub.status.busy": "2022-07-13T15:58:56.498823Z", + "iopub.status.idle": "2022-07-13T15:58:56.501906Z", + "shell.execute_reply": "2022-07-13T15:58:56.501502Z" + }, + "papermill": { + "duration": 0.02515, + "end_time": "2022-07-13T15:58:56.502014", + "exception": false, + "start_time": "2022-07-13T15:58:56.476864", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing code/evaluate.py\n" + ] + } + ], + "source": [ + "%%writefile code/evaluate.py\n", + "\n", + "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", + "import json\n", + "import logging\n", + "import pathlib\n", + "import pickle\n", + "import tarfile\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xgboost\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + "logger.addHandler(logging.StreamHandler())\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " logger.debug(\"Starting evaluation.\")\n", + " model_path = \"/opt/ml/processing/model/model.tar.gz\"\n", + " with tarfile.open(model_path) as tar:\n", + " tar.extractall(path=\".\")\n", + "\n", + " logger.debug(\"Loading xgboost model.\")\n", + " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", + "\n", + " logger.debug(\"Reading test data.\")\n", + " test_path = \"/opt/ml/processing/test/test.csv\"\n", + " df = pd.read_csv(test_path, header=None)\n", + "\n", + " logger.debug(\"Reading test 
data.\")\n", + " y_test = df.iloc[:, 0].to_numpy()\n", + " df.drop(df.columns[0], axis=1, inplace=True)\n", + " X_test = xgboost.DMatrix(df.values)\n", + "\n", + " logger.info(\"Performing predictions against test data.\")\n", + " predictions = model.predict(X_test)\n", + "\n", + " logger.debug(\"Calculating mean squared error.\")\n", + " mse = mean_squared_error(y_test, predictions)\n", + " std = np.std(y_test - predictions)\n", + " report_dict = {\n", + " \"regression_metrics\": {\n", + " \"mse\": {\"value\": mse, \"standard_deviation\": std},\n", + " },\n", + " }\n", + "\n", + " output_dir = \"/opt/ml/processing/evaluation\"\n", + " pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)\n", + "\n", + " logger.info(\"Writing out evaluation report with mse: %f\", mse)\n", + " evaluation_path = f\"{output_dir}/evaluation.json\"\n", + " with open(evaluation_path, \"w\") as f:\n", + " f.write(json.dumps(report_dict))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "edd37d25", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:56.549199Z", + "iopub.status.busy": "2022-07-13T15:58:56.548644Z", + "iopub.status.idle": "2022-07-13T15:58:56.640043Z", + "shell.execute_reply": "2022-07-13T15:58:56.640488Z" + }, + "papermill": { + "duration": 0.119643, + "end_time": "2022-07-13T15:58:56.640642", + "exception": false, + "start_time": "2022-07-13T15:58:56.520999", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Job Name: lambda-step-pipeline/lambda-step-exampl-2022-07-13-15-58-56-545\n", + "Inputs: [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': , 'LocalPath': '/opt/ml/processing/model', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'input-2', 'AppManaged': False, 'S3Input': {'S3Uri': , 
'LocalPath': '/opt/ml/processing/test', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-pipeline/lambda-step-exampl-2022-07-13-15-58-56-545/input/code/evaluate.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", + "Outputs: [{'OutputName': 'evaluation', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-pipeline/evaluation_report', 'LocalPath': '/opt/ml/processing/evaluation', 'S3UploadMode': 'EndOfJob'}}]\n" + ] + } + ], + "source": [ + "# A ProcessingStep is used to evaluate the performance of the trained model.\n", + "# Based on the results of the evaluation, the model is created and deployed.\n", + "\n", + "script_eval = ScriptProcessor(\n", + " image_uri=image_uri,\n", + " command=[\"python3\"],\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=1,\n", + " base_job_name=f\"{prefix}/{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + ")\n", + "\n", + "evaluation_report = PropertyFile(\n", + " name=\"AbaloneEvaluationReport\",\n", + " output_name=\"evaluation\",\n", + " path=\"evaluation.json\",\n", + ")\n", + "\n", + "eval_args = script_eval.run(\n", + " inputs=[\n", + " ProcessingInput(\n", + " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " destination=\"/opt/ml/processing/model\",\n", + " ),\n", + " ProcessingInput(\n", + " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", + " destination=\"/opt/ml/processing/test\",\n", + " ),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(\n", + " output_name=\"evaluation\",\n", + " 
source=\"/opt/ml/processing/evaluation\",\n", + " destination=f\"s3://{default_bucket}/{s3_prefix}/evaluation_report\",\n", + " ),\n", + " ],\n", + " code=\"code/evaluate.py\",\n", + ")\n", + "step_eval = ProcessingStep(\n", + " name=\"EvaluateAbaloneModel\",\n", + " step_args=eval_args,\n", + " property_files=[evaluation_report],\n", + " cache_config=cache_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ce969364", + "metadata": { + "papermill": { + "duration": 0.018851, + "end_time": "2022-07-13T15:58:56.678471", + "exception": false, + "start_time": "2022-07-13T15:58:56.659620", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Creating the final model object\n", + "\n", + "The model is created and the name of the model is provided to the Lambda function for deployment. The `ModelStep` dynamically assigns a name to the model." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8017b70a", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:56.721922Z", + "iopub.status.busy": "2022-07-13T15:58:56.721196Z", + "iopub.status.idle": "2022-07-13T15:58:56.723177Z", + "shell.execute_reply": "2022-07-13T15:58:56.723545Z" + }, + "papermill": { + "duration": 0.026308, + "end_time": "2022-07-13T15:58:56.723689", + "exception": false, + "start_time": "2022-07-13T15:58:56.697381", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Create Model\n", + "model = Model(\n", + " image_uri=image_uri,\n", + " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " sagemaker_session=pipeline_session,\n", + " role=role,\n", + " predictor_cls=XGBoostPredictor,\n", + ")\n", + "\n", + "step_create_model = ModelStep(\n", + " name=\"CreateModel\",\n", + " step_args=model.create(\"ml.m4.large\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e8d94f5b", + "metadata": { + "papermill": { + "duration": 0.018913,
+ "end_time": "2022-07-13T15:58:56.761761", + "exception": false, + "start_time": "2022-07-13T15:58:56.742848", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Setting up Lambda\n", + "\n", + "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providing a script, function name and role for the Lambda function.\n", + "\n", + "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n", + "\n", + "The dictionary response from the Lambda function is parsed through the `LambdaOutput` objects provided to the `outputs` argument. The `output_name` in `LambdaOutput` corresponds to the dictionary key in the Lambda's return dictionary." + ] + }, + { + "cell_type": "markdown", + "id": "f813825f", + "metadata": { + "papermill": { + "duration": 0.018947, + "end_time": "2022-07-13T15:58:56.799670", + "exception": false, + "start_time": "2022-07-13T15:58:56.780723", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Define the Lambda function\n", + "\n", + "Users can choose to leverage the Lambda helper class to create a Lambda function and provide that function object to the LambdaStep. Alternatively, users can use a pre-deployed Lambda function and provide the function ARN to the `Lambda` helper class in the Lambda step."
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c3782959", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:56.842251Z", + "iopub.status.busy": "2022-07-13T15:58:56.841631Z", + "iopub.status.idle": "2022-07-13T15:58:56.844526Z", + "shell.execute_reply": "2022-07-13T15:58:56.844085Z" + }, + "papermill": { + "duration": 0.026015, + "end_time": "2022-07-13T15:58:56.844646", + "exception": false, + "start_time": "2022-07-13T15:58:56.818631", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing code/lambda_helper.py\n" + ] + } + ], + "source": [ + "%%writefile code/lambda_helper.py\n", + "\n", + "\"\"\"\n", + "This Lambda function creates an Endpoint Configuration and deploys a model to an Endpoint.\n", + "The name of the model to deploy is provided via the `event` argument\n", + "\"\"\"\n", + "\n", + "import json\n", + "import boto3\n", + "\n", + "\n", + "def lambda_handler(event, context):\n", + " \"\"\" \"\"\"\n", + " sm_client = boto3.client(\"sagemaker\")\n", + "\n", + " # The name of the model created in the Pipeline CreateModelStep\n", + " model_name = event[\"model_name\"]\n", + "\n", + " endpoint_config_name = event[\"endpoint_config_name\"]\n", + " endpoint_name = event[\"endpoint_name\"]\n", + "\n", + " create_endpoint_config_response = sm_client.create_endpoint_config(\n", + " EndpointConfigName=endpoint_config_name,\n", + " ProductionVariants=[\n", + " {\n", + " \"InstanceType\": \"ml.m4.xlarge\",\n", + " \"InitialVariantWeight\": 1,\n", + " \"InitialInstanceCount\": 1,\n", + " \"ModelName\": model_name,\n", + " \"VariantName\": \"AllTraffic\",\n", + " }\n", + " ],\n", + " )\n", + "\n", + " create_endpoint_response = sm_client.create_endpoint(\n", + " EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name\n", + " )\n", + "\n", + " return {\n", + " \"statusCode\": 200,\n", + " 
\"body\": json.dumps(\"Created Endpoint!\"),\n", + " \"other_key\": \"example_value\",\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "7835966e", + "metadata": { + "papermill": { + "duration": 0.019367, + "end_time": "2022-07-13T15:58:56.883384", + "exception": false, + "start_time": "2022-07-13T15:58:56.864017", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Setting up the custom IAM Role\n", + "\n", + "The Lambda function needs an IAM role that allows it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep.\n", + "\n", + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the basic Lambda execution policies.\n", + "\n", + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy with least privileges as per AWS IAM best practices."
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "928270f8", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:56.926249Z", + "iopub.status.busy": "2022-07-13T15:58:56.925421Z", + "iopub.status.idle": "2022-07-13T15:58:57.385000Z", + "shell.execute_reply": "2022-07-13T15:58:57.384557Z" + }, + "papermill": { + "duration": 0.482377, + "end_time": "2022-07-13T15:58:57.385112", + "exception": false, + "start_time": "2022-07-13T15:58:56.902735", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using ARN from existing role: lambda-deployment-role\n" + ] + } + ], + "source": [ + "from iam_helper import create_lambda_role\n", + "\n", + "lambda_role = create_lambda_role(\"lambda-deployment-role\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "430e04f4", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:57.431162Z", + "iopub.status.busy": "2022-07-13T15:58:57.430310Z", + "iopub.status.idle": "2022-07-13T15:58:57.540082Z", + "shell.execute_reply": "2022-07-13T15:58:57.540527Z" + }, + "papermill": { + "duration": 0.135811, + "end_time": "2022-07-13T15:58:57.540673", + "exception": false, + "start_time": "2022-07-13T15:58:57.404862", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Custom Lambda Step\n", + "\n", + "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", + "model_name = \"demo-lambda-model\" + current_time\n", + "endpoint_config_name = \"demo-lambda-deploy-endpoint-config-\" + current_time\n", + "endpoint_name = \"demo-lambda-deploy-endpoint-\" + current_time\n", + "\n", + "function_name = \"sagemaker-lambda-step-endpoint-deploy-\" + current_time\n", + "\n", + "# Lambda helper class can be used to create the Lambda function\n", + "func = Lambda(\n", + " 
function_name=function_name,\n", + " execution_role_arn=lambda_role,\n", + " script=\"code/lambda_helper.py\",\n", + " handler=\"lambda_helper.lambda_handler\",\n", + ")\n", + "\n", + "output_param_1 = LambdaOutput(output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String)\n", + "output_param_2 = LambdaOutput(output_name=\"body\", output_type=LambdaOutputTypeEnum.String)\n", + "output_param_3 = LambdaOutput(output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String)\n", + "\n", + "step_deploy_lambda = LambdaStep(\n", + " name=\"LambdaStep\",\n", + " lambda_func=func,\n", + " inputs={\n", + " \"model_name\": step_create_model.properties.ModelName,\n", + " \"endpoint_config_name\": endpoint_config_name,\n", + " \"endpoint_name\": endpoint_name,\n", + " },\n", + " outputs=[output_param_1, output_param_2, output_param_3],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5c6c4475", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:57.584145Z", + "iopub.status.busy": "2022-07-13T15:58:57.583668Z", + "iopub.status.idle": "2022-07-13T15:58:57.585521Z", + "shell.execute_reply": "2022-07-13T15:58:57.585892Z" + }, + "papermill": { + "duration": 0.025694, + "end_time": "2022-07-13T15:58:57.586019", + "exception": false, + "start_time": "2022-07-13T15:58:57.560325", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# ConditionStep for evaluating model quality and branching execution.\n", + "# The `json_path` value is based on the `report_dict` variable in `evaluate.py`\n", + "\n", + "cond_lte = ConditionLessThanOrEqualTo(\n", + " left=JsonGet(\n", + " step_name=step_eval.name,\n", + " property_file=evaluation_report,\n", + " json_path=\"regression_metrics.mse.value\",\n", + " ),\n", + " right=6.0,\n", + ")\n", + "\n", + "step_cond = ConditionStep(\n", + " name=\"CheckMSEAbaloneEvaluation\",\n", + " conditions=[cond_lte],\n", + " 
if_steps=[step_create_model, step_deploy_lambda],\n", + " else_steps=[],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "36c4a4c5", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:57.629412Z", + "iopub.status.busy": "2022-07-13T15:58:57.628880Z", + "iopub.status.idle": "2022-07-13T15:58:57.630967Z", + "shell.execute_reply": "2022-07-13T15:58:57.630510Z" + }, + "papermill": { + "duration": 0.025542, + "end_time": "2022-07-13T15:58:57.631070", + "exception": false, + "start_time": "2022-07-13T15:58:57.605528", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Use the same pipeline name across executions for cache usage.\n", + "\n", + "pipeline_name = \"lambda-step-pipeline\" + current_time\n", + "\n", + "pipeline = Pipeline(\n", + " name=pipeline_name,\n", + " parameters=[\n", + " processing_instance_count,\n", + " training_instance_type,\n", + " input_data,\n", + " model_approval_status,\n", + " ],\n", + " steps=[step_process, step_train, step_eval, step_cond],\n", + " sagemaker_session=pipeline_session,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bc75af4c", + "metadata": { + "papermill": { + "duration": 0.019407, + "end_time": "2022-07-13T15:58:57.670110", + "exception": false, + "start_time": "2022-07-13T15:58:57.650703", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Execute the Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0183c74f", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:57.713276Z", + "iopub.status.busy": "2022-07-13T15:58:57.712514Z", + "iopub.status.idle": "2022-07-13T15:58:57.996122Z", + "shell.execute_reply": "2022-07-13T15:58:57.995686Z" + }, + "papermill": { + "duration": 0.30664, + "end_time": "2022-07-13T15:58:57.996232", + "exception": false, + "start_time": "2022-07-13T15:58:57.689592", + "status": "completed" + }, + 
"pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Version': '2020-12-01',\n", + " 'Metadata': {},\n", + " 'Parameters': [{'Name': 'ProcessingInstanceCount',\n", + " 'Type': 'Integer',\n", + " 'DefaultValue': 1},\n", + " {'Name': 'TrainingInstanceType',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 'ml.m5.xlarge'},\n", + " {'Name': 'InputDataUrl',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 's3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv'},\n", + " {'Name': 'ModelApprovalStatus',\n", + " 'Type': 'String',\n", + " 'DefaultValue': 'PendingManualApproval'}],\n", + " 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},\n", + " 'TrialName': {'Get': 'Execution.PipelineExecutionId'}},\n", + " 'Steps': [{'Name': 'PreprocessAbaloneData',\n", + " 'Type': 'Processing',\n", + " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',\n", + " 'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},\n", + " 'VolumeSizeInGB': 30}},\n", + " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3',\n", + " 'ContainerArguments': ['--input-data',\n", + " {'Get': 'Parameters.InputDataUrl'}],\n", + " 'ContainerEntrypoint': ['python3',\n", + " '/opt/ml/processing/input/code/preprocess.py']},\n", + " 'RoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'ProcessingInputs': [{'InputName': 'code',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/input/code/preprocess.py',\n", + " 'LocalPath': '/opt/ml/processing/input/code',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}}],\n", + " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'train',\n", 
+ " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/output/train',\n", + " 'LocalPath': '/opt/ml/processing/train',\n", + " 'S3UploadMode': 'EndOfJob'}},\n", + " {'OutputName': 'validation',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/output/validation',\n", + " 'LocalPath': '/opt/ml/processing/validation',\n", + " 'S3UploadMode': 'EndOfJob'}},\n", + " {'OutputName': 'test',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/sklearn-abalone-pre-2022-07-13-15-58-56-005/output/test',\n", + " 'LocalPath': '/opt/ml/processing/test',\n", + " 'S3UploadMode': 'EndOfJob'}}]}},\n", + " 'CacheConfig': {'Enabled': True, 'ExpireAfter': '30d'}},\n", + " {'Name': 'TrainAbaloneModel',\n", + " 'Type': 'Training',\n", + " 'Arguments': {'AlgorithmSpecification': {'TrainingInputMode': 'File',\n", + " 'TrainingImage': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'},\n", + " 'OutputDataConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/AbaloneTrain'},\n", + " 'StoppingCondition': {'MaxRuntimeInSeconds': 86400},\n", + " 'ResourceConfig': {'VolumeSizeInGB': 30,\n", + " 'InstanceCount': 1,\n", + " 'InstanceType': {'Get': 'Parameters.TrainingInstanceType'}},\n", + " 'RoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'InputDataConfig': [{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", + " 'S3Uri': {'Get': \"Steps.PreprocessAbaloneData.ProcessingOutputConfig.Outputs['train'].S3Output.S3Uri\"},\n", + " 'S3DataDistributionType': 'FullyReplicated'}},\n", + " 'ContentType': 'text/csv',\n", + " 'ChannelName': 'train'},\n", + " {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',\n", + " 'S3Uri': {'Get': 
\"Steps.PreprocessAbaloneData.ProcessingOutputConfig.Outputs['validation'].S3Output.S3Uri\"},\n", + " 'S3DataDistributionType': 'FullyReplicated'}},\n", + " 'ContentType': 'text/csv',\n", + " 'ChannelName': 'validation'}],\n", + " 'HyperParameters': {'objective': 'reg:linear',\n", + " 'num_round': '50',\n", + " 'max_depth': '5',\n", + " 'eta': '0.2',\n", + " 'gamma': '4',\n", + " 'min_child_weight': '6',\n", + " 'subsample': '0.7',\n", + " 'silent': '0'},\n", + " 'ProfilerRuleConfigurations': [{'RuleConfigurationName': 'ProfilerReport-1657727936',\n", + " 'RuleEvaluatorImage': '895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest',\n", + " 'RuleParameters': {'rule_to_invoke': 'ProfilerReport'}}],\n", + " 'ProfilerConfig': {'S3OutputPath': 's3://sagemaker-us-west-2-000000000000/lambda-step-example/AbaloneTrain'}},\n", + " 'CacheConfig': {'Enabled': True, 'ExpireAfter': '30d'}},\n", + " {'Name': 'EvaluateAbaloneModel',\n", + " 'Type': 'Processing',\n", + " 'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',\n", + " 'InstanceCount': 1,\n", + " 'VolumeSizeInGB': 30}},\n", + " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", + " 'ContainerEntrypoint': ['python3',\n", + " '/opt/ml/processing/input/code/evaluate.py']},\n", + " 'RoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'ProcessingInputs': [{'InputName': 'input-1',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': {'Get': 'Steps.TrainAbaloneModel.ModelArtifacts.S3ModelArtifacts'},\n", + " 'LocalPath': '/opt/ml/processing/model',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}},\n", + " {'InputName': 'input-2',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': {'Get': 
\"Steps.PreprocessAbaloneData.ProcessingOutputConfig.Outputs['test'].S3Output.S3Uri\"},\n", + " 'LocalPath': '/opt/ml/processing/test',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}},\n", + " {'InputName': 'code',\n", + " 'AppManaged': False,\n", + " 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-pipeline/lambda-step-exampl-2022-07-13-15-58-56-545/input/code/evaluate.py',\n", + " 'LocalPath': '/opt/ml/processing/input/code',\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3InputMode': 'File',\n", + " 'S3DataDistributionType': 'FullyReplicated',\n", + " 'S3CompressionType': 'None'}}],\n", + " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'evaluation',\n", + " 'AppManaged': False,\n", + " 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-000000000000/lambda-step-pipeline/evaluation_report',\n", + " 'LocalPath': '/opt/ml/processing/evaluation',\n", + " 'S3UploadMode': 'EndOfJob'}}]}},\n", + " 'CacheConfig': {'Enabled': True, 'ExpireAfter': '30d'},\n", + " 'PropertyFiles': [{'PropertyFileName': 'AbaloneEvaluationReport',\n", + " 'OutputName': 'evaluation',\n", + " 'FilePath': 'evaluation.json'}]},\n", + " {'Name': 'CheckMSEAbaloneEvaluation',\n", + " 'Type': 'Condition',\n", + " 'Arguments': {'Conditions': [{'Type': 'LessThanOrEqualTo',\n", + " 'LeftValue': {'Std:JsonGet': {'PropertyFile': {'Get': 'Steps.EvaluateAbaloneModel.PropertyFiles.AbaloneEvaluationReport'},\n", + " 'Path': 'regression_metrics.mse.value'}},\n", + " 'RightValue': 6.0}],\n", + " 'IfSteps': [{'Name': 'CreateModel-CreateModel',\n", + " 'Type': 'Model',\n", + " 'Arguments': {'ExecutionRoleArn': 'arn:aws:iam::000000000000:role/SageMakerRole',\n", + " 'PrimaryContainer': {'Image': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',\n", + " 'Environment': {},\n", + " 'ModelDataUrl': {'Get': 
'Steps.TrainAbaloneModel.ModelArtifacts.S3ModelArtifacts'}}}},\n", + " {'Name': 'LambdaStep',\n", + " 'Type': 'Lambda',\n", + " 'Arguments': {'model_name': {'Get': 'Steps.CreateModel-CreateModel.ModelName'},\n", + " 'endpoint_config_name': 'demo-lambda-deploy-endpoint-config-07-13-15-58-57',\n", + " 'endpoint_name': 'demo-lambda-deploy-endpoint-07-13-15-58-57'},\n", + " 'FunctionArn': 'arn:aws:lambda:us-west-2:000000000000:function:sagemaker-lambda-step-endpoint-deploy-07-13-15-58-57',\n", + " 'OutputParameters': [{'OutputName': 'statusCode',\n", + " 'OutputType': 'String'},\n", + " {'OutputName': 'body', 'OutputType': 'String'},\n", + " {'OutputName': 'other_key', 'OutputType': 'String'}]}],\n", + " 'ElseSteps': []}}]}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "definition = json.loads(pipeline.definition())\n", + "definition" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "468eb3d7", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:58.041896Z", + "iopub.status.busy": "2022-07-13T15:58:58.041369Z", + "iopub.status.idle": "2022-07-13T15:58:59.758190Z", + "shell.execute_reply": "2022-07-13T15:58:59.757766Z" + }, + "papermill": { + "duration": 1.74154, + "end_time": "2022-07-13T15:58:59.758302", + "exception": false, + "start_time": "2022-07-13T15:58:58.016762", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/lambda-step-pipeline07-13-15-58-57',\n", + " 'ResponseMetadata': {'RequestId': 'b8946d56-ca49-4275-add7-d48f765b4931',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': 'b8946d56-ca49-4275-add7-d48f765b4931',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '102',\n", + " 'date': 'Wed, 13 Jul 2022 15:58:59 
GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.upsert(role_arn=role)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bd52d7e0", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:58:59.805754Z", + "iopub.status.busy": "2022-07-13T15:58:59.805233Z", + "iopub.status.idle": "2022-07-13T15:59:00.068810Z", + "shell.execute_reply": "2022-07-13T15:59:00.068289Z" + }, + "papermill": { + "duration": 0.289644, + "end_time": "2022-07-13T15:59:00.068954", + "exception": false, + "start_time": "2022-07-13T15:58:59.779310", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution = pipeline.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d30b7748", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T15:59:00.116180Z", + "iopub.status.busy": "2022-07-13T15:59:00.115665Z", + "iopub.status.idle": "2022-07-13T16:13:07.234138Z", + "shell.execute_reply": "2022-07-13T16:13:07.233620Z" + }, + "papermill": { + "duration": 847.144553, + "end_time": "2022-07-13T16:13:07.234262", + "exception": false, + "start_time": "2022-07-13T15:59:00.089709", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "ca40dc3a", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:13:07.286763Z", + "iopub.status.busy": "2022-07-13T16:13:07.286297Z", + "iopub.status.idle": "2022-07-13T16:16:07.831769Z", + "shell.execute_reply": "2022-07-13T16:16:07.831267Z" + }, + "papermill": { + "duration": 180.576174, + "end_time": "2022-07-13T16:16:07.831903", + "exception": false, + "start_time": "2022-07-13T16:13:07.255729", + "status": "completed" + }, + "pycharm": { + "name": 
"#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Create a SageMaker client\n", + "sm_client = sagemaker.Session().sagemaker_client\n", + "\n", + "# Wait for the endpoint to be in service\n", + "waiter = sm_client.get_waiter(\"endpoint_in_service\")\n", + "waiter.wait(EndpointName=endpoint_name)" + ] + }, + { + "cell_type": "markdown", + "id": "66aa70a4", + "metadata": { + "papermill": { + "duration": 0.021213, + "end_time": "2022-07-13T16:16:07.874605", + "exception": false, + "start_time": "2022-07-13T16:16:07.853392", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Clean up resources\n", + "\n", + "Running the following cell will delete the following resources created in this notebook -\n", + "* SageMaker Model\n", + "* SageMaker Endpoint Configuration\n", + "* SageMaker Endpoint\n", + "* SageMaker Pipeline\n", + "* Lambda Function" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6b8f340a", + "metadata": { + "execution": { + "iopub.execute_input": "2022-07-13T16:16:07.926674Z", + "iopub.status.busy": "2022-07-13T16:16:07.926158Z", + "iopub.status.idle": "2022-07-13T16:16:08.804031Z", + "shell.execute_reply": "2022-07-13T16:16:08.804450Z" + }, + "papermill": { + "duration": 0.908752, + "end_time": "2022-07-13T16:16:08.804592", + "exception": false, + "start_time": "2022-07-13T16:16:07.895840", + "status": "completed" + }, + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PipelineArn': 'arn:aws:sagemaker:us-west-2:000000000000:pipeline/lambda-step-pipeline07-13-15-58-57',\n", + " 'ResponseMetadata': {'RequestId': 'f71a3ce5-bac1-4250-9129-4161507f62d4',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': 'f71a3ce5-bac1-4250-9129-4161507f62d4',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '102',\n", + " 'date': 'Wed, 13 Jul 2022 16:16:08 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + 
"execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the model name from the EndpointConfig. The CreateModelStep properties are not available\n", + "# outside the Pipeline execution context so `step_create_model.properties.ModelName`\n", + "# cannot be used while deleting the model.\n", + "\n", + "model_name = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)[\n", + " \"ProductionVariants\"\n", + "][0][\"ModelName\"]\n", + "\n", + "# Delete the Model\n", + "sm_client.delete_model(ModelName=model_name)\n", + "\n", + "# Delete the EndpointConfig\n", + "sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)\n", + "\n", + "# Delete the Endpoint\n", + "sm_client.delete_endpoint(EndpointName=endpoint_name)\n", + "\n", + "# Delete the Lambda function\n", + "func.delete()\n", + "\n", + "# Delete the Pipeline\n", + "sm_client.delete_pipeline(PipelineName=pipeline_name)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "metadata": { + "interpreter": { + "hash": "ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + } + }, + "papermill": { + "default_parameters": {}, + "duration": 1037.911631, + "end_time": "2022-07-13T16:16:09.344184", + "environment_variables": {}, + "exception": null, + "input_path": "sagemaker-pipelines-lambda-step.ipynb", + "output_path": "/opt/ml/processing/output/sagemaker-pipelines-lambda-step-2022-07-13-15-54-23.ipynb", + "parameters": { + "kms_key": "arn:aws:kms:us-west-2:000000000000:1234abcd-12ab-34cd-56ef-1234567890ab" + }, + "start_time": "2022-07-13T15:58:51.432553", + "version": "2.3.4" + } + }, +
"nbformat": 4, + "nbformat_minor": 5 +}