From 33085817a5419d74e195fe9aa00b564c2fea478e Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 29 Jul 2021 17:14:02 +0000 Subject: [PATCH 01/10] Added lambda step example --- .../sagemaker-pipelines-lambda-step.ipynb | 896 ++++++++++++++++++ 1 file changed, 896 insertions(+) create mode 100644 sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb new file mode 100644 index 0000000000..81c6b617f0 --- /dev/null +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -0,0 +1,896 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### SageMaker Pipelines Lambda Step\n", + "\n", + "This notebook illustrates how a Lambda function can be run as a step in a SageMaker Pipeline. \n", + "\n", + "The steps in this pipeline include -\n", + "* Preprocessing the abalone dataset\n", + "* Train an XGBoost Model\n", + "* Evaluate the model performance\n", + "* Create a model\n", + "* Deploy the model to a Sagemaker Hosted Endpoint using a Lambda Function\n", + "\n", + "A step to register the model into a Model Registry can be added to the pipeline using the `RegisterModel` step." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Prerequisites\n", + "\n", + "The notebook execution role should have policies which enable the notebook to create a Lambda function. The Amazon managed policy `AmazonSageMakerPipelinesIntegrations` can be added to the notebook execution role. \n", + "\n", + "The policy description is -\n", + "\n", + "```\n", + "\n", + "{\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"lambda:CreateFunction\",\n", + " \"lambda:DeleteFunction\",\n", + " \"lambda:InvokeFunction\",\n", + " \"lambda:UpdateFunctionCode\"\n", + " ],\n", + " \"Resource\": [\n", + " \"arn:aws:lambda:*:*:function:*sagemaker*\",\n", + " \"arn:aws:lambda:*:*:function:*sageMaker*\",\n", + " \"arn:aws:lambda:*:*:function:*SageMaker*\"\n", + " ]\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"sqs:CreateQueue\",\n", + " \"sqs:SendMessage\"\n", + " ],\n", + " \"Resource\": [\n", + " \"arn:aws:sqs:*:*:*sagemaker*\",\n", + " \"arn:aws:sqs:*:*:*sageMaker*\",\n", + " \"arn:aws:sqs:*:*:*SageMaker*\"\n", + " ]\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"iam:PassRole\"\n", + " ],\n", + " \"Resource\": \"arn:aws:iam::*:role/*\",\n", + " \"Condition\": {\n", + " \"StringEquals\": {\n", + " \"iam:PassedToService\": [\n", + " \"lambda.amazonaws.com\"\n", + " ]\n", + " }\n", + " }\n", + " }\n", + " ]\n", + "}\n", + " \n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# !{sys.executable} -m pip install \"sagemaker>=2.49.2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import boto3\n", + "import sagemaker\n", + "\n", + "from sagemaker.estimator import Estimator\n", + "from sagemaker.inputs import TrainingInput\n", + "\n", + "from sagemaker.processing import (\n", + " ProcessingInput,\n", + " ProcessingOutput,\n", + " Processor,\n", + " ScriptProcessor,\n", + ")\n", + "\n", + "from sagemaker import 
Model\n",
+    "from sagemaker.xgboost import XGBoostPredictor\n",
+    "from sagemaker.sklearn.processing import SKLearnProcessor\n",
+    "\n",
+    "from sagemaker.workflow.parameters import (\n",
+    "    ParameterInteger,\n",
+    "    ParameterString,\n",
+    ")\n",
+    "from sagemaker.workflow.pipeline import Pipeline\n",
+    "from sagemaker.workflow.properties import PropertyFile\n",
+    "from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CacheConfig\n",
+    "from sagemaker.workflow.lambda_step import (\n",
+    "    LambdaStep,\n",
+    "    LambdaOutput,\n",
+    "    LambdaOutputTypeEnum,\n",
+    ")\n",
+    "from sagemaker.workflow.step_collections import CreateModelStep\n",
+    "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n",
+    "from sagemaker.workflow.condition_step import (\n",
+    "    ConditionStep,\n",
+    "    JsonGet,\n",
+    ")\n",
+    "\n",
+    "from sagemaker.lambda_helper import Lambda"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the SageMaker Session\n",
+    "\n",
+    "region = sagemaker.Session().boto_region_name\n",
+    "sm_client = boto3.client(\"sagemaker\")\n",
+    "boto_session = boto3.Session(region_name=region)\n",
+    "sagemaker_session = sagemaker.session.Session(\n",
+    "    boto_session=boto_session, sagemaker_client=sm_client\n",
+    ")\n",
+    "prefix = \"lambda-step-pipeline\"\n",
+    "\n",
+    "account_id = boto3.client(\"sts\").get_caller_identity().get(\"Account\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define variables and parameters needed for the Pipeline steps\n",
+    "\n",
+    "role = sagemaker.get_execution_role()\n",
+    "default_bucket = sagemaker_session.default_bucket()\n",
+    "base_job_prefix = \"lambda-step-example\"\n",
+    "s3_prefix = \"lambda-step-pipeline\"\n",
+    "\n",
+    "processing_instance_count = ParameterInteger(\n",
+    "    name=\"ProcessingInstanceCount\", default_value=1\n",
+    ")\n",
+    "processing_instance_type = ParameterString(\n",
+    "    name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n",
+    ")\n",
+    "training_instance_type = ParameterString(\n",
+    "    name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n",
+    ")\n",
+    "model_approval_status = ParameterString(\n",
+    "    name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n",
+    ")\n",
+    "input_data = ParameterString(\n",
+    "    name=\"InputDataUrl\",\n",
+    "    default_value=f\"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv\",\n",
+    ")\n",
+    "\n",
+    "# Cache Pipeline steps to reduce execution time on subsequent executions\n",
+    "cache_config = CacheConfig(enable_caching=True, expire_after=\"30d\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Data Preparation\n",
+    "\n",
+    "An SKLearn processor is used to prepare the dataset for the training job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets. 
\n", + "\n", + "The output of this step is used as the input to the TrainingStep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile preprocess.py\n", + "\n", + "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", + "import argparse\n", + "import logging\n", + "import os\n", + "import pathlib\n", + "import requests\n", + "import tempfile\n", + "\n", + "import boto3\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + "logger.addHandler(logging.StreamHandler())\n", + "\n", + "\n", + "# Since we get a headerless CSV file we specify the column names here.\n", + "feature_columns_names = [\n", + " \"sex\",\n", + " \"length\",\n", + " \"diameter\",\n", + " \"height\",\n", + " \"whole_weight\",\n", + " \"shucked_weight\",\n", + " \"viscera_weight\",\n", + " \"shell_weight\",\n", + "]\n", + "label_column = \"rings\"\n", + "\n", + "feature_columns_dtype = {\n", + " \"sex\": str,\n", + " \"length\": np.float64,\n", + " \"diameter\": np.float64,\n", + " \"height\": np.float64,\n", + " \"whole_weight\": np.float64,\n", + " \"shucked_weight\": np.float64,\n", + " \"viscera_weight\": np.float64,\n", + " \"shell_weight\": np.float64,\n", + "}\n", + "label_column_dtype = {\"rings\": np.float64}\n", + "\n", + "\n", + "def merge_two_dicts(x, y):\n", + " \"\"\"Merges two dicts, returning a new copy.\"\"\"\n", + " z = x.copy()\n", + " z.update(y)\n", + " return z\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " logger.debug(\"Starting preprocessing.\")\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument(\"--input-data\", type=str, required=True)\n", + " args = parser.parse_args()\n", + "\n", + " base_dir = \"/opt/ml/processing\"\n", + " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", + " input_data = args.input_data\n", + " bucket = input_data.split(\"/\")[2]\n", + " key = \"/\".join(input_data.split(\"/\")[3:])\n", + "\n", + " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", + " fn = f\"{base_dir}/data/abalone-dataset.csv\"\n", + " s3 = boto3.resource(\"s3\")\n", + " s3.Bucket(bucket).download_file(key, fn)\n", + "\n", + " logger.debug(\"Reading downloaded data.\")\n", + " df = pd.read_csv(\n", + " fn,\n", + " header=None,\n", + " names=feature_columns_names + [label_column],\n", + " dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),\n", + " )\n", + " os.unlink(fn)\n", + "\n", + " logger.debug(\"Defining transformers.\")\n", + " numeric_features = list(feature_columns_names)\n", + " numeric_features.remove(\"sex\")\n", + " numeric_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"scaler\", StandardScaler()),\n", + " ]\n", + " )\n", + "\n", + " categorical_features = [\"sex\"]\n", + " categorical_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", + " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n", + " ]\n", + " )\n", + "\n", + " preprocess = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", numeric_transformer, numeric_features),\n", + " (\"cat\", categorical_transformer, 
categorical_features),\n", + " ]\n", + " )\n", + "\n", + " logger.info(\"Applying transforms.\")\n", + " y = df.pop(\"rings\")\n", + " X_pre = preprocess.fit_transform(df)\n", + " y_pre = y.to_numpy().reshape(len(y), 1)\n", + "\n", + " X = np.concatenate((y_pre, X_pre), axis=1)\n", + "\n", + " logger.info(\n", + " \"Splitting %d rows of data into train, validation, test datasets.\", len(X)\n", + " )\n", + " np.random.shuffle(X)\n", + " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", + "\n", + " logger.info(\"Writing out datasets to %s.\", base_dir)\n", + " pd.DataFrame(train).to_csv(f\"{base_dir}/train/train.csv\", header=False, index=False)\n", + " pd.DataFrame(validation).to_csv(\n", + " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", + " )\n", + " pd.DataFrame(test).to_csv(f\"{base_dir}/test/test.csv\", header=False, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the training data step using a python script.\n", + "# Split the training data set into train, test, and validation datasets\n", + "\n", + "sklearn_processor = SKLearnProcessor(\n", + " framework_version=\"0.23-1\",\n", + " instance_type=processing_instance_type,\n", + " instance_count=processing_instance_count,\n", + " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")\n", + "step_process = ProcessingStep(\n", + " name=\"PreprocessAbaloneData\",\n", + " processor=sklearn_processor,\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(\n", + " output_name=\"validation\", source=\"/opt/ml/processing/validation\"\n", + " ),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " code=\"preprocess.py\",\n", + " job_arguments=[\"--input-data\", input_data],\n", + " cache_config=cache_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Model Training\n", + "\n", + "Train an XGBoost model with the output of the ProcessingStep." 
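+    ,
+    "\n",
+    "\n",
+    "The training step consumes the ProcessingStep output through step `properties`, which SageMaker Pipelines resolves at execution time against the corresponding Describe API response. A minimal sketch of the referencing pattern used in the cell below (`train_uri` is just an illustrative variable name):\n",
+    "\n",
+    "```\n",
+    "# Resolved to the preprocessed train channel's S3 URI when the pipeline runs\n",
+    "train_uri = step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri\n",
+    "```"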
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the output path for the model artifacts from the training job\n",
+    "model_path = f\"s3://{default_bucket}/{base_job_prefix}/AbaloneTrain\"\n",
+    "\n",
+    "image_uri = sagemaker.image_uris.retrieve(\n",
+    "    framework=\"xgboost\",\n",
+    "    region=region,\n",
+    "    version=\"1.0-1\",\n",
+    "    py_version=\"py3\",\n",
+    "    instance_type=training_instance_type,\n",
+    ")\n",
+    "\n",
+    "xgb_train = Estimator(\n",
+    "    image_uri=image_uri,\n",
+    "    instance_type=training_instance_type,\n",
+    "    instance_count=1,\n",
+    "    output_path=model_path,\n",
+    "    base_job_name=f\"{prefix}/{base_job_prefix}/abalone-train\",\n",
+    "    sagemaker_session=sagemaker_session,\n",
+    "    role=role,\n",
+    ")\n",
+    "\n",
+    "xgb_train.set_hyperparameters(\n",
+    "    objective=\"reg:linear\",\n",
+    "    num_round=50,\n",
+    "    max_depth=5,\n",
+    "    eta=0.2,\n",
+    "    gamma=4,\n",
+    "    min_child_weight=6,\n",
+    "    subsample=0.7,\n",
+    "    silent=0,\n",
+    ")\n",
+    "\n",
+    "step_train = TrainingStep(\n",
+    "    name=\"TrainAbaloneModel\",\n",
+    "    estimator=xgb_train,\n",
+    "    inputs={\n",
+    "        \"train\": TrainingInput(\n",
+    "            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n",
+    "                \"train\"\n",
+    "            ].S3Output.S3Uri,\n",
+    "            content_type=\"text/csv\",\n",
+    "        ),\n",
+    "        \"validation\": TrainingInput(\n",
+    "            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n",
+    "                \"validation\"\n",
+    "            ].S3Output.S3Uri,\n",
+    "            content_type=\"text/csv\",\n",
+    "        ),\n",
+    "    },\n",
+    "    cache_config=cache_config,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Evaluate the model\n",
+    "\n",
+    "Use a processing job to evaluate the model trained in the TrainingStep. If the model's mean squared error falls within the threshold checked by the ConditionStep later in this notebook, a model is created and a Lambda function is invoked to deploy it to a SageMaker Endpoint.\n",
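+    "\n",
+    "For reference, the evaluation report that `evaluate.py` writes, and that `JsonGet` later reads through the `regression_metrics.mse.value` path, has the following shape (the numbers are purely illustrative):\n",
+    "\n",
+    "```\n",
+    "{\"regression_metrics\": {\"mse\": {\"value\": 4.95, \"standard_deviation\": 2.19}}}\n",
+    "```"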
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile evaluate.py\n", + "\n", + "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", + "import json\n", + "import logging\n", + "import pathlib\n", + "import pickle\n", + "import tarfile\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xgboost\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + "logger.addHandler(logging.StreamHandler())\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " logger.debug(\"Starting evaluation.\")\n", + " model_path = \"/opt/ml/processing/model/model.tar.gz\"\n", + " with tarfile.open(model_path) as tar:\n", + " tar.extractall(path=\".\")\n", + "\n", + " logger.debug(\"Loading xgboost model.\")\n", + " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", + "\n", + " logger.debug(\"Reading test data.\")\n", + " test_path = \"/opt/ml/processing/test/test.csv\"\n", + " df = pd.read_csv(test_path, header=None)\n", + "\n", + " logger.debug(\"Reading test data.\")\n", + " y_test = df.iloc[:, 0].to_numpy()\n", + " df.drop(df.columns[0], axis=1, inplace=True)\n", + " X_test = xgboost.DMatrix(df.values)\n", + "\n", + " logger.info(\"Performing predictions against test data.\")\n", + " predictions = model.predict(X_test)\n", + "\n", + " logger.debug(\"Calculating mean squared error.\")\n", + " mse = mean_squared_error(y_test, predictions)\n", + " std = np.std(y_test - predictions)\n", + " report_dict = {\n", + " \"regression_metrics\": {\n", + " \"mse\": {\"value\": mse, \"standard_deviation\": std},\n", + " },\n", + " }\n", + "\n", + " output_dir = \"/opt/ml/processing/evaluation\"\n", + " pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)\n", + "\n", + " logger.info(\"Writing out evaluation report with mse: %f\", mse)\n", + " evaluation_path = f\"{output_dir}/evaluation.json\"\n", + " with open(evaluation_path, \"w\") as f:\n", + " f.write(json.dumps(report_dict))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A ProcessingStep is used to evaluate the performance of the trained model. 
+    "# Based on the results of the evaluation, the model is created and deployed.\n",
+    "\n",
+    "script_eval = ScriptProcessor(\n",
+    "    image_uri=image_uri,\n",
+    "    command=[\"python3\"],\n",
+    "    instance_type=processing_instance_type,\n",
+    "    instance_count=1,\n",
+    "    base_job_name=f\"{prefix}/{base_job_prefix}/script-abalone-eval\",\n",
+    "    sagemaker_session=sagemaker_session,\n",
+    "    role=role,\n",
+    ")\n",
+    "\n",
+    "evaluation_report = PropertyFile(\n",
+    "    name=\"AbaloneEvaluationReport\",\n",
+    "    output_name=\"evaluation\",\n",
+    "    path=\"evaluation.json\",\n",
+    ")\n",
+    "\n",
+    "step_eval = ProcessingStep(\n",
+    "    name=\"EvaluateAbaloneModel\",\n",
+    "    processor=script_eval,\n",
+    "    inputs=[\n",
+    "        ProcessingInput(\n",
+    "            source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n",
+    "            destination=\"/opt/ml/processing/model\",\n",
+    "        ),\n",
+    "        ProcessingInput(\n",
+    "            source=step_process.properties.ProcessingOutputConfig.Outputs[\n",
+    "                \"test\"\n",
+    "            ].S3Output.S3Uri,\n",
+    "            destination=\"/opt/ml/processing/test\",\n",
+    "        ),\n",
+    "    ],\n",
+    "    outputs=[\n",
+    "        ProcessingOutput(\n",
+    "            output_name=\"evaluation\",\n",
+    "            source=\"/opt/ml/processing/evaluation\",\n",
+    "            destination=f\"s3://{default_bucket}/{s3_prefix}/evaluation_report\",\n",
+    "        ),\n",
+    "    ],\n",
+    "    code=\"evaluate.py\",\n",
+    "    property_files=[evaluation_report],\n",
+    "    cache_config=cache_config,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the model\n",
+    "\n",
+    "The model is created and the name of the model is provided to the Lambda function for deployment. The `CreateModelStep` dynamically assigns a name to the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create Model\n",
+    "model = Model(\n",
+    "    image_uri=image_uri,\n",
+    "    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n",
+    "    sagemaker_session=sagemaker_session,\n",
+    "    role=role,\n",
+    "    predictor_cls=XGBoostPredictor,\n",
+    ")\n",
+    "\n",
+    "step_create_model = CreateModelStep(\n",
+    "    name=\"CreateModel\",\n",
+    "    model=model,\n",
+    "    inputs=sagemaker.inputs.CreateModelInput(instance_type=\"ml.m4.large\"),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the Lambda Step\n",
+    "\n",
+    "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providng a script, function name and role for the Lambda function. \n",
+    "\n",
+    "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n",
+    "\n",
+    "The dictionary response from the Lambda function is parsed through the `LambdaOutput` objects provided to the `outputs` argument. The `output_name` in `LambdaOutput` corresponds to the dictionary key in the Lambda's return dictionary. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Define the Lambda function\n",
+    "\n",
+    "Users can choose to leverage the Lambda helper class to create a Lambda function and provide that function object to the LambdaStep. Alternatively, users can use a pre-deployed Lambda function and provide the function ARN to the `Lambda` helper class in the lambda step. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile lambda_helper.py\n",
+    "\n",
+    "\"\"\"\n",
+    "This Lambda function creates an Endpoint Configuration and deploys a model to an Endpoint. \n",
+    "The name of the model to deploy is provided via the `event` argument\n",
+    "\"\"\"\n",
+    "\n",
+    "import json\n",
+    "import boto3\n",
+    "\n",
+    "\n",
+    "def lambda_handler(event, context):\n",
+    "    \"\"\"Create an endpoint configuration and an endpoint for the model named in `event`.\"\"\"\n",
+    "    sm_client = boto3.client(\"sagemaker\")\n",
+    "\n",
+    "    # The name of the model created in the Pipeline CreateModelStep\n",
+    "    model_name = event[\"model_name\"]\n",
+    "\n",
+    "    endpoint_config_name = event[\"endpoint_config_name\"]\n",
+    "    endpoint_name = event[\"endpoint_name\"]\n",
+    "\n",
+    "    create_endpoint_config_response = sm_client.create_endpoint_config(\n",
+    "        EndpointConfigName=endpoint_config_name,\n",
+    "        ProductionVariants=[\n",
+    "            {\n",
+    "                \"InstanceType\": \"ml.m4.xlarge\",\n",
+    "                \"InitialVariantWeight\": 1,\n",
+    "                \"InitialInstanceCount\": 1,\n",
+    "                \"ModelName\": model_name,\n",
+    "                \"VariantName\": \"AllTraffic\",\n",
+    "            }\n",
+    "        ],\n",
+    "    )\n",
+    "\n",
+    "    create_endpoint_response = sm_client.create_endpoint(\n",
+    "        EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name\n",
+    "    )\n",
+    "\n",
+    "    return {\n",
+    "        \"statusCode\": 200,\n",
+    "        \"body\": json.dumps(\"Created Endpoint!\"),\n",
+    "        \"other_key\": \"example_value\",\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### IAM Role\n",
+    "\n",
+    "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lambda_role = None\n", + "assert lambda_role is not None, \"Lambda role must be provided!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Custom Lambda Step\n", + "\n", + "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", + "model_name = \"demo-lambda-model\" + current_time\n", + "endpoint_config_name = \"demo-lambda-deploy-endpoint-config-\" + current_time\n", + "endpoint_name = \"demo-lambda-deploy-endpoint-\" + current_time\n", + "\n", + "function_name = \"sagemaker-lambda-step-endpoint-deploy-\" + current_time\n", + "\n", + "# Lambda helper class can be used to create the Lambda function\n", + "func = Lambda(\n", + " function_name=function_name,\n", + " execution_role_arn=lambda_role,\n", + " script=\"lambda_helper.py\",\n", + " handler=\"lambda_helper.lambda_handler\",\n", + ")\n", + "\n", + "output_param_1 = LambdaOutput(\n", + " output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String\n", + ")\n", + "output_param_2 = LambdaOutput(\n", + " output_name=\"body\", output_type=LambdaOutputTypeEnum.String\n", + ")\n", + "output_param_3 = LambdaOutput(\n", + " output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String\n", + ")\n", + "\n", + "step_deploy_lambda = LambdaStep(\n", + " name=\"LambdaStep\",\n", + " lambda_func=func,\n", + " inputs={\n", + " \"model_name\": step_create_model.properties.ModelName,\n", + " \"endpoint_config_name\": endpoint_config_name,\n", + " \"endpoint_name\": endpoint_name,\n", + " },\n", + " outputs=[output_param_1, output_param_2, output_param_3],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# condition step for evaluating model quality and branching execution.\n", + "# The `json_path` value is based on the `report_dict` variable in `evaluate.py`\n", + "\n", + "cond_lte = ConditionLessThanOrEqualTo(\n", + " left=JsonGet(\n", + " step=step_eval,\n", + " property_file=evaluation_report,\n", + " json_path=\"regression_metrics.mse.value\",\n", + " ),\n", + " right=6.0,\n", + ")\n", + "\n", + "step_cond = ConditionStep(\n", + " name=\"CheckMSEAbaloneEvaluation\",\n", + " conditions=[cond_lte],\n", + " if_steps=[step_create_model, step_deploy_lambda],\n", + " else_steps=[],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use the same pipeline name across execution for cache usage.\n", + "\n", + "pipeline_name = \"lambda-step-pipeline\" + current_time\n", + "\n", + "pipeline = Pipeline(\n", + " name=pipeline_name,\n", + " parameters=[\n", + " processing_instance_type,\n", + " processing_instance_count,\n", + " training_instance_type,\n", + " input_data,\n", + " model_approval_status,\n", + " ],\n", + " steps=[step_process, step_train, step_eval, step_cond],\n", + " sagemaker_session=sagemaker_session,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Execute the Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "definition = json.loads(pipeline.definition())\n", + "definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline.upsert(role_arn=role)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execution = pipeline.start()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execution.wait()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Cleaning up resources\n", + "\n", + "Running the following cell will delete the following resources created in this notebook -\n", + "* SageMaker Model\n", + "* SageMaker Endpoint Configuration\n", + "* SageMaker Endpoint\n", + "* SageMaker Pipeline\n", + "* Lambda Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a SageMaker client\n", + "sm_client = boto3.client(\"sagemaker\")\n", + "\n", + "# Get the model name from the EndpointCofig. The CreateModelStep properties are not available outside the Pipeline execution context\n", + "# so `step_create_model.properties.ModelName` can not be used while deleting the model.\n", + "model_name = sm_client.describe_endpoint_config(\n", + " EndpointConfigName=endpoint_config_name\n", + ")[\"ProductionVariants\"][0][\"ModelName\"]\n", + "\n", + "# Delete the Model\n", + "sm_client.delete_model(ModelName=model_name)\n", + "\n", + "# Delete the EndpointConfig\n", + "sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)\n", + "\n", + "# Delete the endpoint\n", + "sm_client.delete_endpoint(EndpointName=endpoint_name)\n", + "\n", + "# Delete the Lambda function\n", + "func.delete()\n", + "\n", + "# Delete the Pipeline\n", + "sm_client.delete_pipeline(PipelineName=pipeline_name)" + ] + } + ], + "metadata": { + "instance_type": "ml.t3.medium", + "kernelspec": { + "display_name": "Python 3.9.4 64-bit ('python@3.9')", + "name": "python394jvsc74a57bd0ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + }, + "language_info": { + "name": "python", + "version": "" + }, + "metadata": { + "interpreter": { + "hash": "ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 0d621e0b032e82140e2b784e0c3b5ba4e925d529 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 29 Jul 2021 17:39:24 +0000 Subject: [PATCH 02/10] Updated notes on IAM policy for Lambda --- .../sagemaker-pipelines-lambda-step.ipynb | 69 ++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 81c6b617f0..161ede6e25 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -675,7 +675,74 @@ "source": [ "#### IAM Role\n", "\n", - "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. " + "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", + "\n", + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
\n", + "\n", + "If the notebook execution role has `iam:CreateRole` permission, the following code snipped can be used to create a role for the Lambda function - \n", + "\n", + "```\n", + "path='/'\n", + "role_name='lambda-pipelines-role'\n", + "\n", + "trust_policy={\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Principal\": {\n", + " \"Service\": \"lambda.amazonaws.com\"\n", + " },\n", + " \"Action\": \"sts:AssumeRole\"\n", + " }\n", + " ]\n", + "}\n", + "\n", + "response = iam.create_role(\n", + " Path=path,\n", + " RoleName=role_name,\n", + " AssumeRolePolicyDocument=json.dumps(trust_policy),\n", + " MaxSessionDuration=3600\n", + ")\n", + "\n", + "lambda_policy = {\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": \"logs:CreateLogGroup\",\n", + " \"Resource\": f\"arn:aws:logs:{region}:{account_id}:*\"\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"logs:CreateLogStream\",\n", + " \"logs:PutLogEvents\"\n", + " ],\n", + " \"Resource\": [\n", + " f\"arn:aws:logs:{region}:{account_id}:log-group:/aws/lambda/*:*\"\n", + " ]\n", + " }\n", + " ]\n", + "}\n", + "\n", + "response = iam.create_policy(\n", + " PolicyName='lambda-policy-for-pipelines',\n", + " PolicyDocument=json.dumps(lambda_policy)\n", + ")\n", + "\n", + "iam.attach_role_policy(\n", + " PolicyArn=f'arn:aws:iam::${account_id}:policy/lambda-sagemaker-policy',\n", + " RoleName=role_name\n", + ")\n", + "\n", + "### The policy used here is a FullAccess policy for SageMaker. It is best practice to provide policies with least priviledges. \n", + "iam.attach_role_policy(\n", + " PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',\n", + " RoleName=role_name\n", + ")\n", + "\n", + "```" ] }, { From a32c80abef04d50b826ad71feddc7cbfb9899113 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 29 Jul 2021 17:49:15 +0000 Subject: [PATCH 03/10] Updated IAM notes --- .../sagemaker-pipelines-lambda-step.ipynb | 67 +------------------ 1 file changed, 2 insertions(+), 65 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 161ede6e25..891a5ac7c0 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -677,72 +677,9 @@ "\n", "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", "\n", - "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
\n", + "The role needs Lambda basic execution permissions, Lambda trust policy, and any other sagemaker permissions based on what the Lambda function is doing\n", "\n", - "If the notebook execution role has `iam:CreateRole` permission, the following code snipped can be used to create a role for the Lambda function - \n", - "\n", - "```\n", - "path='/'\n", - "role_name='lambda-pipelines-role'\n", - "\n", - "trust_policy={\n", - " \"Version\": \"2012-10-17\",\n", - " \"Statement\": [\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Principal\": {\n", - " \"Service\": \"lambda.amazonaws.com\"\n", - " },\n", - " \"Action\": \"sts:AssumeRole\"\n", - " }\n", - " ]\n", - "}\n", - "\n", - "response = iam.create_role(\n", - " Path=path,\n", - " RoleName=role_name,\n", - " AssumeRolePolicyDocument=json.dumps(trust_policy),\n", - " MaxSessionDuration=3600\n", - ")\n", - "\n", - "lambda_policy = {\n", - " \"Version\": \"2012-10-17\",\n", - " \"Statement\": [\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": \"logs:CreateLogGroup\",\n", - " \"Resource\": f\"arn:aws:logs:{region}:{account_id}:*\"\n", - " },\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": [\n", - " \"logs:CreateLogStream\",\n", - " \"logs:PutLogEvents\"\n", - " ],\n", - " \"Resource\": [\n", - " f\"arn:aws:logs:{region}:{account_id}:log-group:/aws/lambda/*:*\"\n", - " ]\n", - " }\n", - " ]\n", - "}\n", - "\n", - "response = iam.create_policy(\n", - " PolicyName='lambda-policy-for-pipelines',\n", - " PolicyDocument=json.dumps(lambda_policy)\n", - ")\n", - "\n", - "iam.attach_role_policy(\n", - " PolicyArn=f'arn:aws:iam::${account_id}:policy/lambda-sagemaker-policy',\n", - " RoleName=role_name\n", - ")\n", - "\n", - "### The policy used here is a FullAccess policy for SageMaker. It is best practice to provide policies with least priviledges. \n", - "iam.attach_role_policy(\n", - " PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',\n", - " RoleName=role_name\n", - ")\n", - "\n", - "```" + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
" ] }, { From f67a731e2eeff9527bfdd4c173584f479bc91959 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Mon, 2 Aug 2021 14:29:34 +0000 Subject: [PATCH 04/10] Added pip install --- .../sagemaker-pipelines-lambda-step.ipynb | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 891a5ac7c0..c14baeab43 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -85,8 +85,9 @@ "metadata": {}, "outputs": [], "source": [ - "# import sys\n", - "# !{sys.executable} -m pip install \"sagemaker>=2.49.2\"" + "import sys\n", + "\n", + "!{sys.executable} -m pip install \"sagemaker>=2.50.0\"" ] }, { @@ -882,12 +883,21 @@ "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3.9.4 64-bit ('python@3.9')", - "name": "python394jvsc74a57bd0ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + "display_name": "Python 3 (Data Science)", + "language": "python", + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" }, "metadata": { "interpreter": { From 0e8f82df5bb8f6d303571ce34d6e3d68b9d2c83b Mon Sep 17 00:00:00 2001 From: kthadaka Date: Tue, 3 Aug 2021 17:04:05 +0000 Subject: [PATCH 05/10] Updated pip install --- .../tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index c14baeab43..5b41fe9fdf 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -87,7 +87,7 @@ "source": [ "import sys\n", "\n", - "!{sys.executable} -m pip install \"sagemaker>=2.50.0\"" + "!{sys.executable} -m pip install \"sagemaker>=2.51.0\"" ] }, { From cd721543285a5ec63a2dac6d5586e5f7b11b7884 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 5 Aug 2021 18:18:37 +0000 Subject: [PATCH 06/10] Added iam helper function --- .../tabular/lambda-step/iam_helper.py | 42 +++++++++++++++++++ .../sagemaker-pipelines-lambda-step.ipynb | 9 ++-- 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 sagemaker-pipelines/tabular/lambda-step/iam_helper.py diff --git a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py new file mode 100644 index 0000000000..deb4d3b60a --- /dev/null +++ b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py @@ -0,0 +1,42 @@ +import boto3 +import json + +iam = boto3.client('iam') + +def create_lambda_role(role_name): + try: + response = iam.create_role( + RoleName = role_name, + AssumeRolePolicyDocument = json.dumps({ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] + }), + Description='Role for Lambda to call ECS Fargate task' + 
) + + role_arn = response['Role']['Arn'] + + response = iam.attach_role_policy( + RoleName=role_name, + PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole' + ) + + response = iam.attach_role_policy( + PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess', + RoleName=role_name + ) + + return role_arn + + except iam.exceptions.EntityAlreadyExistsException: + print(f'Using ARN from existing role: {role_name}') + response = iam.get_role(RoleName=role_name) + return response['Role']['Arn'] \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 5b41fe9fdf..02937bfded 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -678,9 +678,9 @@ "\n", "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", "\n", - "The role needs Lambda basic execution permissions, Lambda trust policy, and any other sagemaker permissions based on what the Lambda function is doing\n", + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", "\n", - "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. " + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy of least priviledges as per AWS IAM best practices." 
] }, { @@ -689,8 +689,9 @@ "metadata": {}, "outputs": [], "source": [ - "lambda_role = None\n", - "assert lambda_role is not None, \"Lambda role must be provided!\"" + "from iam_helper import create_lambda_role\n", + "\n", + "lambda_role = create_lambda_role(\"lambda-deployment-role\")" ] }, { From 20fb4ca9e024f967a53dfd74ef43df90e19cebf9 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 5 Aug 2021 19:18:09 +0000 Subject: [PATCH 07/10] Updated role description --- sagemaker-pipelines/tabular/lambda-step/iam_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py index deb4d3b60a..51ff704ff0 100644 --- a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py +++ b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py @@ -19,7 +19,7 @@ def create_lambda_role(role_name): } ] }), - Description='Role for Lambda to call ECS Fargate task' + Description='Role for Lambda to call SageMaker functions' ) role_arn = response['Role']['Arn'] From b43ae3e32c8edaaf10fbb30b8e64c6fe6de41647 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 5 Aug 2021 21:04:18 +0000 Subject: [PATCH 08/10] Fixed typos --- .../lambda-step/sagemaker-pipelines-lambda-step.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 02937bfded..e1fc5f1495 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -13,7 +13,7 @@ "* Train an XGBoost Model\n", "* Evaluate the model performance\n", "* Create a model\n", - "* Deploy the model to a Sagemaker Hosted Endpoint using a Lambda Function\n", + "* Deploy the model to a SageMaker Hosted Endpoint using a Lambda Function\n", "\n", "A step to register the model into a Model Registry can be added to the pipeline using the `RegisterModel` step." ] @@ -603,7 +603,7 @@ "source": [ "#### Create the Lambda Step\n", "\n", - "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providng a script, function name and role for the Lambda function. \n", + "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providing a script, function name and role for the Lambda function. \n", "\n", "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n", "\n", @@ -680,7 +680,7 @@ "\n", "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", "\n", - "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. 
This should be replaced with an IAM policy of least priviledges as per AWS IAM best practices." + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy of least privileges as per AWS IAM best practices." ] }, { From 54eab65cb493db6f069dab3334af6d00fe523b6d Mon Sep 17 00:00:00 2001 From: kthadaka Date: Fri, 6 Aug 2021 14:00:48 +0000 Subject: [PATCH 09/10] Updated md cells --- .../tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index e1fc5f1495..9e19a0f280 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -680,7 +680,7 @@ "\n", "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", "\n", - "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy of least privileges as per AWS IAM best practices." + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy with least privileges as per AWS IAM best practices." 
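+    ,
+    "\n",
+    "\n",
+    "A minimal identity-based policy for this example might look like the following sketch; the broad `Resource` should be scoped down for production use:\n",
+    "\n",
+    "```\n",
+    "{\n",
+    "    \"Version\": \"2012-10-17\",\n",
+    "    \"Statement\": [\n",
+    "        {\n",
+    "            \"Effect\": \"Allow\",\n",
+    "            \"Action\": [\n",
+    "                \"sagemaker:CreateModel\",\n",
+    "                \"sagemaker:CreateEndpointConfig\",\n",
+    "                \"sagemaker:CreateEndpoint\"\n",
+    "            ],\n",
+    "            \"Resource\": \"*\"\n",
+    "        }\n",
+    "    ]\n",
+    "}\n",
+    "```"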
] }, { From 8fb68ca2960aa94f4154946c5c733c6f0b1f0a97 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Fri, 6 Aug 2021 18:57:25 +0000 Subject: [PATCH 10/10] updated formatting --- .../sagemaker-pipelines-lambda-step.ipynb | 46 ++++++------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 9e19a0f280..69df9cf634 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -148,9 +148,7 @@ "region = sagemaker.Session().boto_region_name\n", "sm_client = boto3.client(\"sagemaker\")\n", "boto_session = boto3.Session(region_name=region)\n", - "sagemaker_session = sagemaker.session.Session(\n", - " boto_session=boto_session, sagemaker_client=sm_client\n", - ")\n", + "sagemaker_session = sagemaker.session.Session(boto_session=boto_session, sagemaker_client=sm_client)\n", "prefix = \"lambda-step-pipeline\"\n", "\n", "account_id = boto3.client(\"sts\").get_caller_identity().get(\"Account\")" @@ -169,15 +167,11 @@ "base_job_prefix = \"lambda-step-example\"\n", "s3_prefix = \"lambda-step-pipeline\"\n", "\n", - "processing_instance_count = ParameterInteger(\n", - " name=\"ProcessingInstanceCount\", default_value=1\n", - ")\n", + "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", "processing_instance_type = ParameterString(\n", " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", ")\n", - "training_instance_type = ParameterString(\n", - " name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n", - ")\n", + "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", "model_approval_status = ParameterString(\n", " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", ")\n", @@ -325,9 +319,7 @@ "\n", " X = np.concatenate((y_pre, X_pre), axis=1)\n", "\n", - " logger.info(\n", - " \"Splitting %d rows of data into train, validation, test datasets.\", len(X)\n", - " )\n", + " logger.info(\"Splitting %d rows of data into train, validation, test datasets.\", len(X))\n", " np.random.shuffle(X)\n", " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", "\n", @@ -361,9 +353,7 @@ " processor=sklearn_processor,\n", " outputs=[\n", " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", - " ProcessingOutput(\n", - " output_name=\"validation\", source=\"/opt/ml/processing/validation\"\n", - " ),\n", + " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", " ],\n", " code=\"preprocess.py\",\n", @@ -424,9 +414,7 @@ " estimator=xgb_train,\n", " inputs={\n", " \"train\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"train\"\n", - " ].S3Output.S3Uri,\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", " content_type=\"text/csv\",\n", " ),\n", " \"validation\": TrainingInput(\n", @@ -547,9 +535,7 @@ " destination=\"/opt/ml/processing/model\",\n", " ),\n", " ProcessingInput(\n", - " source=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"test\"\n", - " ].S3Output.S3Uri,\n", + " 
source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n",
     "            destination=\"/opt/ml/processing/test\",\n",
     "        ),\n",
     "    ],\n",
@@ -717,15 +703,9 @@
     "    handler=\"lambda_helper.lambda_handler\",\n",
     ")\n",
     "\n",
-    "output_param_1 = LambdaOutput(\n",
-    "    output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String\n",
-    ")\n",
-    "output_param_2 = LambdaOutput(\n",
-    "    output_name=\"body\", output_type=LambdaOutputTypeEnum.String\n",
-    ")\n",
-    "output_param_3 = LambdaOutput(\n",
-    "    output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String\n",
-    ")\n",
+    "output_param_1 = LambdaOutput(output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String)\n",
+    "output_param_2 = LambdaOutput(output_name=\"body\", output_type=LambdaOutputTypeEnum.String)\n",
+    "output_param_3 = LambdaOutput(output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String)\n",
     "\n",
     "step_deploy_lambda = LambdaStep(\n",
     "    name=\"LambdaStep\",\n",
@@ -860,9 +840,9 @@
     "\n",
     "# Get the model name from the EndpointConfig. The CreateModelStep properties are not available outside the Pipeline execution context\n",
     "# so `step_create_model.properties.ModelName` cannot be used while deleting the model.\n",
-    "model_name = sm_client.describe_endpoint_config(\n",
-    "    EndpointConfigName=endpoint_config_name\n",
-    ")[\"ProductionVariants\"][0][\"ModelName\"]\n",
+    "model_name = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)[\n",
+    "    \"ProductionVariants\"\n",
+    "][0][\"ModelName\"]\n",
     "\n",
     "# Delete the Model\n",
     "sm_client.delete_model(ModelName=model_name)\n",