From 33085817a5419d74e195fe9aa00b564c2fea478e Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 29 Jul 2021 17:14:02 +0000 Subject: [PATCH 01/10] Added lambda step example --- .../sagemaker-pipelines-lambda-step.ipynb | 896 ++++++++++++++++++ 1 file changed, 896 insertions(+) create mode 100644 sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb new file mode 100644 index 0000000000..81c6b617f0 --- /dev/null +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -0,0 +1,896 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### SageMaker Pipelines Lambda Step\n", + "\n", + "This notebook illustrates how a Lambda function can be run as a step in a SageMaker Pipeline. \n", + "\n", + "The steps in this pipeline include -\n", + "* Preprocessing the abalone dataset\n", + "* Train an XGBoost Model\n", + "* Evaluate the model performance\n", + "* Create a model\n", + "* Deploy the model to a Sagemaker Hosted Endpoint using a Lambda Function\n", + "\n", + "A step to register the model into a Model Registry can be added to the pipeline using the `RegisterModel` step." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Prerequisites\n", + "\n", + "The notebook execution role should have policies which enable the notebook to create a Lambda function. The Amazon managed policy `AmazonSageMakerPipelinesIntegrations` can be added to the notebook execution role. \n", + "\n", + "The policy description is -\n", + "\n", + "```\n", + "\n", + "{\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"lambda:CreateFunction\",\n", + " \"lambda:DeleteFunction\",\n", + " \"lambda:InvokeFunction\",\n", + " \"lambda:UpdateFunctionCode\"\n", + " ],\n", + " \"Resource\": [\n", + " \"arn:aws:lambda:*:*:function:*sagemaker*\",\n", + " \"arn:aws:lambda:*:*:function:*sageMaker*\",\n", + " \"arn:aws:lambda:*:*:function:*SageMaker*\"\n", + " ]\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"sqs:CreateQueue\",\n", + " \"sqs:SendMessage\"\n", + " ],\n", + " \"Resource\": [\n", + " \"arn:aws:sqs:*:*:*sagemaker*\",\n", + " \"arn:aws:sqs:*:*:*sageMaker*\",\n", + " \"arn:aws:sqs:*:*:*SageMaker*\"\n", + " ]\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"iam:PassRole\"\n", + " ],\n", + " \"Resource\": \"arn:aws:iam::*:role/*\",\n", + " \"Condition\": {\n", + " \"StringEquals\": {\n", + " \"iam:PassedToService\": [\n", + " \"lambda.amazonaws.com\"\n", + " ]\n", + " }\n", + " }\n", + " }\n", + " ]\n", + "}\n", + " \n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# !{sys.executable} -m pip install \"sagemaker>=2.49.2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import boto3\n", + "import sagemaker\n", + "\n", + "from sagemaker.estimator import Estimator\n", + "from sagemaker.inputs import TrainingInput\n", + "\n", + "from sagemaker.processing import (\n", + " ProcessingInput,\n", + " ProcessingOutput,\n", + " Processor,\n", + " ScriptProcessor,\n", + ")\n", + "\n", + "from sagemaker import 
Model\n",
+    "from sagemaker.xgboost import XGBoostPredictor\n",
+    "from sagemaker.sklearn.processing import SKLearnProcessor\n",
+    "\n",
+    "from sagemaker.workflow.parameters import (\n",
+    "    ParameterInteger,\n",
+    "    ParameterString,\n",
+    ")\n",
+    "from sagemaker.workflow.pipeline import Pipeline\n",
+    "from sagemaker.workflow.properties import PropertyFile\n",
+    "from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CacheConfig\n",
+    "from sagemaker.workflow.lambda_step import (\n",
+    "    LambdaStep,\n",
+    "    LambdaOutput,\n",
+    "    LambdaOutputTypeEnum,\n",
+    ")\n",
+    "from sagemaker.workflow.step_collections import CreateModelStep\n",
+    "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n",
+    "from sagemaker.workflow.condition_step import (\n",
+    "    ConditionStep,\n",
+    "    JsonGet,\n",
+    ")\n",
+    "\n",
+    "from sagemaker.lambda_helper import Lambda"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the SageMaker Session\n",
+    "\n",
+    "region = sagemaker.Session().boto_region_name\n",
+    "sm_client = boto3.client(\"sagemaker\")\n",
+    "boto_session = boto3.Session(region_name=region)\n",
+    "sagemaker_session = sagemaker.session.Session(\n",
+    "    boto_session=boto_session, sagemaker_client=sm_client\n",
+    ")\n",
+    "prefix = \"lambda-step-pipeline\"\n",
+    "\n",
+    "account_id = boto3.client(\"sts\").get_caller_identity().get(\"Account\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define variables and parameters needed for the Pipeline steps\n",
+    "\n",
+    "role = sagemaker.get_execution_role()\n",
+    "default_bucket = sagemaker_session.default_bucket()\n",
+    "base_job_prefix = \"lambda-step-example\"\n",
+    "s3_prefix = \"lambda-step-pipeline\"\n",
+    "\n",
+    "processing_instance_count = ParameterInteger(\n",
+    "    name=\"ProcessingInstanceCount\", default_value=1\n",
+    ")\n",
+    "processing_instance_type = ParameterString(\n",
+    "    name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n",
+    ")\n",
+    "training_instance_type = ParameterString(\n",
+    "    name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n",
+    ")\n",
+    "model_approval_status = ParameterString(\n",
+    "    name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n",
+    ")\n",
+    "input_data = ParameterString(\n",
+    "    name=\"InputDataUrl\",\n",
+    "    default_value=f\"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv\",\n",
+    ")\n",
+    "\n",
+    "# Cache Pipeline steps to reduce execution time on subsequent executions\n",
+    "cache_config = CacheConfig(enable_caching=True, expire_after=\"30d\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Data Preparation\n",
+    "\n",
+    "An SKLearn processor is used to prepare the dataset for the training job. Using the script `preprocess.py`, the dataset is featurized and split into train, test, and validation datasets. 
\n", + "\n", + "The output of this step is used as the input to the TrainingStep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile preprocess.py\n", + "\n", + "\"\"\"Feature engineers the abalone dataset.\"\"\"\n", + "import argparse\n", + "import logging\n", + "import os\n", + "import pathlib\n", + "import requests\n", + "import tempfile\n", + "\n", + "import boto3\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + "logger.addHandler(logging.StreamHandler())\n", + "\n", + "\n", + "# Since we get a headerless CSV file we specify the column names here.\n", + "feature_columns_names = [\n", + " \"sex\",\n", + " \"length\",\n", + " \"diameter\",\n", + " \"height\",\n", + " \"whole_weight\",\n", + " \"shucked_weight\",\n", + " \"viscera_weight\",\n", + " \"shell_weight\",\n", + "]\n", + "label_column = \"rings\"\n", + "\n", + "feature_columns_dtype = {\n", + " \"sex\": str,\n", + " \"length\": np.float64,\n", + " \"diameter\": np.float64,\n", + " \"height\": np.float64,\n", + " \"whole_weight\": np.float64,\n", + " \"shucked_weight\": np.float64,\n", + " \"viscera_weight\": np.float64,\n", + " \"shell_weight\": np.float64,\n", + "}\n", + "label_column_dtype = {\"rings\": np.float64}\n", + "\n", + "\n", + "def merge_two_dicts(x, y):\n", + " \"\"\"Merges two dicts, returning a new copy.\"\"\"\n", + " z = x.copy()\n", + " z.update(y)\n", + " return z\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " logger.debug(\"Starting preprocessing.\")\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument(\"--input-data\", type=str, required=True)\n", + " args = parser.parse_args()\n", + "\n", + " base_dir = \"/opt/ml/processing\"\n", + " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", + " input_data = args.input_data\n", + " bucket = input_data.split(\"/\")[2]\n", + " key = \"/\".join(input_data.split(\"/\")[3:])\n", + "\n", + " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", + " fn = f\"{base_dir}/data/abalone-dataset.csv\"\n", + " s3 = boto3.resource(\"s3\")\n", + " s3.Bucket(bucket).download_file(key, fn)\n", + "\n", + " logger.debug(\"Reading downloaded data.\")\n", + " df = pd.read_csv(\n", + " fn,\n", + " header=None,\n", + " names=feature_columns_names + [label_column],\n", + " dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),\n", + " )\n", + " os.unlink(fn)\n", + "\n", + " logger.debug(\"Defining transformers.\")\n", + " numeric_features = list(feature_columns_names)\n", + " numeric_features.remove(\"sex\")\n", + " numeric_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"scaler\", StandardScaler()),\n", + " ]\n", + " )\n", + "\n", + " categorical_features = [\"sex\"]\n", + " categorical_transformer = Pipeline(\n", + " steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", + " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\")),\n", + " ]\n", + " )\n", + "\n", + " preprocess = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", numeric_transformer, numeric_features),\n", + " (\"cat\", categorical_transformer, 
categorical_features),\n", + " ]\n", + " )\n", + "\n", + " logger.info(\"Applying transforms.\")\n", + " y = df.pop(\"rings\")\n", + " X_pre = preprocess.fit_transform(df)\n", + " y_pre = y.to_numpy().reshape(len(y), 1)\n", + "\n", + " X = np.concatenate((y_pre, X_pre), axis=1)\n", + "\n", + " logger.info(\n", + " \"Splitting %d rows of data into train, validation, test datasets.\", len(X)\n", + " )\n", + " np.random.shuffle(X)\n", + " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", + "\n", + " logger.info(\"Writing out datasets to %s.\", base_dir)\n", + " pd.DataFrame(train).to_csv(f\"{base_dir}/train/train.csv\", header=False, index=False)\n", + " pd.DataFrame(validation).to_csv(\n", + " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", + " )\n", + " pd.DataFrame(test).to_csv(f\"{base_dir}/test/test.csv\", header=False, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the training data step using a python script.\n", + "# Split the training data set into train, test, and validation datasets\n", + "\n", + "sklearn_processor = SKLearnProcessor(\n", + " framework_version=\"0.23-1\",\n", + " instance_type=processing_instance_type,\n", + " instance_count=processing_instance_count,\n", + " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")\n", + "step_process = ProcessingStep(\n", + " name=\"PreprocessAbaloneData\",\n", + " processor=sklearn_processor,\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(\n", + " output_name=\"validation\", source=\"/opt/ml/processing/validation\"\n", + " ),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " code=\"preprocess.py\",\n", + " job_arguments=[\"--input-data\", input_data],\n", + " cache_config=cache_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Model Training\n", + "\n", + "Train an XGBoost model with the output of the ProcessingStep." 
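+    ,
+    "\n",
+    "\n",
+    "The training step consumes the ProcessingStep output through step `properties`, which SageMaker Pipelines resolves at execution time against the corresponding Describe API response. A minimal sketch of the referencing pattern used in the cell below (`train_uri` is just an illustrative variable name):\n",
+    "\n",
+    "```\n",
+    "# Resolved to the preprocessed train channel's S3 URI when the pipeline runs\n",
+    "train_uri = step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri\n",
+    "```"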
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the output path for the model artifacts from the training job\n",
+    "model_path = f\"s3://{default_bucket}/{base_job_prefix}/AbaloneTrain\"\n",
+    "\n",
+    "image_uri = sagemaker.image_uris.retrieve(\n",
+    "    framework=\"xgboost\",\n",
+    "    region=region,\n",
+    "    version=\"1.0-1\",\n",
+    "    py_version=\"py3\",\n",
+    "    instance_type=training_instance_type,\n",
+    ")\n",
+    "\n",
+    "xgb_train = Estimator(\n",
+    "    image_uri=image_uri,\n",
+    "    instance_type=training_instance_type,\n",
+    "    instance_count=1,\n",
+    "    output_path=model_path,\n",
+    "    base_job_name=f\"{prefix}/{base_job_prefix}/abalone-train\",\n",
+    "    sagemaker_session=sagemaker_session,\n",
+    "    role=role,\n",
+    ")\n",
+    "\n",
+    "xgb_train.set_hyperparameters(\n",
+    "    objective=\"reg:linear\",\n",
+    "    num_round=50,\n",
+    "    max_depth=5,\n",
+    "    eta=0.2,\n",
+    "    gamma=4,\n",
+    "    min_child_weight=6,\n",
+    "    subsample=0.7,\n",
+    "    silent=0,\n",
+    ")\n",
+    "\n",
+    "step_train = TrainingStep(\n",
+    "    name=\"TrainAbaloneModel\",\n",
+    "    estimator=xgb_train,\n",
+    "    inputs={\n",
+    "        \"train\": TrainingInput(\n",
+    "            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n",
+    "                \"train\"\n",
+    "            ].S3Output.S3Uri,\n",
+    "            content_type=\"text/csv\",\n",
+    "        ),\n",
+    "        \"validation\": TrainingInput(\n",
+    "            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n",
+    "                \"validation\"\n",
+    "            ].S3Output.S3Uri,\n",
+    "            content_type=\"text/csv\",\n",
+    "        ),\n",
+    "    },\n",
+    "    cache_config=cache_config,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Evaluate the model\n",
+    "\n",
+    "Use a processing job to evaluate the model trained in the TrainingStep. If the model's mean squared error falls within the threshold checked by the ConditionStep later in this notebook, a model is created and a Lambda function is invoked to deploy it to a SageMaker Endpoint.\n",
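+    "\n",
+    "For reference, the evaluation report that `evaluate.py` writes, and that `JsonGet` later reads through the `regression_metrics.mse.value` path, has the following shape (the numbers are purely illustrative):\n",
+    "\n",
+    "```\n",
+    "{\"regression_metrics\": {\"mse\": {\"value\": 4.95, \"standard_deviation\": 2.19}}}\n",
+    "```"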
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile evaluate.py\n", + "\n", + "\"\"\"Evaluation script for measuring mean squared error.\"\"\"\n", + "import json\n", + "import logging\n", + "import pathlib\n", + "import pickle\n", + "import tarfile\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xgboost\n", + "\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.INFO)\n", + "logger.addHandler(logging.StreamHandler())\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " logger.debug(\"Starting evaluation.\")\n", + " model_path = \"/opt/ml/processing/model/model.tar.gz\"\n", + " with tarfile.open(model_path) as tar:\n", + " tar.extractall(path=\".\")\n", + "\n", + " logger.debug(\"Loading xgboost model.\")\n", + " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", + "\n", + " logger.debug(\"Reading test data.\")\n", + " test_path = \"/opt/ml/processing/test/test.csv\"\n", + " df = pd.read_csv(test_path, header=None)\n", + "\n", + " logger.debug(\"Reading test data.\")\n", + " y_test = df.iloc[:, 0].to_numpy()\n", + " df.drop(df.columns[0], axis=1, inplace=True)\n", + " X_test = xgboost.DMatrix(df.values)\n", + "\n", + " logger.info(\"Performing predictions against test data.\")\n", + " predictions = model.predict(X_test)\n", + "\n", + " logger.debug(\"Calculating mean squared error.\")\n", + " mse = mean_squared_error(y_test, predictions)\n", + " std = np.std(y_test - predictions)\n", + " report_dict = {\n", + " \"regression_metrics\": {\n", + " \"mse\": {\"value\": mse, \"standard_deviation\": std},\n", + " },\n", + " }\n", + "\n", + " output_dir = \"/opt/ml/processing/evaluation\"\n", + " pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)\n", + "\n", + " logger.info(\"Writing out evaluation report with mse: %f\", mse)\n", + " evaluation_path = f\"{output_dir}/evaluation.json\"\n", + " with open(evaluation_path, \"w\") as f:\n", + " f.write(json.dumps(report_dict))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# A ProcessingStep is used to evaluate the performance of the trained model. 
+    "# Based on the results of the evaluation, the model is created and deployed.\n",
+    "\n",
+    "script_eval = ScriptProcessor(\n",
+    "    image_uri=image_uri,\n",
+    "    command=[\"python3\"],\n",
+    "    instance_type=processing_instance_type,\n",
+    "    instance_count=1,\n",
+    "    base_job_name=f\"{prefix}/{base_job_prefix}/script-abalone-eval\",\n",
+    "    sagemaker_session=sagemaker_session,\n",
+    "    role=role,\n",
+    ")\n",
+    "\n",
+    "evaluation_report = PropertyFile(\n",
+    "    name=\"AbaloneEvaluationReport\",\n",
+    "    output_name=\"evaluation\",\n",
+    "    path=\"evaluation.json\",\n",
+    ")\n",
+    "\n",
+    "step_eval = ProcessingStep(\n",
+    "    name=\"EvaluateAbaloneModel\",\n",
+    "    processor=script_eval,\n",
+    "    inputs=[\n",
+    "        ProcessingInput(\n",
+    "            source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n",
+    "            destination=\"/opt/ml/processing/model\",\n",
+    "        ),\n",
+    "        ProcessingInput(\n",
+    "            source=step_process.properties.ProcessingOutputConfig.Outputs[\n",
+    "                \"test\"\n",
+    "            ].S3Output.S3Uri,\n",
+    "            destination=\"/opt/ml/processing/test\",\n",
+    "        ),\n",
+    "    ],\n",
+    "    outputs=[\n",
+    "        ProcessingOutput(\n",
+    "            output_name=\"evaluation\",\n",
+    "            source=\"/opt/ml/processing/evaluation\",\n",
+    "            destination=f\"s3://{default_bucket}/{s3_prefix}/evaluation_report\",\n",
+    "        ),\n",
+    "    ],\n",
+    "    code=\"evaluate.py\",\n",
+    "    property_files=[evaluation_report],\n",
+    "    cache_config=cache_config,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the model\n",
+    "\n",
+    "The model is created and the name of the model is provided to the Lambda function for deployment. The `CreateModelStep` dynamically assigns a name to the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create Model\n",
+    "model = Model(\n",
+    "    image_uri=image_uri,\n",
+    "    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n",
+    "    sagemaker_session=sagemaker_session,\n",
+    "    role=role,\n",
+    "    predictor_cls=XGBoostPredictor,\n",
+    ")\n",
+    "\n",
+    "step_create_model = CreateModelStep(\n",
+    "    name=\"CreateModel\",\n",
+    "    model=model,\n",
+    "    inputs=sagemaker.inputs.CreateModelInput(instance_type=\"ml.m4.large\"),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Create the Lambda Step\n",
+    "\n",
+    "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providng a script, function name and role for the Lambda function. \n",
+    "\n",
+    "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n",
+    "\n",
+    "The dictionary response from the Lambda function is parsed through the `LambdaOutput` objects provided to the `outputs` argument. The `output_name` in `LambdaOutput` corresponds to the dictionary key in the Lambda's return dictionary. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Define the Lambda function\n",
+    "\n",
+    "Users can choose to leverage the Lambda helper class to create a Lambda function and provide that function object to the LambdaStep. Alternatively, users can use a pre-deployed Lambda function and provide the function ARN to the `Lambda` helper class in the lambda step. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile lambda_helper.py\n",
+    "\n",
+    "\"\"\"\n",
+    "This Lambda function creates an Endpoint Configuration and deploys a model to an Endpoint. \n",
+    "The name of the model to deploy is provided via the `event` argument\n",
+    "\"\"\"\n",
+    "\n",
+    "import json\n",
+    "import boto3\n",
+    "\n",
+    "\n",
+    "def lambda_handler(event, context):\n",
+    "    \"\"\"Create an endpoint configuration and an endpoint for the model named in `event`.\"\"\"\n",
+    "    sm_client = boto3.client(\"sagemaker\")\n",
+    "\n",
+    "    # The name of the model created in the Pipeline CreateModelStep\n",
+    "    model_name = event[\"model_name\"]\n",
+    "\n",
+    "    endpoint_config_name = event[\"endpoint_config_name\"]\n",
+    "    endpoint_name = event[\"endpoint_name\"]\n",
+    "\n",
+    "    create_endpoint_config_response = sm_client.create_endpoint_config(\n",
+    "        EndpointConfigName=endpoint_config_name,\n",
+    "        ProductionVariants=[\n",
+    "            {\n",
+    "                \"InstanceType\": \"ml.m4.xlarge\",\n",
+    "                \"InitialVariantWeight\": 1,\n",
+    "                \"InitialInstanceCount\": 1,\n",
+    "                \"ModelName\": model_name,\n",
+    "                \"VariantName\": \"AllTraffic\",\n",
+    "            }\n",
+    "        ],\n",
+    "    )\n",
+    "\n",
+    "    create_endpoint_response = sm_client.create_endpoint(\n",
+    "        EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name\n",
+    "    )\n",
+    "\n",
+    "    return {\n",
+    "        \"statusCode\": 200,\n",
+    "        \"body\": json.dumps(\"Created Endpoint!\"),\n",
+    "        \"other_key\": \"example_value\",\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### IAM Role\n",
+    "\n",
+    "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lambda_role = None\n", + "assert lambda_role is not None, \"Lambda role must be provided!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Custom Lambda Step\n", + "\n", + "current_time = time.strftime(\"%m-%d-%H-%M-%S\", time.localtime())\n", + "model_name = \"demo-lambda-model\" + current_time\n", + "endpoint_config_name = \"demo-lambda-deploy-endpoint-config-\" + current_time\n", + "endpoint_name = \"demo-lambda-deploy-endpoint-\" + current_time\n", + "\n", + "function_name = \"sagemaker-lambda-step-endpoint-deploy-\" + current_time\n", + "\n", + "# Lambda helper class can be used to create the Lambda function\n", + "func = Lambda(\n", + " function_name=function_name,\n", + " execution_role_arn=lambda_role,\n", + " script=\"lambda_helper.py\",\n", + " handler=\"lambda_helper.lambda_handler\",\n", + ")\n", + "\n", + "output_param_1 = LambdaOutput(\n", + " output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String\n", + ")\n", + "output_param_2 = LambdaOutput(\n", + " output_name=\"body\", output_type=LambdaOutputTypeEnum.String\n", + ")\n", + "output_param_3 = LambdaOutput(\n", + " output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String\n", + ")\n", + "\n", + "step_deploy_lambda = LambdaStep(\n", + " name=\"LambdaStep\",\n", + " lambda_func=func,\n", + " inputs={\n", + " \"model_name\": step_create_model.properties.ModelName,\n", + " \"endpoint_config_name\": endpoint_config_name,\n", + " \"endpoint_name\": endpoint_name,\n", + " },\n", + " outputs=[output_param_1, output_param_2, output_param_3],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# condition step for evaluating model quality and branching execution.\n", + "# The `json_path` value is based on the `report_dict` variable in `evaluate.py`\n", + "\n", + "cond_lte = ConditionLessThanOrEqualTo(\n", + " left=JsonGet(\n", + " step=step_eval,\n", + " property_file=evaluation_report,\n", + " json_path=\"regression_metrics.mse.value\",\n", + " ),\n", + " right=6.0,\n", + ")\n", + "\n", + "step_cond = ConditionStep(\n", + " name=\"CheckMSEAbaloneEvaluation\",\n", + " conditions=[cond_lte],\n", + " if_steps=[step_create_model, step_deploy_lambda],\n", + " else_steps=[],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use the same pipeline name across execution for cache usage.\n", + "\n", + "pipeline_name = \"lambda-step-pipeline\" + current_time\n", + "\n", + "pipeline = Pipeline(\n", + " name=pipeline_name,\n", + " parameters=[\n", + " processing_instance_type,\n", + " processing_instance_count,\n", + " training_instance_type,\n", + " input_data,\n", + " model_approval_status,\n", + " ],\n", + " steps=[step_process, step_train, step_eval, step_cond],\n", + " sagemaker_session=sagemaker_session,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Execute the Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "definition = json.loads(pipeline.definition())\n", + "definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline.upsert(role_arn=role)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execution = pipeline.start()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execution.wait()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Cleaning up resources\n", + "\n", + "Running the following cell will delete the following resources created in this notebook -\n", + "* SageMaker Model\n", + "* SageMaker Endpoint Configuration\n", + "* SageMaker Endpoint\n", + "* SageMaker Pipeline\n", + "* Lambda Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a SageMaker client\n", + "sm_client = boto3.client(\"sagemaker\")\n", + "\n", + "# Get the model name from the EndpointCofig. The CreateModelStep properties are not available outside the Pipeline execution context\n", + "# so `step_create_model.properties.ModelName` can not be used while deleting the model.\n", + "model_name = sm_client.describe_endpoint_config(\n", + " EndpointConfigName=endpoint_config_name\n", + ")[\"ProductionVariants\"][0][\"ModelName\"]\n", + "\n", + "# Delete the Model\n", + "sm_client.delete_model(ModelName=model_name)\n", + "\n", + "# Delete the EndpointConfig\n", + "sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)\n", + "\n", + "# Delete the endpoint\n", + "sm_client.delete_endpoint(EndpointName=endpoint_name)\n", + "\n", + "# Delete the Lambda function\n", + "func.delete()\n", + "\n", + "# Delete the Pipeline\n", + "sm_client.delete_pipeline(PipelineName=pipeline_name)" + ] + } + ], + "metadata": { + "instance_type": "ml.t3.medium", + "kernelspec": { + "display_name": "Python 3.9.4 64-bit ('python@3.9')", + "name": "python394jvsc74a57bd0ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + }, + "language_info": { + "name": "python", + "version": "" + }, + "metadata": { + "interpreter": { + "hash": "ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 0d621e0b032e82140e2b784e0c3b5ba4e925d529 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 29 Jul 2021 17:39:24 +0000 Subject: [PATCH 02/10] Updated notes on IAM policy for Lambda --- .../sagemaker-pipelines-lambda-step.ipynb | 69 ++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 81c6b617f0..161ede6e25 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -675,7 +675,74 @@ "source": [ "#### IAM Role\n", "\n", - "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. " + "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", + "\n", + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
\n", + "\n", + "If the notebook execution role has `iam:CreateRole` permission, the following code snipped can be used to create a role for the Lambda function - \n", + "\n", + "```\n", + "path='/'\n", + "role_name='lambda-pipelines-role'\n", + "\n", + "trust_policy={\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Principal\": {\n", + " \"Service\": \"lambda.amazonaws.com\"\n", + " },\n", + " \"Action\": \"sts:AssumeRole\"\n", + " }\n", + " ]\n", + "}\n", + "\n", + "response = iam.create_role(\n", + " Path=path,\n", + " RoleName=role_name,\n", + " AssumeRolePolicyDocument=json.dumps(trust_policy),\n", + " MaxSessionDuration=3600\n", + ")\n", + "\n", + "lambda_policy = {\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": \"logs:CreateLogGroup\",\n", + " \"Resource\": f\"arn:aws:logs:{region}:{account_id}:*\"\n", + " },\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"logs:CreateLogStream\",\n", + " \"logs:PutLogEvents\"\n", + " ],\n", + " \"Resource\": [\n", + " f\"arn:aws:logs:{region}:{account_id}:log-group:/aws/lambda/*:*\"\n", + " ]\n", + " }\n", + " ]\n", + "}\n", + "\n", + "response = iam.create_policy(\n", + " PolicyName='lambda-policy-for-pipelines',\n", + " PolicyDocument=json.dumps(lambda_policy)\n", + ")\n", + "\n", + "iam.attach_role_policy(\n", + " PolicyArn=f'arn:aws:iam::${account_id}:policy/lambda-sagemaker-policy',\n", + " RoleName=role_name\n", + ")\n", + "\n", + "### The policy used here is a FullAccess policy for SageMaker. It is best practice to provide policies with least priviledges. \n", + "iam.attach_role_policy(\n", + " PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',\n", + " RoleName=role_name\n", + ")\n", + "\n", + "```" ] }, { From a32c80abef04d50b826ad71feddc7cbfb9899113 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 29 Jul 2021 17:49:15 +0000 Subject: [PATCH 03/10] Updated IAM notes --- .../sagemaker-pipelines-lambda-step.ipynb | 67 +------------------ 1 file changed, 2 insertions(+), 65 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 161ede6e25..891a5ac7c0 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -677,72 +677,9 @@ "\n", "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", "\n", - "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
\n", + "The role needs Lambda basic execution permissions, Lambda trust policy, and any other sagemaker permissions based on what the Lambda function is doing\n", "\n", - "If the notebook execution role has `iam:CreateRole` permission, the following code snipped can be used to create a role for the Lambda function - \n", - "\n", - "```\n", - "path='/'\n", - "role_name='lambda-pipelines-role'\n", - "\n", - "trust_policy={\n", - " \"Version\": \"2012-10-17\",\n", - " \"Statement\": [\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Principal\": {\n", - " \"Service\": \"lambda.amazonaws.com\"\n", - " },\n", - " \"Action\": \"sts:AssumeRole\"\n", - " }\n", - " ]\n", - "}\n", - "\n", - "response = iam.create_role(\n", - " Path=path,\n", - " RoleName=role_name,\n", - " AssumeRolePolicyDocument=json.dumps(trust_policy),\n", - " MaxSessionDuration=3600\n", - ")\n", - "\n", - "lambda_policy = {\n", - " \"Version\": \"2012-10-17\",\n", - " \"Statement\": [\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": \"logs:CreateLogGroup\",\n", - " \"Resource\": f\"arn:aws:logs:{region}:{account_id}:*\"\n", - " },\n", - " {\n", - " \"Effect\": \"Allow\",\n", - " \"Action\": [\n", - " \"logs:CreateLogStream\",\n", - " \"logs:PutLogEvents\"\n", - " ],\n", - " \"Resource\": [\n", - " f\"arn:aws:logs:{region}:{account_id}:log-group:/aws/lambda/*:*\"\n", - " ]\n", - " }\n", - " ]\n", - "}\n", - "\n", - "response = iam.create_policy(\n", - " PolicyName='lambda-policy-for-pipelines',\n", - " PolicyDocument=json.dumps(lambda_policy)\n", - ")\n", - "\n", - "iam.attach_role_policy(\n", - " PolicyArn=f'arn:aws:iam::${account_id}:policy/lambda-sagemaker-policy',\n", - " RoleName=role_name\n", - ")\n", - "\n", - "### The policy used here is a FullAccess policy for SageMaker. It is best practice to provide policies with least priviledges. \n", - "iam.attach_role_policy(\n", - " PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',\n", - " RoleName=role_name\n", - ")\n", - "\n", - "```" + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. 
" ] }, { From f67a731e2eeff9527bfdd4c173584f479bc91959 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Mon, 2 Aug 2021 14:29:34 +0000 Subject: [PATCH 04/10] Added pip install --- .../sagemaker-pipelines-lambda-step.ipynb | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 891a5ac7c0..c14baeab43 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -85,8 +85,9 @@ "metadata": {}, "outputs": [], "source": [ - "# import sys\n", - "# !{sys.executable} -m pip install \"sagemaker>=2.49.2\"" + "import sys\n", + "\n", + "!{sys.executable} -m pip install \"sagemaker>=2.50.0\"" ] }, { @@ -882,12 +883,21 @@ "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3.9.4 64-bit ('python@3.9')", - "name": "python394jvsc74a57bd0ac2eaa0ea0ebeafcc7822e65e46aa9d4f966f30b695406963e145ea4a91cd4fc" + "display_name": "Python 3 (Data Science)", + "language": "python", + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" }, "metadata": { "interpreter": { From 0e8f82df5bb8f6d303571ce34d6e3d68b9d2c83b Mon Sep 17 00:00:00 2001 From: kthadaka Date: Tue, 3 Aug 2021 17:04:05 +0000 Subject: [PATCH 05/10] Updated pip install --- .../tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index c14baeab43..5b41fe9fdf 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -87,7 +87,7 @@ "source": [ "import sys\n", "\n", - "!{sys.executable} -m pip install \"sagemaker>=2.50.0\"" + "!{sys.executable} -m pip install \"sagemaker>=2.51.0\"" ] }, { From cd721543285a5ec63a2dac6d5586e5f7b11b7884 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 5 Aug 2021 18:18:37 +0000 Subject: [PATCH 06/10] Added iam helper function --- .../tabular/lambda-step/iam_helper.py | 42 +++++++++++++++++++ .../sagemaker-pipelines-lambda-step.ipynb | 9 ++-- 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 sagemaker-pipelines/tabular/lambda-step/iam_helper.py diff --git a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py new file mode 100644 index 0000000000..deb4d3b60a --- /dev/null +++ b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py @@ -0,0 +1,42 @@ +import boto3 +import json + +iam = boto3.client('iam') + +def create_lambda_role(role_name): + try: + response = iam.create_role( + RoleName = role_name, + AssumeRolePolicyDocument = json.dumps({ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] + }), + Description='Role for Lambda to call ECS Fargate task' + 
) + + role_arn = response['Role']['Arn'] + + response = iam.attach_role_policy( + RoleName=role_name, + PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole' + ) + + response = iam.attach_role_policy( + PolicyArn='arn:aws:iam::aws:policy/AmazonSageMakerFullAccess', + RoleName=role_name + ) + + return role_arn + + except iam.exceptions.EntityAlreadyExistsException: + print(f'Using ARN from existing role: {role_name}') + response = iam.get_role(RoleName=role_name) + return response['Role']['Arn'] \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 5b41fe9fdf..02937bfded 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -678,9 +678,9 @@ "\n", "The Lambda function needs an IAM role that will allow it to deploy a SageMaker Endpoint. The role ARN must be provided in the LambdaStep. \n", "\n", - "The role needs Lambda basic execution permissions, Lambda trust policy, and any other sagemaker permissions based on what the Lambda function is doing\n", + "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", "\n", - "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. " + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy of least priviledges as per AWS IAM best practices." 
] }, { @@ -689,8 +689,9 @@ "metadata": {}, "outputs": [], "source": [ - "lambda_role = None\n", - "assert lambda_role is not None, \"Lambda role must be provided!\"" + "from iam_helper import create_lambda_role\n", + "\n", + "lambda_role = create_lambda_role(\"lambda-deployment-role\")" ] }, { From 20fb4ca9e024f967a53dfd74ef43df90e19cebf9 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 5 Aug 2021 19:18:09 +0000 Subject: [PATCH 07/10] Updated role description --- sagemaker-pipelines/tabular/lambda-step/iam_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py index deb4d3b60a..51ff704ff0 100644 --- a/sagemaker-pipelines/tabular/lambda-step/iam_helper.py +++ b/sagemaker-pipelines/tabular/lambda-step/iam_helper.py @@ -19,7 +19,7 @@ def create_lambda_role(role_name): } ] }), - Description='Role for Lambda to call ECS Fargate task' + Description='Role for Lambda to call SageMaker functions' ) role_arn = response['Role']['Arn'] From b43ae3e32c8edaaf10fbb30b8e64c6fe6de41647 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Thu, 5 Aug 2021 21:04:18 +0000 Subject: [PATCH 08/10] Fixed typos --- .../lambda-step/sagemaker-pipelines-lambda-step.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 02937bfded..e1fc5f1495 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -13,7 +13,7 @@ "* Train an XGBoost Model\n", "* Evaluate the model performance\n", "* Create a model\n", - "* Deploy the model to a Sagemaker Hosted Endpoint using a Lambda Function\n", + "* Deploy the model to a SageMaker Hosted Endpoint using a Lambda Function\n", "\n", "A step to register the model into a Model Registry can be added to the pipeline using the `RegisterModel` step." ] @@ -603,7 +603,7 @@ "source": [ "#### Create the Lambda Step\n", "\n", - "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providng a script, function name and role for the Lambda function. \n", + "When defining the LambdaStep, the SageMaker Lambda helper class provides helper functions for creating the Lambda function. Users can either use the `lambda_func` argument to provide the function ARN to an already deployed Lambda function OR use the `Lambda` class to create a Lambda function by providing a script, function name and role for the Lambda function. \n", "\n", "When passing inputs to the Lambda, the `inputs` argument can be used and within the Lambda function's handler, the `event` argument can be used to retrieve the inputs.\n", "\n", @@ -680,7 +680,7 @@ "\n", "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", "\n", - "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. 
This should be replaced with an IAM policy of least priviledges as per AWS IAM best practices." + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy of least privileges as per AWS IAM best practices." ] }, { From 54eab65cb493db6f069dab3334af6d00fe523b6d Mon Sep 17 00:00:00 2001 From: kthadaka Date: Fri, 6 Aug 2021 14:00:48 +0000 Subject: [PATCH 09/10] Updated md cells --- .../tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index e1fc5f1495..9e19a0f280 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -680,7 +680,7 @@ "\n", "The Lambda role should at minimum have policies to allow `sagemaker:CreateModel`, `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint` in addition to the based Lambda execution policies. \n", "\n", - "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy of least privileges as per AWS IAM best practices." + "A helper function in `iam_helper.py` is available to create the Lambda function role. Please note that the role uses the Amazon managed policy - `SageMakerFullAccess`. This should be replaced with an IAM policy with least privileges as per AWS IAM best practices." 
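+    ,
+    "\n",
+    "\n",
+    "A minimal identity-based policy for this example might look like the following sketch; the broad `Resource` should be scoped down for production use:\n",
+    "\n",
+    "```\n",
+    "{\n",
+    "    \"Version\": \"2012-10-17\",\n",
+    "    \"Statement\": [\n",
+    "        {\n",
+    "            \"Effect\": \"Allow\",\n",
+    "            \"Action\": [\n",
+    "                \"sagemaker:CreateModel\",\n",
+    "                \"sagemaker:CreateEndpointConfig\",\n",
+    "                \"sagemaker:CreateEndpoint\"\n",
+    "            ],\n",
+    "            \"Resource\": \"*\"\n",
+    "        }\n",
+    "    ]\n",
+    "}\n",
+    "```"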
] }, { From 8fb68ca2960aa94f4154946c5c733c6f0b1f0a97 Mon Sep 17 00:00:00 2001 From: kthadaka Date: Fri, 6 Aug 2021 18:57:25 +0000 Subject: [PATCH 10/10] updated formatting --- .../sagemaker-pipelines-lambda-step.ipynb | 46 ++++++------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 9e19a0f280..69df9cf634 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -148,9 +148,7 @@ "region = sagemaker.Session().boto_region_name\n", "sm_client = boto3.client(\"sagemaker\")\n", "boto_session = boto3.Session(region_name=region)\n", - "sagemaker_session = sagemaker.session.Session(\n", - " boto_session=boto_session, sagemaker_client=sm_client\n", - ")\n", + "sagemaker_session = sagemaker.session.Session(boto_session=boto_session, sagemaker_client=sm_client)\n", "prefix = \"lambda-step-pipeline\"\n", "\n", "account_id = boto3.client(\"sts\").get_caller_identity().get(\"Account\")" @@ -169,15 +167,11 @@ "base_job_prefix = \"lambda-step-example\"\n", "s3_prefix = \"lambda-step-pipeline\"\n", "\n", - "processing_instance_count = ParameterInteger(\n", - " name=\"ProcessingInstanceCount\", default_value=1\n", - ")\n", + "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", "processing_instance_type = ParameterString(\n", " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", ")\n", - "training_instance_type = ParameterString(\n", - " name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n", - ")\n", + "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", "model_approval_status = ParameterString(\n", " name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n", ")\n", @@ -325,9 +319,7 @@ "\n", " X = np.concatenate((y_pre, X_pre), axis=1)\n", "\n", - " logger.info(\n", - " \"Splitting %d rows of data into train, validation, test datasets.\", len(X)\n", - " )\n", + " logger.info(\"Splitting %d rows of data into train, validation, test datasets.\", len(X))\n", " np.random.shuffle(X)\n", " train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])\n", "\n", @@ -361,9 +353,7 @@ " processor=sklearn_processor,\n", " outputs=[\n", " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", - " ProcessingOutput(\n", - " output_name=\"validation\", source=\"/opt/ml/processing/validation\"\n", - " ),\n", + " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", " ],\n", " code=\"preprocess.py\",\n", @@ -424,9 +414,7 @@ " estimator=xgb_train,\n", " inputs={\n", " \"train\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"train\"\n", - " ].S3Output.S3Uri,\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", " content_type=\"text/csv\",\n", " ),\n", " \"validation\": TrainingInput(\n", @@ -547,9 +535,7 @@ " destination=\"/opt/ml/processing/model\",\n", " ),\n", " ProcessingInput(\n", - " source=step_process.properties.ProcessingOutputConfig.Outputs[\n", - " \"test\"\n", - " ].S3Output.S3Uri,\n", + " 
source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n",
     "            destination=\"/opt/ml/processing/test\",\n",
     "        ),\n",
     "    ],\n",
@@ -717,15 +703,9 @@
     "    handler=\"lambda_helper.lambda_handler\",\n",
     ")\n",
     "\n",
-    "output_param_1 = LambdaOutput(\n",
-    "    output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String\n",
-    ")\n",
-    "output_param_2 = LambdaOutput(\n",
-    "    output_name=\"body\", output_type=LambdaOutputTypeEnum.String\n",
-    ")\n",
-    "output_param_3 = LambdaOutput(\n",
-    "    output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String\n",
-    ")\n",
+    "output_param_1 = LambdaOutput(output_name=\"statusCode\", output_type=LambdaOutputTypeEnum.String)\n",
+    "output_param_2 = LambdaOutput(output_name=\"body\", output_type=LambdaOutputTypeEnum.String)\n",
+    "output_param_3 = LambdaOutput(output_name=\"other_key\", output_type=LambdaOutputTypeEnum.String)\n",
     "\n",
     "step_deploy_lambda = LambdaStep(\n",
     "    name=\"LambdaStep\",\n",
@@ -860,9 +840,9 @@
     "\n",
     "# Get the model name from the EndpointConfig. The CreateModelStep properties are not available outside the Pipeline execution context\n",
     "# so `step_create_model.properties.ModelName` cannot be used while deleting the model.\n",
-    "model_name = sm_client.describe_endpoint_config(\n",
-    "    EndpointConfigName=endpoint_config_name\n",
-    ")[\"ProductionVariants\"][0][\"ModelName\"]\n",
+    "model_name = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)[\n",
+    "    \"ProductionVariants\"\n",
+    "][0][\"ModelName\"]\n",
     "\n",
     "# Delete the Model\n",
     "sm_client.delete_model(ModelName=model_name)\n",