From fe5acd52ba8e016b675b66b709ec6c592ac6aee6 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Tue, 16 Aug 2022 13:31:20 -0700 Subject: [PATCH 01/18] fix pipe_bring_your_own.ipynb (#3547) * fix pipe_bring_your_own.ipynb * login before pushing to docker * login before pushing to docker * fix login issues * fix login issues * revert login fix code Co-authored-by: EC2 Default User --- .../pipe_bring_your_own/pipe_bring_your_own.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/advanced_functionality/pipe_bring_your_own/pipe_bring_your_own.ipynb b/advanced_functionality/pipe_bring_your_own/pipe_bring_your_own.ipynb index 32e05c3714..746a55d1ef 100644 --- a/advanced_functionality/pipe_bring_your_own/pipe_bring_your_own.ipynb +++ b/advanced_functionality/pipe_bring_your_own/pipe_bring_your_own.ipynb @@ -203,6 +203,7 @@ "%%sh\n", "REGION=$(aws configure get region)\n", "account=$(aws sts get-caller-identity --query Account --output text)\n", + "docker login --username AWS --password $(aws ecr get-login-password --region us-west-2) 763104351884.dkr.ecr.us-west-2.amazonaws.com\n", "aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${account}.dkr.ecr.${REGION}.amazonaws.com" ] }, From 7031013b1822d07f56f81f48b614ae7d5830c769 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Tue, 16 Aug 2022 14:19:22 -0700 Subject: [PATCH 02/18] fix sagemaker-pipelines/time_series_forecasting/amazon_forecast_pipeline/sm_pipeline_with_amazon_forecast.ipynb (#3548) Co-authored-by: EC2 Default User --- .../sm_pipeline_with_amazon_forecast.ipynb | 111 ++++++++++-------- 1 file changed, 59 insertions(+), 52 deletions(-) diff --git a/sagemaker-pipelines/time_series_forecasting/amazon_forecast_pipeline/sm_pipeline_with_amazon_forecast.ipynb b/sagemaker-pipelines/time_series_forecasting/amazon_forecast_pipeline/sm_pipeline_with_amazon_forecast.ipynb index 4904fbbfe7..c49389ab19 100644 
--- a/sagemaker-pipelines/time_series_forecasting/amazon_forecast_pipeline/sm_pipeline_with_amazon_forecast.ipynb +++ b/sagemaker-pipelines/time_series_forecasting/amazon_forecast_pipeline/sm_pipeline_with_amazon_forecast.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "0c9ee48f", + "id": "91ee6b6d", "metadata": {}, "source": [ "# Creating an Amazon Forecast Predictor with SageMaker Pipelines\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": null, - "id": "932dc4d0", + "id": "86a4678e", "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "markdown", - "id": "de12d5a8", + "id": "b0125efc", "metadata": {}, "source": [ "Finally, you will need the following trust policies." @@ -61,7 +61,7 @@ { "cell_type": "code", "execution_count": null, - "id": "957d4d1f", + "id": "8050bbca", "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "markdown", - "id": "45298f90", + "id": "9ed30cce", "metadata": {}, "source": [ "## Prerequisites\n", @@ -95,7 +95,17 @@ { "cell_type": "code", "execution_count": null, - "id": "9ab8df52", + "id": "1d137518", + "metadata": {}, + "outputs": [], + "source": [ + "! 
pip install sagemaker==2.93.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b763c7b0", "metadata": {}, "outputs": [], "source": [ @@ -135,7 +145,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51f2beea", + "id": "ad1e02a1", "metadata": {}, "outputs": [], "source": [ @@ -189,7 +199,7 @@ }, { "cell_type": "markdown", - "id": "fff1a8b5", + "id": "23cb4c19", "metadata": {}, "source": [ "## Dataset\n", @@ -200,7 +210,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e4367c4a", + "id": "d71ab7f2", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +236,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5a4a1d4a", + "id": "8ce30a2a", "metadata": {}, "outputs": [], "source": [ @@ -243,7 +253,7 @@ }, { "cell_type": "markdown", - "id": "db24153a", + "id": "f58aea79", "metadata": {}, "source": [ "The dataset happens to span January 01, 2011, to January 01, 2015. We are only going to use about two and a half week's of hourly data to train Amazon Forecast. \n", @@ -253,7 +263,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ac099d3c", + "id": "724eee5f", "metadata": {}, "outputs": [], "source": [ @@ -262,7 +272,7 @@ }, { "cell_type": "markdown", - "id": "d114bd69", + "id": "18b87844", "metadata": {}, "source": [ "Next, we define parameters that can be set for the execution of the pipeline. They serve as variables. 
We define the following:\n", @@ -286,7 +296,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f1d86c83", + "id": "52ba0c45", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +304,6 @@ "processing_instance_type = ParameterString(\n", " name=\"ProcessingInstanceType\", default_value=\"ml.m5.large\"\n", ")\n", - "training_instance_count = ParameterInteger(name=\"TrainingInstanceCount\", default_value=1)\n", "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.large\")\n", "\n", "input_train = ParameterString(\n", @@ -312,7 +321,7 @@ }, { "cell_type": "markdown", - "id": "eff2dad9", + "id": "3a2ee68c", "metadata": {}, "source": [ "We use an updated [SKLearnProcessor](https://sagemaker.readthedocs.io/en/stable/frameworks/sklearn/sagemaker.sklearn.html#sagemaker.sklearn.processing.SKLearnProcessor) to run Python scripts to build a dataset group and train an Amazon Forecast predictor using `boto3`. In the next chunk, we instantiate an instance of `ScriptProcessor`, which is essentially an SKLearnProcessor with updated `boto3` and `botocore` (as built above) that we use in the next steps. " @@ -321,7 +330,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11e82c55", + "id": "130c2059", "metadata": {}, "outputs": [], "source": [ @@ -336,7 +345,7 @@ { "cell_type": "code", "execution_count": null, - "id": "88fb293a", + "id": "2abf7b80", "metadata": {}, "outputs": [], "source": [ @@ -353,7 +362,7 @@ }, { "cell_type": "markdown", - "id": "5d40d2b1", + "id": "26bd50c0", "metadata": {}, "source": [ "First we preprocess the data using an Amazon SageMaker [ProcessingStep](https://sagemaker.readthedocs.io/en/stable/workflows/pipelines/sagemaker.workflow.pipelines.html?highlight=ProcessingStep#sagemaker.workflow.steps.ProcessingStep) that provides a containerized execution environment to run the `preprocess.py` script." 
@@ -362,7 +371,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b5d84ca3", + "id": "aa0259f4", "metadata": {}, "outputs": [], "source": [ @@ -383,7 +392,7 @@ }, { "cell_type": "markdown", - "id": "6d4b1540", + "id": "6e05150d", "metadata": {}, "source": [ "The next step is to train and evaluate the forecasting model calling Amazon Forecast using `boto3`. We instantiate an instance of `SKLearn` estimator that we use in the next `TrainingStep` to run the script `train.py`. \n", @@ -394,7 +403,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b80ada3f", + "id": "95177b2f", "metadata": {}, "outputs": [], "source": [ @@ -425,7 +434,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b52d154a", + "id": "cf10e258", "metadata": {}, "outputs": [], "source": [ @@ -433,7 +442,6 @@ " entry_point=\"train.py\",\n", " role=role_arn,\n", " image_uri=container_image_uri,\n", - " instance_count=training_instance_count,\n", " instance_type=training_instance_type,\n", " sagemaker_session=sagemaker_session,\n", " base_job_name=\"forecast-train\",\n", @@ -446,7 +454,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f5ddecce", + "id": "82f5536a", "metadata": {}, "outputs": [], "source": [ @@ -455,7 +463,7 @@ }, { "cell_type": "markdown", - "id": "29f0d4d4", + "id": "867f2daf", "metadata": {}, "source": [ "The third step is an Amazon SageMaker ProcessingStep that deletes or keeps the Amazon Forecast model running using the script `conditional_delete.py`. 
If the error reported after training is higher than a threshold you specify for the metric you specify, this step deletes all the resources created by Amazon Forecast that are related to the pipeline's execution.\n", @@ -465,7 +473,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43c79816", + "id": "f6122249", "metadata": {}, "outputs": [], "source": [ @@ -492,7 +500,7 @@ }, { "cell_type": "markdown", - "id": "41ef4915", + "id": "991697b7", "metadata": {}, "source": [ "Finally, we combine all the steps and define our pipeline." @@ -501,7 +509,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7cf7b196", + "id": "fdc925a3", "metadata": {}, "outputs": [], "source": [ @@ -513,7 +521,6 @@ " parameters=[\n", " processing_instance_type,\n", " processing_instance_count,\n", - " training_instance_count,\n", " training_instance_type,\n", " input_train,\n", " forecast_horizon,\n", @@ -532,7 +539,7 @@ }, { "cell_type": "markdown", - "id": "c838b490", + "id": "681b8721", "metadata": {}, "source": [ "Once the pipeline is successfully defined, we can start the execution." 
@@ -541,7 +548,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1cbe62f1", + "id": "5b375f45", "metadata": {}, "outputs": [], "source": [ @@ -551,7 +558,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35e9c22d", + "id": "b2fec897", "metadata": {}, "outputs": [], "source": [ @@ -561,7 +568,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ccb70b34", + "id": "72464cc3", "metadata": {}, "outputs": [], "source": [ @@ -571,7 +578,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a20d8f39", + "id": "e66f34a3", "metadata": {}, "outputs": [], "source": [ @@ -580,7 +587,7 @@ }, { "cell_type": "markdown", - "id": "1e285dfd", + "id": "c5c56dff", "metadata": {}, "source": [ "## Experiments Tracking\n", @@ -602,7 +609,7 @@ }, { "cell_type": "markdown", - "id": "067b7888", + "id": "a0030897", "metadata": {}, "source": [ "## Conclusion" @@ -610,7 +617,7 @@ }, { "cell_type": "markdown", - "id": "40a6ba7e", + "id": "132ad067", "metadata": {}, "source": [ "In this notebook we have seen how to create a SageMaker Pipeline to train an Amazon Forecast predictor on your own dataset with a target and related time series." 
@@ -618,7 +625,7 @@ }, { "cell_type": "markdown", - "id": "93c99720", + "id": "d10d8baf", "metadata": {}, "source": [ "## Clean up\n", @@ -629,7 +636,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bc956081", + "id": "c8665320", "metadata": {}, "outputs": [], "source": [ @@ -654,7 +661,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c2e6e8a", + "id": "6a234cbf", "metadata": {}, "outputs": [], "source": [ @@ -670,7 +677,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e50ca583", + "id": "b269f192", "metadata": {}, "outputs": [], "source": [ @@ -680,7 +687,7 @@ { "cell_type": "code", "execution_count": null, - "id": "82e5928a", + "id": "a8828ef6", "metadata": {}, "outputs": [], "source": [ @@ -690,7 +697,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44212447", + "id": "abeda944", "metadata": {}, "outputs": [], "source": [ @@ -708,7 +715,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41649cfd", + "id": "d64d4ae5", "metadata": {}, "outputs": [], "source": [ @@ -720,7 +727,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a9fd48dc", + "id": "33d2861f", "metadata": {}, "outputs": [], "source": [ @@ -733,7 +740,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cc00b557", + "id": "d0e4bd9d", "metadata": {}, "outputs": [], "source": [ @@ -744,7 +751,7 @@ { "cell_type": "code", "execution_count": null, - "id": "336300e0", + "id": "d9968b15", "metadata": {}, "outputs": [], "source": [ @@ -756,7 +763,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8eed39f2", + "id": "1440c84e", "metadata": {}, "outputs": [], "source": [ @@ -767,9 +774,9 @@ "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "conda_python3", "language": "python", - "name": "python3" + "name": "conda_python3" }, "language_info": { "codemirror_mode": { @@ -781,7 +788,7 @@ "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.12" } }, "nbformat": 4, From 0fbc28c37e9297556e296dda508abce305631d12 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Wed, 17 Aug 2022 10:13:28 -0700 Subject: [PATCH 03/18] rename FastAPI Example.ipynb (#3550) Co-authored-by: EC2 Default User --- .../{FastAPI Example.ipynb => FastAPI_Example.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r_examples/r_serving_with_fastapi/{FastAPI Example.ipynb => FastAPI_Example.ipynb} (100%) diff --git a/r_examples/r_serving_with_fastapi/FastAPI Example.ipynb b/r_examples/r_serving_with_fastapi/FastAPI_Example.ipynb similarity index 100% rename from r_examples/r_serving_with_fastapi/FastAPI Example.ipynb rename to r_examples/r_serving_with_fastapi/FastAPI_Example.ipynb From 3e868a4539b54ec35400496cd67ec0abb942fa92 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Wed, 17 Aug 2022 10:13:38 -0700 Subject: [PATCH 04/18] fix RestRServe Example (#3553) --- r_examples/r_serving_with_restrserve/Dockerfile | 4 +++- .../{RestRServe Example.ipynb => RestRServe_Example.ipynb} | 0 2 files changed, 3 insertions(+), 1 deletion(-) rename r_examples/r_serving_with_restrserve/{RestRServe Example.ipynb => RestRServe_Example.ipynb} (100%) diff --git a/r_examples/r_serving_with_restrserve/Dockerfile b/r_examples/r_serving_with_restrserve/Dockerfile index 69dc88b8c2..5aaaf57689 100644 --- a/r_examples/r_serving_with_restrserve/Dockerfile +++ b/r_examples/r_serving_with_restrserve/Dockerfile @@ -2,7 +2,9 @@ FROM r-base:3.6.3 MAINTAINER Amazon SageMaker Examples -RUN R -e "install.packages(c('RestRserve','xgboost','dplyr'), repos='https://cloud.r-project.org')" +RUN R -e "install.packages(c('RestRserve','data.table', 'stringi', 'dplyr'), repos='https://cloud.r-project.org')" +RUN wget http://cran.r-project.org/src/contrib/Archive/xgboost/xgboost_1.4.1.1.tar.gz +RUN R CMD INSTALL 
xgboost_1.4.1.1.tar.gz COPY xgb.model /opt/ml/xgb.model COPY restrserve.R /opt/ml/restrserve.R diff --git a/r_examples/r_serving_with_restrserve/RestRServe Example.ipynb b/r_examples/r_serving_with_restrserve/RestRServe_Example.ipynb similarity index 100% rename from r_examples/r_serving_with_restrserve/RestRServe Example.ipynb rename to r_examples/r_serving_with_restrserve/RestRServe_Example.ipynb From 16e56a62ea35254b4794030f329a050f7b7e9e30 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Wed, 17 Aug 2022 10:14:03 -0700 Subject: [PATCH 05/18] rename Plumber Example.ipynb (#3551) Co-authored-by: EC2 Default User --- .../{Plumber Example.ipynb => Plumber_Example.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename r_examples/r_serving_with_plumber/{Plumber Example.ipynb => Plumber_Example.ipynb} (100%) diff --git a/r_examples/r_serving_with_plumber/Plumber Example.ipynb b/r_examples/r_serving_with_plumber/Plumber_Example.ipynb similarity index 100% rename from r_examples/r_serving_with_plumber/Plumber Example.ipynb rename to r_examples/r_serving_with_plumber/Plumber_Example.ipynb From 8159d5d3582a60a3fa345115c64184dce61be6e4 Mon Sep 17 00:00:00 2001 From: qidewenwhen <32910701+qidewenwhen@users.noreply.github.com> Date: Wed, 17 Aug 2022 10:45:25 -0700 Subject: [PATCH 06/18] change: Update callback step notebook as per recent sdk changes and fix existing issues (#3516) Co-authored-by: Dewen Qi Co-authored-by: Julia Kroll <75504951+jkroll-aws@users.noreply.github.com> --- ...ain-evaluate-batch-transform_outputs.ipynb | 8 +- .../sagemaker-pipelines-callback-step.ipynb | 90 +++--- .../setup_iam_roles.py | 301 ++++++++---------- .../sagemaker-pipelines-lambda-step.ipynb | 2 +- ...emaker-pipelines-lambda-step_outputs.ipynb | 8 +- 5 files changed, 186 insertions(+), 223 deletions(-) diff --git a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb 
b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb index d20c0f6d83..21102fda26 100644 --- a/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb +++ b/sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform_outputs.ipynb @@ -272,9 +272,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n", - "\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n", - "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n" + "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\r\n", + "\u001B[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n", + "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001B[0m\r\n" ] } ], @@ -3441,4 +3441,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/custom_callback_pipelines_step/sagemaker-pipelines-callback-step.ipynb b/sagemaker-pipelines/tabular/custom_callback_pipelines_step/sagemaker-pipelines-callback-step.ipynb index a36145affd..263936108a 100644 --- a/sagemaker-pipelines/tabular/custom_callback_pipelines_step/sagemaker-pipelines-callback-step.ipynb +++ b/sagemaker-pipelines/tabular/custom_callback_pipelines_step/sagemaker-pipelines-callback-step.ipynb @@ -899,7 +899,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install \"sagemaker==2.91.1\"" + "!pip install \"sagemaker>=2.99.0\"" ] }, { @@ -977,6 +977,7 @@ "outputs": [], "source": [ "from sagemaker.workflow.callback_step import CallbackStep, CallbackOutput, CallbackOutputTypeEnum\n", + "from sagemaker.workflow.functions import Join\n", "\n", "callback1_output = CallbackOutput(\n", " output_name=\"s3_data_out\", output_type=CallbackOutputTypeEnum.String\n", @@ -987,7 +988,9 @@ " sqs_queue_url=queue_url,\n", " inputs={\n", " \"input_location\": f\"s3://{default_bucket}/{taxi_prefix}/\",\n", - " \"output_location\": f\"s3://{default_bucket}/{taxi_prefix}_{id_out}/\",\n", + " \"output_location\": Join(\n", + " on=\"/\", values=[\"s3:/\", default_bucket, f\"{taxi_prefix}_output\", id_out]\n", + " ),\n", " },\n", " outputs=[callback1_output],\n", ")" @@ -1000,9 +1003,9 @@ "source": [ "#### 2 - Training Step \n", "\n", - "Next, we'll configure the training step by first configuring the estimator for random cut forest. Then, we'll configure the training step. 
\n", + "Next, we'll configure the training step by first configuring the estimator for random cut forest. Then, we use the output of the estimator's .fit() method as arguments to the TrainingStep. By passing the pipeline_session to the sagemaker_session, calling .fit() does not launch the training job. Instead, it returns the arguments needed to run the job as a step in the pipeline.\n", "\n", - "The training step will accept the following **inputs**: \n", + "To generate the step arguments for the training step, it will accept the following **inputs**: \n", " * S3 location of processed data to be used for model training\n", " * ECR containing the training image for rcf\n", " * Estimator configuration\n", @@ -1018,6 +1021,8 @@ "metadata": {}, "outputs": [], "source": [ + "from sagemaker.workflow.pipeline_context import PipelineSession\n", + "\n", "containers = {\n", " \"us-west-2\": \"174872318107.dkr.ecr.us-west-2.amazonaws.com/randomcutforest:latest\",\n", " \"us-east-1\": \"382416733822.dkr.ecr.us-east-1.amazonaws.com/randomcutforest:latest\",\n", @@ -1028,7 +1033,7 @@ "container = containers[region_name]\n", "model_prefix = \"model\"\n", "\n", - "session = sagemaker.Session()\n", + "pipeline_session = PipelineSession()\n", "\n", "rcf = sagemaker.estimator.Estimator(\n", " container,\n", @@ -1036,7 +1041,7 @@ " output_path=\"s3://{}/{}/output\".format(default_bucket, model_prefix),\n", " instance_count=training_instance_count,\n", " instance_type=\"ml.c5.xlarge\",\n", - " sagemaker_session=session,\n", + " sagemaker_session=pipeline_session,\n", ")\n", "\n", "rcf.set_hyperparameters(num_samples_per_tree=200, num_trees=50, feature_dim=1)" @@ -1052,9 +1057,7 @@ "from sagemaker.inputs import TrainingInput\n", "from sagemaker.workflow.steps import TrainingStep\n", "\n", - "step_train = TrainingStep(\n", - " name=\"TrainModel\",\n", - " estimator=rcf,\n", + "train_step_args = rcf.fit(\n", " inputs={\n", " \"train\": TrainingInput(\n", " # s3_data = Output of the 
previous call back step\n", @@ -1063,6 +1066,10 @@ " distribution=\"ShardedByS3Key\",\n", " ),\n", " },\n", + ")\n", + "step_train = TrainingStep(\n", + " name=\"TrainModel\",\n", + " step_args=train_step_args,\n", ")" ] }, @@ -1073,9 +1080,9 @@ "source": [ "#### 3 - Create Model\n", "\n", - "Next, we'll package the trained model for deployment. \n", + "Next, we'll package the trained model for deployment. To achieve this, we define the ModelStep by providing the return values from `model.create()` as the step arguments. Similarly, the `pipeline_session` is required when defining the model, which puts off the model creation to the pipeline execution time.\n", "\n", - "The create model step will accept the following **inputs**: \n", + "To generate the step arguments for the model step, it will accept the following **inputs**: \n", " * S3 location of the trained model artifact\n", " * ECR containing the inference image for rcf\n", " \n", @@ -1100,7 +1107,7 @@ "model = Model(\n", " image_uri=image_uri,\n", " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " sagemaker_session=sagemaker_session,\n", + " sagemaker_session=pipeline_session,\n", " role=role,\n", ")" ] @@ -1112,19 +1119,14 @@ "metadata": {}, "outputs": [], "source": [ - "from sagemaker.inputs import CreateModelInput\n", - "from sagemaker.workflow.steps import CreateModelStep\n", + "from sagemaker.workflow.model_step import ModelStep\n", "\n", "\n", - "inputs = CreateModelInput(\n", + "model_step_args = model.create(\n", " instance_type=\"ml.m5.large\",\n", ")\n", "\n", - "create_model = CreateModelStep(\n", - " name=\"TaxiModel\",\n", - " model=model,\n", - " inputs=inputs,\n", - ")" + "create_model = ModelStep(name=\"TaxiModel\", step_args=model_step_args)" ] }, { @@ -1134,9 +1136,9 @@ "source": [ "#### 4 - Batch Transform\n", "\n", - "Next, we'll deploy the model using batch transform then do a quick evaluation with our data to compute anomaly scores for each of our data points on 
input. \n", + "Next, we'll deploy the model using batch transform then do a quick evaluation with our data to compute anomaly scores for each of our data points on input.\n", "\n", - "The batch transform step will accept the following **inputs**: \n", + "To generate the step arguments for the batch transform step, it will accept the following **inputs**: \n", " * SageMaker packaged model\n", " * S3 location of the input data\n", " * ECR containing the inference image for rcf\n", @@ -1164,6 +1166,7 @@ " accept=\"text/csv\",\n", " instance_count=1,\n", " output_path=f\"s3://{default_bucket}/{output_prefix}/\",\n", + " sagemaker_session=pipeline_session,\n", ")" ] }, @@ -1179,17 +1182,18 @@ "\n", "batch_data = step_callback_data.properties.Outputs[\"s3_data_out\"]\n", "\n", + "transform_step_args = transformer.transform(\n", + " data=batch_data,\n", + " content_type=\"text/csv\",\n", + " split_type=\"Line\",\n", + " input_filter=\"$[0]\",\n", + " join_source=\"Input\",\n", + " output_filter=\"$[0,-1]\",\n", + ")\n", + "\n", "step_transform = TransformStep(\n", " name=\"TaxiTransform\",\n", - " transformer=transformer,\n", - " inputs=TransformInput(\n", - " data=batch_data,\n", - " content_type=\"text/csv\",\n", - " split_type=\"Line\",\n", - " input_filter=\"$[0]\",\n", - " join_source=\"Input\",\n", - " output_filter=\"$[0,-1]\",\n", - " ),\n", + " step_args=transform_step_args,\n", ")" ] }, @@ -1201,19 +1205,6 @@ "### Configure Pipeline Using Created Steps" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "e646229c", - "metadata": {}, - "outputs": [], - "source": [ - "import uuid\n", - "\n", - "id_out = uuid.uuid4().hex\n", - "print(\"Unique ID:\", id_out)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1222,8 +1213,9 @@ "outputs": [], "source": [ "from sagemaker.workflow.pipeline import Pipeline\n", + "from sagemaker.utils import unique_name_from_base\n", "\n", - "pipeline_name = f\"GluePipeline-{id_out}\"\n", + "pipeline_name = 
unique_name_from_base(\"GluePipeline\")\n", "pipeline = Pipeline(\n", " name=pipeline_name,\n", " parameters=[\n", @@ -1318,9 +1310,9 @@ "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3 (Data Science)", + "display_name": "Python 3", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1332,9 +1324,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.6.14" } }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/sagemaker-pipelines/tabular/custom_callback_pipelines_step/setup_iam_roles.py b/sagemaker-pipelines/tabular/custom_callback_pipelines_step/setup_iam_roles.py index ef5ae80d1b..2da54d19d5 100644 --- a/sagemaker-pipelines/tabular/custom_callback_pipelines_step/setup_iam_roles.py +++ b/sagemaker-pipelines/tabular/custom_callback_pipelines_step/setup_iam_roles.py @@ -1,241 +1,212 @@ import json import boto3 -iam = boto3.client('iam') +iam = boto3.client("iam") def create_ecs_task_role(role_name): try: response = iam.create_role( - RoleName = role_name, - AssumeRolePolicyDocument = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Service": "ecs-tasks.amazonaws.com" - }, - "Action": "sts:AssumeRole" - } - ] - }), - Description='Role for ECS task execution' + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "ecs-tasks.amazonaws.com"}, + "Action": "sts:AssumeRole", + } + ], + } + ), + Description="Role for ECS task execution", ) - role_arn = response['Role']['Arn'] + role_arn = response["Role"]["Arn"] response = iam.attach_role_policy( RoleName=role_name, - 
PolicyArn='arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy' + PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy", ) - + response = iam.put_role_policy( RoleName=role_name, - PolicyName='create_log_group', - PolicyDocument='{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"logs:CreateLogGroup","Resource":"*"}}' + PolicyName="create_log_group", + PolicyDocument='{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"logs:CreateLogGroup","Resource":"*"}}', ) - + return role_arn - + except iam.exceptions.EntityAlreadyExistsException: - print(f'Using ARN from existing role: {role_name}') + print(f"Using ARN from existing role: {role_name}") response = iam.get_role(RoleName=role_name) - return response['Role']['Arn'] + return response["Role"]["Arn"] def create_task_runner_role(role_name): try: response = iam.create_role( - RoleName = role_name, - AssumeRolePolicyDocument = json.dumps({ + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "ecs-tasks.amazonaws.com"}, + "Action": "sts:AssumeRole", + } + ], + } + ), + Description="Role for ECS tasks", + ) + + role_arn = response["Role"]["Arn"] + + role_policy_document = json.dumps( + { "Version": "2012-10-17", "Statement": [ + {"Effect": "Allow", "Action": "sagemaker:*", "Resource": "*"}, { "Effect": "Allow", - "Principal": { - "Service": "ecs-tasks.amazonaws.com" - }, - "Action": "sts:AssumeRole" - } - ] - }), - Description='Role for ECS tasks' + "Action": ["glue:StartJobRun", "glue:GetJobRun"], + "Resource": "*", + }, + {"Effect": "Allow", "Action": "logs:CreateLogGroup", "Resource": "*"}, + ], + } ) - role_arn = response['Role']['Arn'] - - role_policy_document = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": "sagemaker:*", - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - 
"glue:StartJobRun", - "glue:GetJobRun" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": "logs:CreateLogGroup", - "Resource": "*" - } - ] - }) - response = iam.put_role_policy( RoleName=role_name, - PolicyName='glue_logs_sagemaker', - PolicyDocument=role_policy_document + PolicyName="glue_logs_sagemaker", + PolicyDocument=role_policy_document, ) - + response = iam.put_role_policy( RoleName=role_name, - PolicyName='create_log_group', - PolicyDocument='{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"logs:CreateLogGroup","Resource":"*"}}' + PolicyName="create_log_group", + PolicyDocument='{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"logs:CreateLogGroup","Resource":"*"}}', ) - + return role_arn except iam.exceptions.EntityAlreadyExistsException: - print(f'Using ARN from existing role: {role_name}') + print(f"Using ARN from existing role: {role_name}") response = iam.get_role(RoleName=role_name) - return response['Role']['Arn'] + return response["Role"]["Arn"] def create_glue_pipeline_role(role_name, bucket): try: response = iam.create_role( - RoleName = role_name, - AssumeRolePolicyDocument = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Service": "glue.amazonaws.com" - }, - "Action": "sts:AssumeRole" - } - ] - }), - Description='Role for Glue ETL job' + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "glue.amazonaws.com"}, + "Action": "sts:AssumeRole", + } + ], + } + ), + Description="Role for Glue ETL job", ) - role_arn = response['Role']['Arn'] + role_arn = response["Role"]["Arn"] response = iam.attach_role_policy( - RoleName=role_name, - PolicyArn='arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole' + RoleName=role_name, PolicyArn="arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole" ) - - role_policy_document = json.dumps({ - "Version": 
"2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": "s3:*", - "Resource": f"arn:aws:s3:::{bucket}" - } - ] - }) - + + role_policy_document = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + {"Effect": "Allow", "Action": "s3:*", "Resource": f"arn:aws:s3:::{bucket}"} + ], + } + ) + response = iam.put_role_policy( - RoleName=role_name, - PolicyName='glue_s3_bucket', - PolicyDocument=role_policy_document + RoleName=role_name, PolicyName="glue_s3_bucket", PolicyDocument=role_policy_document ) - - role_policy_document = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": "s3:*", - "Resource": f"arn:aws:s3:::{bucket}/*" - } - ] - }) - + + role_policy_document = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + {"Effect": "Allow", "Action": "s3:*", "Resource": f"arn:aws:s3:::{bucket}/*"} + ], + } + ) + response = iam.put_role_policy( - RoleName=role_name, - PolicyName='glue_s3_objects', - PolicyDocument=role_policy_document + RoleName=role_name, PolicyName="glue_s3_objects", PolicyDocument=role_policy_document ) - + return role_arn except iam.exceptions.EntityAlreadyExistsException: - print(f'Using ARN from existing role: {role_name}') + print(f"Using ARN from existing role: {role_name}") response = iam.get_role(RoleName=role_name) - return response['Role']['Arn'] - + return response["Role"]["Arn"] + + def create_lambda_sm_pipeline_role(role_name, ecs_role_arn, task_role_arn): try: response = iam.create_role( - RoleName = role_name, - AssumeRolePolicyDocument = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Service": "lambda.amazonaws.com" - }, - "Action": "sts:AssumeRole" - } - ] - }), - Description='Role for Lambda to call ECS Fargate task' + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": 
"sts:AssumeRole", + } + ], + } + ), + Description="Role for Lambda to call ECS Fargate task", ) - role_arn = response['Role']['Arn'] + role_arn = response["Role"]["Arn"] response = iam.attach_role_policy( RoleName=role_name, - PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole' + PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", ) - role_policy_document = json.dumps({ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": "ecs:RunTask", - "Resource": ["*"] - }, - { - "Effect": "Allow", - "Action": "sqs:*", - "Resource": ["*"] - }, - { - "Effect": "Allow", - "Action": "sagemaker:*", - "Resource": ["*"] - }, - { - "Effect": "Allow", - "Action": "iam:PassRole", - "Resource": [ecs_role_arn, task_role_arn] - }, - ] - }) + role_policy_document = json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + {"Effect": "Allow", "Action": "ecs:RunTask", "Resource": ["*"]}, + {"Effect": "Allow", "Action": "sqs:*", "Resource": ["*"]}, + {"Effect": "Allow", "Action": "sagemaker:*", "Resource": ["*"]}, + { + "Effect": "Allow", + "Action": "iam:PassRole", + "Resource": [ecs_role_arn, task_role_arn], + }, + ], + } + ) response = iam.put_role_policy( - RoleName=role_name, - PolicyName='ecs_sqs_sagemaker', - PolicyDocument=role_policy_document + RoleName=role_name, PolicyName="ecs_sqs_sagemaker", PolicyDocument=role_policy_document ) return role_arn except iam.exceptions.EntityAlreadyExistsException: - print(f'Using ARN from existing role: {role_name}') + print(f"Using ARN from existing role: {role_name}") response = iam.get_role(RoleName=role_name) - return response['Role']['Arn'] \ No newline at end of file + return response["Role"]["Arn"] diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb index 695995a096..1036bce5cb 100644 --- 
a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step.ipynb @@ -1051,4 +1051,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb index fa431f8a44..1e10714587 100644 --- a/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb +++ b/sagemaker-pipelines/tabular/lambda-step/sagemaker-pipelines-lambda-step_outputs.ipynb @@ -329,9 +329,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n", - "\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n", - "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n" + "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\r\n", + "\u001B[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n", + "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001B[0m\r\n" ] } ], @@ -1917,4 +1917,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From 611b4ba95ac5c213e3e53d9eb13248ec19d7ab42 Mon Sep 17 00:00:00 2001 From: atqy <95724753+atqy@users.noreply.github.com> Date: Wed, 17 Aug 2022 12:31:21 -0700 Subject: [PATCH 07/18] Implement Kendra search in RTD website (#3537) * implement unified search in RTD website * add sagemaker-debugger rtd to unified search * add licensing information * add licensing information * add licensing information * add licensing information --- LICENSE.txt | 18 + _static/kendrasearchtools.js | 692 +++++++++++++++++++++++++++++++++ _static/pagination.css | 17 + _static/search_accessories.css | 29 ++ _templates/search.html | 56 +++ conf.py | 5 + licenses/2-CLAUSE-BSD | 28 ++ 7 files changed, 845 insertions(+) create mode 100644 _static/kendrasearchtools.js create mode 100644 _static/pagination.css create mode 100644 _static/search_accessories.css create mode 100644 _templates/search.html create mode 100644 licenses/2-CLAUSE-BSD diff --git a/LICENSE.txt b/LICENSE.txt index d645695673..6ff2c6fd00 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -200,3 +200,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + ====================================================================================== + Amazon SageMaker Examples Subcomponents: + + The Amazon SageMaker Examples project contains subcomponents with separate + copyright notices and license terms. 
Your use of the source code for + these subcomponents is subject to the terms and conditions of the following + licenses. See licenses/ for text of these licenses. + + If a folder hierarchy is listed as a subcomponent, separate listings of + further subcomponents (files or folder hierarchies) part of the hierarchy + take precedence. + + ======================================================================================= + 2-clause BSD license + ======================================================================================= + _static/kendrasearchtools.js + _templates/search.html diff --git a/_static/kendrasearchtools.js b/_static/kendrasearchtools.js new file mode 100644 index 0000000000..f2d47ef889 --- /dev/null +++ b/_static/kendrasearchtools.js @@ -0,0 +1,692 @@ +/* + * kendrasearchtools.js + * ~~~~~~~~~~~~~~~~ + * + * A modification of searchtools.js (https://github.com/sphinx-doc/sphinx/blob/275d9/sphinx/themes/basic/static/searchtools.js) + * where the default full-text search implemented in searchtools.js is replaced with AWS Kendra searching over multiple + * websites. The default full-text search is still kept and implemented as a fallback in the case that the Kendra search doesn't work. + * + * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +if (!Scorer) { + /** + * Simple result scoring code. + */ + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [filename, title, anchor, descr, score] + // and returns the new score. 
+ /* + score: function(result) { + return result[4]; + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: {0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5}, // used to be unimportantResults + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2 + }; +} + +if (!splitQuery) { + function splitQuery(query) { + return query.split(/\s+/); + } +} + +/** + * default rtd search (used as fallback) + */ +var Search = { + + _index : null, + _queued_query : null, + _pulse_status : -1, + + htmlToText : function(htmlString) { + var virtualDocument = document.implementation.createHTMLDocument('virtual'); + var htmlElement = $(htmlString, virtualDocument); + htmlElement.find('.headerlink').remove(); + docContent = htmlElement.find('[role=main]')[0]; + if(docContent === undefined) { + console.warn("Content block not found. Sphinx search tries to obtain it " + + "via '[role=main]'. 
Could you check your theme or template."); + return ""; + } + return docContent.textContent || docContent.innerText; + }, + + init : function() { + var params = $.getQueryParameters(); + if (params.q) { + var query = params.q[0]; + $('input[name="q"]')[0].value = query; + // this.performSearch(query); + } + }, + + loadIndex : function(url) { + $.ajax({type: "GET", url: url, data: null, + dataType: "script", cache: true, + complete: function(jqxhr, textstatus) { + if (textstatus != "success") { + document.getElementById("searchindexloader").src = url; + } + }}); + }, + + setIndex : function(index) { + var q; + this._index = index; + if ((q = this._queued_query) !== null) { + this._queued_query = null; + Search.query(q); + } + }, + + hasIndex : function() { + return this._index !== null; + }, + + deferQuery : function(query) { + this._queued_query = query; + }, + + stopPulse : function() { + this._pulse_status = 0; + }, + + startPulse : function() { + if (this._pulse_status >= 0) + return; + function pulse() { + var i; + Search._pulse_status = (Search._pulse_status + 1) % 4; + var dotString = ''; + for (i = 0; i < Search._pulse_status; i++) + dotString += '.'; + Search.dots.text(dotString); + if (Search._pulse_status > -1) + window.setTimeout(pulse, 500); + } + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch : function(query) { + // create the required interface elements + this.out = $('#search-results'); + this.title = $('#search-results h2:first'); // $('

' + _('Searching') + '

').appendTo(this.out); + this.dots = $('#search-results span:first'); //$('').appendTo(this.title); + this.status = $('#search-results p:first'); // $('

 

').appendTo(this.out); + this.output = $('#search-results ul:first'); //$('