
Commit

Merge branch 'main' into main-beta-rtd-search
atqy authored Aug 17, 2022
2 parents 15b8ae9 + 8159d5d commit 06f2ee6
Showing 9 changed files with 189 additions and 224 deletions.
4 changes: 3 additions & 1 deletion r_examples/r_serving_with_restrserve/Dockerfile
@@ -2,7 +2,9 @@ FROM r-base:3.6.3

MAINTAINER Amazon SageMaker Examples <[email protected]>

-RUN R -e "install.packages(c('RestRserve','xgboost','dplyr'), repos='https://cloud.r-project.org')"
+RUN R -e "install.packages(c('RestRserve','data.table', 'stringi', 'dplyr'), repos='https://cloud.r-project.org')"
+RUN wget http://cran.r-project.org/src/contrib/Archive/xgboost/xgboost_1.4.1.1.tar.gz
+RUN R CMD INSTALL xgboost_1.4.1.1.tar.gz

COPY xgb.model /opt/ml/xgb.model
COPY restrserve.R /opt/ml/restrserve.R
@@ -272,9 +272,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n",
"\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n",
"You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n"
"\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\r\n",
"\u001B[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n",
"You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001B[0m\r\n"
]
}
],
@@ -3441,4 +3441,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
@@ -899,7 +899,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install \"sagemaker==2.91.1\""
"!pip install \"sagemaker>=2.99.0\""
]
},
{
@@ -977,6 +977,7 @@
"outputs": [],
"source": [
"from sagemaker.workflow.callback_step import CallbackStep, CallbackOutput, CallbackOutputTypeEnum\n",
"from sagemaker.workflow.functions import Join\n",
"\n",
"callback1_output = CallbackOutput(\n",
" output_name=\"s3_data_out\", output_type=CallbackOutputTypeEnum.String\n",
@@ -987,7 +988,9 @@
" sqs_queue_url=queue_url,\n",
" inputs={\n",
" \"input_location\": f\"s3://{default_bucket}/{taxi_prefix}/\",\n",
" \"output_location\": f\"s3://{default_bucket}/{taxi_prefix}_{id_out}/\",\n",
" \"output_location\": Join(\n",
" on=\"/\", values=[\"s3:/\", default_bucket, f\"{taxi_prefix}_output\", id_out]\n",
" ),\n",
" },\n",
" outputs=[callback1_output],\n",
")"
@@ -1000,9 +1003,9 @@
"source": [
"#### 2 - Training Step \n",
"\n",
"Next, we'll configure the training step by first configuring the estimator for random cut forest. Then, we'll configure the training step. \n",
"Next, we'll configure the training step by first configuring the estimator for random cut forest. Then, we use the output of the estimator's .fit() method as arguments to the TrainingStep. By passing the pipeline_session to the sagemaker_session, calling .fit() does not launch the training job. Instead, it returns the arguments needed to run the job as a step in the pipeline.\n",
"\n",
"The training step will accept the following **inputs**: \n",
"To generate the step arguments for the training step, it will accept the following **inputs**: \n",
" * S3 location of processed data to be used for model training\n",
" * ECR containing the training image for rcf\n",
" * Estimator configuration\n",
@@ -1018,6 +1021,8 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.workflow.pipeline_context import PipelineSession\n",
"\n",
"containers = {\n",
" \"us-west-2\": \"174872318107.dkr.ecr.us-west-2.amazonaws.com/randomcutforest:latest\",\n",
" \"us-east-1\": \"382416733822.dkr.ecr.us-east-1.amazonaws.com/randomcutforest:latest\",\n",
@@ -1028,15 +1033,15 @@
"container = containers[region_name]\n",
"model_prefix = \"model\"\n",
"\n",
"session = sagemaker.Session()\n",
"pipeline_session = PipelineSession()\n",
"\n",
"rcf = sagemaker.estimator.Estimator(\n",
" container,\n",
" sagemaker.get_execution_role(),\n",
" output_path=\"s3://{}/{}/output\".format(default_bucket, model_prefix),\n",
" instance_count=training_instance_count,\n",
" instance_type=\"ml.c5.xlarge\",\n",
" sagemaker_session=session,\n",
" sagemaker_session=pipeline_session,\n",
")\n",
"\n",
"rcf.set_hyperparameters(num_samples_per_tree=200, num_trees=50, feature_dim=1)"
@@ -1052,9 +1057,7 @@
"from sagemaker.inputs import TrainingInput\n",
"from sagemaker.workflow.steps import TrainingStep\n",
"\n",
"step_train = TrainingStep(\n",
" name=\"TrainModel\",\n",
" estimator=rcf,\n",
"train_step_args = rcf.fit(\n",
" inputs={\n",
" \"train\": TrainingInput(\n",
" # s3_data = Output of the previous call back step\n",
@@ -1063,6 +1066,10 @@
" distribution=\"ShardedByS3Key\",\n",
" ),\n",
" },\n",
")\n",
"step_train = TrainingStep(\n",
" name=\"TrainModel\",\n",
" step_args=train_step_args,\n",
")"
]
},
@@ -1073,9 +1080,9 @@
"source": [
"#### 3 - Create Model\n",
"\n",
"Next, we'll package the trained model for deployment. \n",
"Next, we'll package the trained model for deployment. To achieve this, we define the ModelStep by providing the return values from `model.create()` as the step arguments. Similarly, the `pipeline_session` is required when defining the model, which puts off the model creation to the pipeline execution time.\n",
"\n",
"The create model step will accept the following **inputs**: \n",
"To generate the step arguments for the model step, it will accept the following **inputs**: \n",
" * S3 location of the trained model artifact\n",
" * ECR containing the inference image for rcf\n",
" \n",
@@ -1100,7 +1107,7 @@
"model = Model(\n",
" image_uri=image_uri,\n",
" model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n",
" sagemaker_session=sagemaker_session,\n",
" sagemaker_session=pipeline_session,\n",
" role=role,\n",
")"
]
@@ -1112,19 +1119,14 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.inputs import CreateModelInput\n",
"from sagemaker.workflow.steps import CreateModelStep\n",
"from sagemaker.workflow.model_step import ModelStep\n",
"\n",
"\n",
"inputs = CreateModelInput(\n",
"model_step_args = model.create(\n",
" instance_type=\"ml.m5.large\",\n",
")\n",
"\n",
"create_model = CreateModelStep(\n",
" name=\"TaxiModel\",\n",
" model=model,\n",
" inputs=inputs,\n",
")"
"create_model = ModelStep(name=\"TaxiModel\", step_args=model_step_args)"
]
},
{
@@ -1134,9 +1136,9 @@
"source": [
"#### 4 - Batch Transform\n",
"\n",
"Next, we'll deploy the model using batch transform then do a quick evaluation with our data to compute anomaly scores for each of our data points on input. \n",
"Next, we'll deploy the model using batch transform then do a quick evaluation with our data to compute anomaly scores for each of our data points on input.\n",
"\n",
"The batch transform step will accept the following **inputs**: \n",
"To generate the step arguments for the batch transform step, it will accept the following **inputs**: \n",
" * SageMaker packaged model\n",
" * S3 location of the input data\n",
" * ECR containing the inference image for rcf\n",
@@ -1164,6 +1166,7 @@
" accept=\"text/csv\",\n",
" instance_count=1,\n",
" output_path=f\"s3://{default_bucket}/{output_prefix}/\",\n",
" sagemaker_session=pipeline_session,\n",
")"
]
},
@@ -1179,17 +1182,18 @@
"\n",
"batch_data = step_callback_data.properties.Outputs[\"s3_data_out\"]\n",
"\n",
"transform_step_args = transformer.transform(\n",
" data=batch_data,\n",
" content_type=\"text/csv\",\n",
" split_type=\"Line\",\n",
" input_filter=\"$[0]\",\n",
" join_source=\"Input\",\n",
" output_filter=\"$[0,-1]\",\n",
")\n",
"\n",
"step_transform = TransformStep(\n",
" name=\"TaxiTransform\",\n",
" transformer=transformer,\n",
" inputs=TransformInput(\n",
" data=batch_data,\n",
" content_type=\"text/csv\",\n",
" split_type=\"Line\",\n",
" input_filter=\"$[0]\",\n",
" join_source=\"Input\",\n",
" output_filter=\"$[0,-1]\",\n",
" ),\n",
" step_args=transform_step_args,\n",
")"
]
},
@@ -1201,19 +1205,6 @@
"### Configure Pipeline Using Created Steps"
]
},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "e646229c",
-"metadata": {},
-"outputs": [],
-"source": [
-"import uuid\n",
-"\n",
-"id_out = uuid.uuid4().hex\n",
-"print(\"Unique ID:\", id_out)"
-]
-},
{
"cell_type": "code",
"execution_count": null,
@@ -1222,8 +1213,9 @@
"outputs": [],
"source": [
"from sagemaker.workflow.pipeline import Pipeline\n",
"from sagemaker.utils import unique_name_from_base\n",
"\n",
"pipeline_name = f\"GluePipeline-{id_out}\"\n",
"pipeline_name = unique_name_from_base(\"GluePipeline\")\n",
"pipeline = Pipeline(\n",
" name=pipeline_name,\n",
" parameters=[\n",
@@ -1318,9 +1310,9 @@
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "Python 3 (Data Science)",
"display_name": "Python 3",
"language": "python",
"name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -1332,9 +1324,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
"version": "3.6.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
