From a61dd5d97748f48014bb7287384b11fcd8481fe6 Mon Sep 17 00:00:00 2001 From: atqy Date: Wed, 5 Oct 2022 19:23:23 +0000 Subject: [PATCH] debug captum installation --- ...xplainability_with_sagemaker_clarify.ipynb | 94 ++----------------- 1 file changed, 10 insertions(+), 84 deletions(-) diff --git a/sagemaker-clarify/online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify.ipynb b/sagemaker-clarify/online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify.ipynb index 670a087ed8..10a8bf125e 100644 --- a/sagemaker-clarify/online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify.ipynb +++ b/sagemaker-clarify/online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "8f6a6429", "metadata": { "tags": [] }, @@ -12,7 +11,6 @@ }, { "cell_type": "markdown", - "id": "b283e01b", "metadata": {}, "source": [ "* [Introduction](#Introduction)\n", @@ -46,7 +44,6 @@ }, { "cell_type": "markdown", - "id": "bd298c7c", "metadata": {}, "source": [ "## Introduction\n", @@ -65,7 +62,6 @@ }, { "cell_type": "markdown", - "id": "5d595da1", "metadata": {}, "source": [ "## General Setup\n", @@ -75,7 +71,6 @@ }, { "cell_type": "markdown", - "id": "c2d8f4f5", "metadata": {}, "source": [ "### Install dependencies\n", @@ -86,7 +81,6 @@ { "cell_type": "code", "execution_count": null, - "id": "90abc161", "metadata": {}, "outputs": [], "source": [ @@ -95,7 +89,6 @@ }, { "cell_type": "markdown", - "id": "f97b4873", "metadata": {}, "source": [ "Upgrade the SageMaker Python SDK, and captum is used to visualize the feature attributions." @@ -104,19 +97,25 @@ { "cell_type": "code", "execution_count": null, - "id": "abcfdde4", "metadata": {}, "outputs": [], "source": [ "!pip install sagemaker --upgrade\n", - "!pip install captum --upgrade\n", "!pip install boto3 --upgrade\n", "!pip install botocore --upgrade" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install captum --upgrade" + ] + }, { "cell_type": "markdown", - "id": "ea012dd8", "metadata": {}, "source": [ "### Import libraries" @@ -125,7 +124,6 @@ { "cell_type": "code", "execution_count": null, - "id": "9037a760", "metadata": {}, "outputs": [], "source": [ @@ -149,7 +147,6 @@ }, { "cell_type": "markdown", - "id": "ebf57dd3", "metadata": {}, "source": [ "### Set configurations" @@ -158,7 +155,6 @@ { "cell_type": "code", "execution_count": null, - "id": "e327066b", "metadata": {}, "outputs": [], "source": [ @@ -202,7 +198,6 @@ }, { "cell_type": "markdown", - "id": "666092a7", "metadata": {}, "source": [ "### Create serializer and deserializer\n", @@ -213,7 +208,6 @@ { "cell_type": "code", "execution_count": null, - "id": "25b5e12e", "metadata": {}, "outputs": [], "source": [ @@ -222,7 +216,6 @@ }, { "cell_type": "markdown", - "id": "b0281dd5", "metadata": {}, "source": [ "JSON deserializer to deserialize invoke endpoint response" @@ -231,7 +224,6 @@ { "cell_type": "code", "execution_count": null, - "id": "7bf4c91d", "metadata": {}, "outputs": [], "source": [ @@ -240,7 +232,6 @@ }, { "cell_type": "markdown", - "id": "9aa6970f", "metadata": {}, "source": [ "### For visualization" @@ -249,7 +240,6 @@ { "cell_type": "code", "execution_count": null, - "id": "63fb58f4", "metadata": {}, "outputs": [], "source": [ @@ -333,7 +323,6 @@ }, { "cell_type": "markdown", - "id": "3315a74e", "metadata": {}, "source": [ "## Prepare data" @@ -341,7 +330,6 @@ }, { "cell_type": "markdown", - "id": "c8fb2bd4", "metadata": {}, "source": [ "### Download data\n", @@ -353,7 +341,6 @@ { "cell_type": "code", "execution_count": null, - "id": "fb86643b", "metadata": {}, "outputs": [], "source": [ @@ -362,7 +349,6 @@ }, { "cell_type": "markdown", - "id": "f4fb5d58", "metadata": {}, "source": [ "### Load the dataset" @@ -371,7 +357,6 @@ { "cell_type": "code", "execution_count": null, - "id": "6418efa7", "metadata": {}, "outputs": [], "source": [ @@ -381,7 +366,6 @@ }, { "cell_type": "markdown", - "id": "e509c8dc", "metadata": {}, "source": [ "**Context**\n", @@ -414,7 +398,6 @@ }, { "cell_type": "markdown", - "id": "86df90cf", "metadata": {}, "source": [ "### Data preparation for model training" @@ -422,7 +405,6 @@ }, { "cell_type": "markdown", - "id": "569c360d", "metadata": {}, "source": [ "#### Target Variable Creation\n", @@ -432,7 +414,6 @@ { "cell_type": "code", "execution_count": null, - "id": "3f13c4ca", "metadata": {}, "outputs": [], "source": [ @@ -450,7 +431,6 @@ }, { "cell_type": "markdown", - "id": "c1012f20", "metadata": {}, "source": [ "#### Train-Validation-Test splits" @@ -458,7 +438,6 @@ }, { "cell_type": "markdown", - "id": "e22eedfa", "metadata": {}, "source": [ "The most common approach for model evaluation is using the train/validation/test split. Although this approach can be very effective in general, it can result in misleading results and potentially fail when used on classification problems with a severe class imbalance. Instead, the technique must be modified to stratify the sampling by the class label as below. Stratification ensures that all classes are well represented across the train, validation and test datasets.\n" @@ -467,7 +446,6 @@ { "cell_type": "code", "execution_count": null, - "id": "7ebc5ce9", "metadata": {}, "outputs": [], "source": [ @@ -518,7 +496,6 @@ { "cell_type": "code", "execution_count": null, - "id": "08fe7383", "metadata": {}, "outputs": [], "source": [ @@ -534,7 +511,6 @@ }, { "cell_type": "markdown", - "id": "82077236", "metadata": {}, "source": [ "We have split the dataset into train, test, and validation datasets. We use the train and validation datasets during training process, and run Clarify on the test dataset.\n", @@ -545,7 +521,6 @@ { "cell_type": "code", "execution_count": null, - "id": "bb4aa0f3", "metadata": {}, "outputs": [], "source": [ @@ -555,7 +530,6 @@ }, { "cell_type": "markdown", - "id": "a8bed8cc", "metadata": {}, "source": [ "### Upload the dataset\n", @@ -565,7 +539,6 @@ { "cell_type": "code", "execution_count": null, - "id": "b57af958", "metadata": {}, "outputs": [], "source": [ @@ -585,7 +558,6 @@ }, { "cell_type": "markdown", - "id": "5b597dfa", "metadata": {}, "source": [ "## Train and Deploy Hugging Face Model" @@ -593,7 +565,6 @@ }, { "cell_type": "markdown", - "id": "d9cf1d29", "metadata": {}, "source": [ "In this step of the workflow, we use the [Hugging Face Estimator](https://sagemaker.readthedocs.io/en/stable/frameworks/huggingface/sagemaker.huggingface.html) to load the pre-trained `distilbert-base-uncased` model and fine-tune the model on our dataset." @@ -601,7 +572,6 @@ }, { "cell_type": "markdown", - "id": "417cc3ab", "metadata": {}, "source": [ "### Train model with Hugging Face estimator\n", @@ -613,7 +583,6 @@ { "cell_type": "code", "execution_count": null, - "id": "24e09350", "metadata": {}, "outputs": [], "source": [ @@ -640,7 +609,6 @@ }, { "cell_type": "markdown", - "id": "7ed85871", "metadata": {}, "source": [ "### Download the trained model files" @@ -649,7 +617,6 @@ { "cell_type": "code", "execution_count": null, - "id": "0b4c9437", "metadata": {}, "outputs": [], "source": [ @@ -660,7 +627,6 @@ }, { "cell_type": "markdown", - "id": "ff1c68c1", "metadata": {}, "source": [ "### Prepare model container definition\n", @@ -671,7 +637,6 @@ { "cell_type": "code", "execution_count": null, - "id": "25add8d6", "metadata": {}, "outputs": [], "source": [ @@ -687,7 +652,6 @@ }, { "cell_type": "markdown", - "id": "93d150bb", "metadata": {}, "source": [ "Create a new model object and then update its model artifact and inference script. The model object will be used to create the SageMaker model." @@ -696,7 +660,6 @@ { "cell_type": "code", "execution_count": null, - "id": "4a489ddd", "metadata": {}, "outputs": [], "source": [ @@ -709,7 +672,6 @@ }, { "cell_type": "markdown", - "id": "07d830a4", "metadata": { "tags": [] }, @@ -719,7 +681,6 @@ }, { "cell_type": "markdown", - "id": "64185ad4", "metadata": {}, "source": [ "### Create model\n", @@ -736,7 +697,6 @@ { "cell_type": "code", "execution_count": null, - "id": "80c4aa5a", "metadata": {}, "outputs": [], "source": [ @@ -750,7 +710,6 @@ }, { "cell_type": "markdown", - "id": "27986dc5", "metadata": {}, "source": [ "### Create endpoint config\n", @@ -760,7 +719,6 @@ }, { "cell_type": "markdown", - "id": "1418da09", "metadata": {}, "source": [ "Here we use a special token as the baseline." @@ -769,7 +727,6 @@ { "cell_type": "code", "execution_count": null, - "id": "5a68678d", "metadata": {}, "outputs": [], "source": [ @@ -779,7 +736,6 @@ }, { "cell_type": "markdown", - "id": "625261da", "metadata": {}, "source": [ "The `TextConfig` configured with `sentence` level granularity (When granularity is `sentence`, each sentence is a feature, and we need a few sentences per review for good visualization) and the language as English." @@ -788,7 +744,6 @@ { "cell_type": "code", "execution_count": null, - "id": "d45710a7", "metadata": {}, "outputs": [], "source": [ @@ -816,7 +771,6 @@ }, { "cell_type": "markdown", - "id": "b6d7d0ae", "metadata": {}, "source": [ "### Create endpoint\n", @@ -827,7 +781,6 @@ { "cell_type": "code", "execution_count": null, - "id": "8e9b020d", "metadata": {}, "outputs": [], "source": [ @@ -839,7 +792,6 @@ }, { "cell_type": "markdown", - "id": "a535f7a2", "metadata": {}, "source": [ "Wait for the endpoint to be in \"InService\" state" @@ -848,7 +800,6 @@ { "cell_type": "code", "execution_count": null, - "id": "e71e6ca6", "metadata": {}, "outputs": [], "source": [ @@ -857,7 +808,6 @@ }, { "cell_type": "markdown", - "id": "50fc3c5e", "metadata": { "tags": [] }, @@ -873,7 +823,6 @@ }, { "cell_type": "markdown", - "id": "697010d5", "metadata": {}, "source": [ "### Single record request\n", @@ -884,7 +833,6 @@ { "cell_type": "code", "execution_count": null, - "id": "20afd02f", "metadata": {}, "outputs": [], "source": [ @@ -894,7 +842,6 @@ { "cell_type": "code", "execution_count": null, - "id": "c0f50d82", "metadata": {}, "outputs": [], "source": [ @@ -910,7 +857,6 @@ { "cell_type": "code", "execution_count": null, - "id": "d911e8f2", "metadata": {}, "outputs": [], "source": [ @@ -921,7 +867,6 @@ { "cell_type": "code", "execution_count": null, - "id": "23ff7d29", "metadata": {}, "outputs": [], "source": [ @@ -930,7 +875,6 @@ }, { "cell_type": "markdown", - "id": "1c83eefe", "metadata": {}, "source": [ "### Single record request, no explanation\n", @@ -941,7 +885,6 @@ { "cell_type": "code", "execution_count": null, - "id": "bd0b346a", "metadata": {}, "outputs": [], "source": [ @@ -951,7 +894,6 @@ { "cell_type": "code", "execution_count": null, - "id": "5dac9e6c", "metadata": {}, "outputs": [], "source": [ @@ -968,7 +910,6 @@ { "cell_type": "code", "execution_count": null, - "id": "9c3341de", "metadata": {}, "outputs": [], "source": [ @@ -979,7 +920,6 @@ { "cell_type": "code", "execution_count": null, - "id": "b3e72bee", "metadata": {}, "outputs": [], "source": [ @@ -988,7 +928,6 @@ }, { "cell_type": "markdown", - "id": "ddf903ef", "metadata": {}, "source": [ "### Batch request, explain both\n", @@ -999,7 +938,6 @@ { "cell_type": "code", "execution_count": null, - "id": "de154390", "metadata": {}, "outputs": [], "source": [ @@ -1009,7 +947,6 @@ { "cell_type": "code", "execution_count": null, - "id": "7ff91a9c", "metadata": {}, "outputs": [], "source": [ @@ -1025,7 +962,6 @@ { "cell_type": "code", "execution_count": null, - "id": "7741275b", "metadata": {}, "outputs": [], "source": [ @@ -1036,7 +972,6 @@ { "cell_type": "code", "execution_count": null, - "id": "be50e1d2", "metadata": {}, "outputs": [], "source": [ @@ -1045,7 +980,6 @@ }, { "cell_type": "markdown", - "id": "ebd1173a", "metadata": {}, "source": [ "### Batch request with more records, explain some of the records\n", @@ -1056,7 +990,6 @@ { "cell_type": "code", "execution_count": null, - "id": "42fa7cee", "metadata": {}, "outputs": [], "source": [ @@ -1066,7 +999,6 @@ { "cell_type": "code", "execution_count": null, - "id": "cc4cd864", "metadata": {}, "outputs": [], "source": [ @@ -1083,7 +1015,6 @@ { "cell_type": "code", "execution_count": null, - "id": "59857f61", "metadata": {}, "outputs": [], "source": [ @@ -1094,7 +1025,6 @@ { "cell_type": "code", "execution_count": null, - "id": "d6914573", "metadata": {}, "outputs": [], "source": [ @@ -1103,7 +1033,6 @@ }, { "cell_type": "markdown", - "id": "081c9aa8", "metadata": { "tags": [] }, @@ -1116,7 +1045,6 @@ { "cell_type": "code", "execution_count": null, - "id": "5cae69ef", "metadata": {}, "outputs": [], "source": [ @@ -1126,7 +1054,6 @@ { "cell_type": "code", "execution_count": null, - "id": "1f12c59e", "metadata": {}, "outputs": [], "source": [ @@ -1136,7 +1063,6 @@ { "cell_type": "code", "execution_count": null, - "id": "1390a2f3", "metadata": {}, "outputs": [], "source": [ @@ -1149,7 +1075,7 @@ "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": {