Skip to content

Commit

Permalink
[Feature]Add Online Explainability notebooks for SageMaker Clarify (#…
Browse files Browse the repository at this point in the history
…3613)

* Add Online Explainability notebooks for SageMaker Clarify

* Correcting text in clean-up sections of online explainability example notebooks

* Updating install commands for captum and sagemaker PyPI packages

* debug captum installation

* change instance type

Co-authored-by: Aaron Markham <[email protected]>
Co-authored-by: atqy <[email protected]>
Co-authored-by: atqy <[email protected]>
  • Loading branch information
4 people authored Oct 5, 2022
1 parent 56d5be1 commit bc29bb7
Show file tree
Hide file tree
Showing 6 changed files with 2,326 additions and 0 deletions.
9 changes: 9 additions & 0 deletions sagemaker-clarify/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,12 @@ SageMaker Clarify Model Monitoring
:maxdepth: 1

../sagemaker_model_monitor/fairness_and_explainability/SageMaker-Model-Monitor-Fairness-and-Explainability

SageMaker Clarify Online Explainability
---------------------------------------

.. toctree::
:maxdepth: 1

online_explainability/tabular/tabular_online_explainability_with_sagemaker_clarify
online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from io import StringIO
import numpy as np
import os
import pandas as pd
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from typing import Any, Dict, List


def model_fn(model_dir: str) -> Dict[str, Any]:
    """
    Build the objects needed to serve predictions.

    The classifier weights are read from the ``model`` sub-directory of
    ``model_dir``; the tokenizer is requested by its name,
    ``distilbert-base-uncased``.

    Args:
        model_dir: Directory that contains the ``model`` sub-directory.

    Returns:
        A dict with the classifier under ``"model"`` and the tokenizer
        under ``"tokenizer"``.
    """
    # NOTE(review): the tokenizer is looked up by name rather than from
    # model_dir -- confirm it is resolvable in the serving environment.
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    # local_files_only=True: the weights must already be on disk.
    weights_dir = os.path.join(model_dir, "model")
    classifier = AutoModelForSequenceClassification.from_pretrained(
        weights_dir, local_files_only=True
    )

    # Use the GPU when one is visible, otherwise stay on the CPU.
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    classifier.to(torch.device(device_name))
    classifier.eval()

    return {"model": classifier, "tokenizer": tokenizer}


def predict_fn(input_data: List, model: Dict) -> np.ndarray:
    """
    Score the incoming texts with the loaded classifier.

    Args:
        input_data: Texts to score, passed straight to the tokenizer.
        model: Dict holding the tokenizer under ``"tokenizer"`` and the
            classifier under ``"model"``.

    Returns:
        A NumPy array with column 1 of the softmax taken over dim 1 of the
        classifier logits (presumably the positive-class probability, one
        value per input -- confirm against the trained label order).
    """
    hf_tokenizer = model["tokenizer"]
    classifier = model["model"]
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Inputs longer than 128 tokens are truncated; the batch is padded.
    batch = hf_tokenizer(
        input_data,
        truncation=True,
        padding=True,
        max_length=128,
        return_tensors="pt",
    )

    # Move every tensor in the encoding to the selected device.
    on_device = {}
    for name, tensor in batch.items():
        on_device[name] = tensor.to(target)

    # Inference only: no gradients are tracked for the forward pass.
    with torch.no_grad():
        logits = classifier(
            input_ids=on_device["input_ids"],
            attention_mask=on_device["attention_mask"],
        ).logits

    probabilities = torch.softmax(logits, dim=1)
    return probabilities.detach().cpu().numpy()[:, 1]


def input_fn(request_body: str, request_content_type: str) -> List[str]:
    """
    Deserialize the request payload into the texts to score.

    Args:
        request_body: Raw request payload.
        request_content_type: Content type declared for the payload.

    Returns:
        * ``application/json``: the decoded JSON value wrapped in a
          one-element list.
        * ``text/csv``: the first column of the header-less CSV, one string
          per row, exactly as written in the payload.
        * any other content type: ``request_body`` unchanged.

    Raises:
        json.JSONDecodeError: If a JSON payload is malformed.
        pandas.errors.EmptyDataError: If a CSV payload is empty.
    """
    if request_content_type == "application/json":
        return [json.loads(request_body)]

    if request_content_type == "text/csv":
        # The single column holds free text, so pandas must not infer
        # anything: dtype=str keeps "007" from turning into 7, and
        # keep_default_na=False keeps words such as "NA" or "null" from
        # turning into NaN (which would be scored as the text "nan").
        frame = pd.read_csv(
            StringIO(request_body),
            header=None,
            dtype=str,
            keep_default_na=False,
        )
        return frame.iloc[:, 0].astype(str).tolist()

    # Unknown content types are passed through untouched.
    return request_body
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
transformers==4.2.1
torch==1.7.1
pandas
Loading

0 comments on commit bc29bb7

Please sign in to comment.