Skip to content

Commit

Permalink
Add Online Explainability notebooks for SageMaker Clarify
Browse files Browse the repository at this point in the history
  • Loading branch information
malhotra18 committed Sep 30, 2022
1 parent 413abd0 commit d7c7408
Show file tree
Hide file tree
Showing 6 changed files with 2,399 additions and 0 deletions.
9 changes: 9 additions & 0 deletions sagemaker-clarify/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,12 @@ SageMaker Clarify Model Monitoring
:maxdepth: 1

../sagemaker_model_monitor/fairness_and_explainability/SageMaker-Model-Monitor-Fairness-and-Explainability

SageMaker Clarify Online Explainability
---------------------------------------

.. toctree::
:maxdepth: 1

online_explainability/tabular/tabular_online_explainability_with_sagemaker_clarify
online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from io import StringIO
import numpy as np
import os
import pandas as pd
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from typing import Any, Dict, List


def model_fn(model_dir: str) -> Dict[str, Any]:
    """
    Build the inference artifacts: a tokenizer plus a sequence-classification model.

    The tokenizer is requested by the name "distilbert-base-uncased", while the
    model weights are read from the "model" sub-directory of ``model_dir``
    (``local_files_only=True``). The model is moved to CUDA when available,
    otherwise CPU, and put in eval mode.

    Args:
        model_dir: Directory that contains the "model" sub-directory.

    Returns:
        A dict with the keys "model" and "tokenizer".
    """
    # Tokenizer is loaded by its pretrained name, not from model_dir.
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    # Model weights come from disk only.
    model = AutoModelForSequenceClassification.from_pretrained(
        os.path.join(model_dir, "model"),
        local_files_only=True,
    )

    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(target_device)
    model.eval()

    return {"model": model, "tokenizer": tokenizer}


def predict_fn(input_data: List, model: Dict) -> np.ndarray:
    """
    Run the model on the incoming request and return one score per input.

    Args:
        input_data: Text input handed to the tokenizer.
        model: Dict holding the callables under the keys "tokenizer" and "model".

    Returns:
        A 1-D numpy array: column 1 of the softmax taken over dim 1 of the
        model's logits (presumably the positive-class probability -- confirm
        against the trained model's label order).
    """
    hf_tokenizer, hf_model = model["tokenizer"], model["model"]
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Inputs are truncated to at most 128 tokens and padded to a common length.
    batch = hf_tokenizer(input_data, truncation=True, padding=True, max_length=128, return_tensors="pt")
    batch = {name: tensor.to(target_device) for name, tensor in batch.items()}

    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        outputs = hf_model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
        )
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)

    return probabilities.detach().cpu().numpy()[:, 1]


def input_fn(request_body: str, request_content_type: str) -> List[str]:
    """
    Deserialize and prepare the prediction input.

    Args:
        request_body: Raw request payload.
        request_content_type: Content type of the payload.

    Returns:
        * "application/json": a one-element list holding the decoded JSON value.
        * "text/csv": the first column of every row, as strings, exactly as
          they appear in the payload.
        * anything else: ``request_body`` unchanged.
    """
    if request_content_type == "application/json":
        return [json.loads(request_body)]

    if request_content_type == "text/csv":
        # We have a single column with the text.
        # Read it strictly as text: without dtype=str / keep_default_na=False
        # pandas would infer numbers ("007" -> "7") and turn sentences such as
        # "NA" or "null" into the string "nan".
        frame = pd.read_csv(
            StringIO(request_body),
            header=None,
            dtype=str,
            keep_default_na=False,
        )
        return [str(cell) for cell in frame.iloc[:, 0]]

    # Unknown content type: hand the payload through untouched.
    return request_body
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
transformers==4.2.1
torch==1.7.1
pandas
Loading

0 comments on commit d7c7408

Please sign in to comment.