Commit 806fca7: Merge branch 'main' into main

atqy authored Oct 7, 2022
2 parents 238bc09 + bc29bb7
Showing 6 changed files with 2,326 additions and 0 deletions.
9 changes: 9 additions & 0 deletions sagemaker-clarify/index.rst
@@ -26,3 +26,12 @@ SageMaker Clarify Model Monitoring
:maxdepth: 1

../sagemaker_model_monitor/fairness_and_explainability/SageMaker-Model-Monitor-Fairness-and-Explainability

SageMaker Clarify Online Explainability
---------------------------------------

.. toctree::
:maxdepth: 1

online_explainability/tabular/tabular_online_explainability_with_sagemaker_clarify
online_explainability/natural_language_processing/nlp_online_explainability_with_sagemaker_clarify
@@ -0,0 +1,57 @@
import json
import os
from io import StringIO
from typing import Any, Dict, List

import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


def model_fn(model_dir: str) -> Dict[str, Any]:
    """
    Load the model for inference.
    """
    model_path = os.path.join(model_dir, "model")

    # Load the HuggingFace tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    # Load the HuggingFace model from disk.
    model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    model_dict = {"model": model, "tokenizer": tokenizer}
    return model_dict


def predict_fn(input_data: List, model: Dict) -> np.ndarray:
    """
    Apply the model to the incoming request.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = model["tokenizer"]
    huggingface_model = model["model"]

    encoded_input = tokenizer(input_data, truncation=True, padding=True, max_length=128, return_tensors="pt")
    encoded_input = {k: v.to(device) for k, v in encoded_input.items()}
    with torch.no_grad():
        output = huggingface_model(input_ids=encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
        # Softmax over the logits; keep column 1, the positive-class probability.
        res = torch.nn.Softmax(dim=1)(output.logits).detach().cpu().numpy()[:, 1]
    return res


def input_fn(request_body: str, request_content_type: str) -> List[str]:
    """
    Deserialize and prepare the prediction input.
    """
    if request_content_type == "application/json":
        # The body is a single JSON-encoded sentence.
        sentences = [json.loads(request_body)]
    elif request_content_type == "text/csv":
        # We have a single column with the text.
        sentences = list(pd.read_csv(StringIO(request_body), header=None).values[:, 0].astype(str))
    else:
        sentences = request_body
    return sentences
@@ -0,0 +1,3 @@
transformers==4.2.1
torch==1.7.1
pandas