-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
44 changed files
with
4,980 additions
and
5,701 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
615 changes: 615 additions & 0 deletions
615
autopilot/sagemaker-autopilot-pipelines/autopilot_pipelines_demo_notebook.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
37 changes: 37 additions & 0 deletions
37
autopilot/sagemaker-autopilot-pipelines/check_autopilot_job_status.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import boto3 | ||
import json | ||
import logging | ||
|
||
sagemaker_client = boto3.client("sagemaker") | ||
|
||
|
||
def lambda_handler(event, context):
    """Check an Autopilot job and report its outcome to the pipeline callback.

    The JSON body of the first entry in ``event["Records"]`` must provide the
    callback ``token`` and ``arguments["AutopilotJobName"]``.

    * ``Completed``: the callback step is reported as successful.
    * ``InProgress`` / ``Stopping``: ``ValueError`` is raised and propagated
      to the caller (the message says the check is retried later).
    * any other status: the callback step is reported as failed.

    Args:
        event: Invocation event carrying the records described above.
        context: Invocation context (unused).

    Raises:
        ValueError: The Autopilot job has not finished yet.
        Exception: Re-raised when an error occurs before the callback token
            could be read, since no pipeline step can be notified then.
    """
    # The failure API rejects longer reasons (documented limit: 256 chars).
    max_failure_reason_length = 256
    # Stays None until the payload is parsed so that the generic handler
    # below never touches an unbound name.
    callback_token = None
    try:
        payload = json.loads(event["Records"][0]["body"])
        callback_token = payload["token"]
        autopilot_job = sagemaker_client.describe_auto_ml_job(
            AutoMLJobName=payload["arguments"]["AutopilotJobName"]
        )
        autopilot_job_status = autopilot_job["AutoMLJobStatus"]
        if autopilot_job_status == "Completed":
            sagemaker_client.send_pipeline_execution_step_success(
                CallbackToken=callback_token
            )
        elif autopilot_job_status in ["InProgress", "Stopping"]:
            raise ValueError("Autopilot training not finished yet. Retrying later...")
        else:
            failure_reason = autopilot_job.get(
                "FailureReason",
                f"Autopilot training job (status: {autopilot_job_status}) failed to finish.",
            )
            sagemaker_client.send_pipeline_execution_step_failure(
                CallbackToken=callback_token,
                FailureReason=failure_reason[:max_failure_reason_length],
            )
    except ValueError:
        # Propagate the "not finished yet" signal untouched.
        raise
    except Exception as e:
        logging.exception(e)
        if callback_token is None:
            # Nothing to notify without a token; surface the original error
            # instead of failing with a secondary NameError.
            raise
        sagemaker_client.send_pipeline_execution_step_failure(
            CallbackToken=callback_token,
            FailureReason=str(e)[:max_failure_reason_length],
        )
110 changes: 110 additions & 0 deletions
110
autopilot/sagemaker-autopilot-pipelines/evaluate_autopilot_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import argparse | ||
import boto3 | ||
import json | ||
import os | ||
import pandas as pd | ||
import random | ||
import string | ||
import time | ||
from sklearn.metrics import f1_score, precision_score, recall_score | ||
from urllib.parse import urlparse | ||
|
||
# Eight random lowercase letters appended to the names of the resources
# created by this script.
RANDOM_SUFFIX = "".join(random.choices(string.ascii_lowercase, k=8))

# Command-line options accepted by the script, as (flag, value type) pairs.
_CLI_OPTIONS = (
    ("--autopilot-job-name", str),
    ("--aws-region", str),
    ("--x-test-s3-path", str),
    ("--y-test-file-name", str),
    ("--batch-transform-output-s3-path", str),
    ("--instance-type", str),
    ("--instance-count", int),
    ("--local-base-path", str),
    ("--sagemaker-execution-role-arn", str),
)

parser = argparse.ArgumentParser()
for option_flag, option_type in _CLI_OPTIONS:
    parser.add_argument(option_flag, type=option_type)
args = parser.parse_args()

# Both service clients share one session bound to the requested region.
boto_session = boto3.session.Session(region_name=args.aws_region)
sagemaker_client = boto_session.client("sagemaker")
s3_client = boto_session.client("s3")
|
||
# Create a model from the inference containers of the best Autopilot candidate
model_name = args.autopilot_job_name + RANDOM_SUFFIX
best_candidate_containers = sagemaker_client.describe_auto_ml_job(
    AutoMLJobName=args.autopilot_job_name
)["BestCandidate"]["InferenceContainers"]
response = sagemaker_client.create_model(
    ModelName=model_name,
    Containers=best_candidate_containers,
    ExecutionRoleArn=args.sagemaker_execution_role_arn,
)

# Create a batch transform job that scores the CSV test features line by line
batch_transform_job_name = args.autopilot_job_name + RANDOM_SUFFIX
transform_input = {
    "DataSource": {
        "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": args.x_test_s3_path,
        }
    },
    "ContentType": "text/csv",
    "SplitType": "Line",
}
transform_output = {
    "S3OutputPath": args.batch_transform_output_s3_path,
    "AssembleWith": "Line",
}
transform_resources = {
    "InstanceType": args.instance_type,
    "InstanceCount": args.instance_count,
}
response = sagemaker_client.create_transform_job(
    TransformJobName=batch_transform_job_name,
    ModelName=model_name,
    TransformInput=transform_input,
    TransformOutput=transform_output,
    TransformResources=transform_resources,
)
|
||
# Wait for the batch transform job to finish, polling every 10 seconds
while True:
    transform_job_description = sagemaker_client.describe_transform_job(
        TransformJobName=batch_transform_job_name
    )
    transform_job_status = transform_job_description["TransformJobStatus"]
    if transform_job_status != "InProgress":
        break
    time.sleep(10)

# Fail fast with the job's own diagnosis instead of letting a missing
# predictions file surface later as an unrelated download error.
if transform_job_status != "Completed":
    raise RuntimeError(
        f"Batch transform job {batch_transform_job_name} ended with status "
        f"{transform_job_status}: "
        f"{transform_job_description.get('FailureReason', 'no failure reason reported')}"
    )
|
||
# Download batch transform results: the predictions object is looked up under
# the output path as "<test file name>.out" and saved as predictions.csv.
x_test_file_name = args.x_test_s3_path.rsplit("/", 1)[-1]
predictions_object_name = x_test_file_name + ".out"
predictions_s3_path = os.path.join(
    args.batch_transform_output_s3_path, predictions_object_name
)
predictions_location = urlparse(predictions_s3_path)
s3_client.download_file(
    Bucket=predictions_location.netloc,
    Key=predictions_location.path.strip("/"),
    Filename="predictions.csv",
)
|
||
# Create best model evaluation report
# NOTE(review): predictions are parsed with header=0 while the labels are
# parsed with header=1 — confirm both files line up row-for-row.
y_pred = pd.read_csv("predictions.csv", header=0).iloc[:, 0]
y_true = pd.read_csv(
    os.path.join(args.local_base_path, "data", args.y_test_file_name), header=1
)

# scikit-learn metrics take (y_true, y_pred) in that order; swapping them
# exchanges precision with recall and weights classes by predicted support.
weighted_metrics = {
    "weighted_f1": f1_score,
    "weighted_precision": precision_score,
    "weighted_recall": recall_score,
}
evaluation_report = {
    "multiclass_classification_metrics": {
        metric_name: {
            "value": metric(y_true, y_pred, average="weighted"),
            "standard_deviation": "NaN",
        }
        for metric_name, metric in weighted_metrics.items()
    },
}

# Write the report to <local base path>/evaluation_report/evaluation_report.json
evaluation_report_path = os.path.join(
    args.local_base_path, "evaluation_report", "evaluation_report.json"
)
os.makedirs(os.path.dirname(evaluation_report_path), exist_ok=True)
with open(evaluation_report_path, "w") as f:
    json.dump(evaluation_report, f)
62 changes: 62 additions & 0 deletions
62
autopilot/sagemaker-autopilot-pipelines/register_autopilot_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import boto3 | ||
import os | ||
from botocore.exceptions import ClientError | ||
from urllib.parse import urlparse | ||
|
||
s3_client = boto3.client("s3") | ||
sagemaker_client = boto3.client("sagemaker") | ||
|
||
|
||
def get_explainability_report_json_s3_path(s3_path):
    """Return the S3 URI of the first ``analysis.json`` object under *s3_path*.

    Args:
        s3_path: ``s3://<bucket>/<prefix>`` URI to search.

    Returns:
        ``s3://<bucket>/<key>`` for the first listed object whose key contains
        ``"analysis.json"``, or ``None`` when no such object is listed.
    """
    location = urlparse(s3_path)
    bucket_name = location.netloc
    s3_prefix = location.path.strip("/")
    # Default page size: a page size of 1 would cost one request per object.
    pages = s3_client.get_paginator("list_objects_v2").paginate(
        Bucket=bucket_name, Prefix=s3_prefix
    )
    for page in pages:
        # "Contents" is missing from a page that lists no objects.
        for s3_object in page.get("Contents", []):
            key = s3_object["Key"]
            if "analysis.json" in key:
                # Build the URI directly: path-joining is platform dependent
                # and would drop the bucket for a key starting with "/".
                return f"s3://{bucket_name}/{key}"
    return None
|
||
|
||
def lambda_handler(event, context):
    """Register the best candidate of an Autopilot job as a model package.

    Args:
        event: Mapping that provides ``AutopilotJobName``,
            ``ModelPackageName``, ``InstanceType``, ``ModelApprovalStatus``
            and ``EvaluationReportS3Path``.
        context: Invocation context (unused).
    """
    # Get the explainability results from the Autopilot job
    autopilot_job = sagemaker_client.describe_auto_ml_job(
        AutoMLJobName=event["AutopilotJobName"]
    )
    best_candidate = autopilot_job["BestCandidate"]
    explainability_report_s3_path = best_candidate["CandidateProperties"][
        "CandidateArtifactLocations"
    ]["Explainability"]
    inference_containers = best_candidate["InferenceContainers"]
    # The first container is registered without its environment settings; the
    # default keeps registration working when that container defines none.
    inference_containers[0].pop("Environment", None)
    sagemaker_client.create_model_package(
        ModelPackageName=event["ModelPackageName"],
        InferenceSpecification={
            "Containers": inference_containers,
            "SupportedContentTypes": ["text/csv"],
            "SupportedResponseMIMETypes": ["text/csv"],
            "SupportedTransformInstanceTypes": [event["InstanceType"]],
            "SupportedRealtimeInferenceInstanceTypes": [event["InstanceType"]],
        },
        ModelApprovalStatus=event["ModelApprovalStatus"],
        ModelMetrics={
            "ModelQuality": {
                "Statistics": {
                    "ContentType": ".json",
                    "S3Uri": os.path.join(
                        event["EvaluationReportS3Path"], "evaluation_report.json"
                    ),
                },
            },
            "Explainability": {
                "Report": {
                    "ContentType": ".json",
                    # None when no analysis.json exists under the prefix.
                    "S3Uri": get_explainability_report_json_s3_path(
                        explainability_report_s3_path
                    ),
                }
            },
        },
    )
51 changes: 51 additions & 0 deletions
51
autopilot/sagemaker-autopilot-pipelines/start_autopilot_job.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import subprocess
import sys

# Upgrading boto3 to the newest release to be able to use the latest SageMaker features.
# pip is invoked as a subprocess of the current interpreter because pip offers
# no supported in-process API (``pip._internal`` is private and may change).
_BOTO3_TARGET_DIR = "/tmp/"
_pip_exit_code = subprocess.call(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-I",
        "-q",
        "boto3",
        "--target",
        _BOTO3_TARGET_DIR,
        "--no-cache-dir",
        "--disable-pip-version-check",
    ]
)
if _pip_exit_code != 0:
    # Best effort: keep going with whatever boto3 is already importable.
    print(
        f"boto3 upgrade failed (pip exit code {_pip_exit_code}); "
        "falling back to the preinstalled boto3",
        file=sys.stderr,
    )
# Put the freshly installed copy ahead of any preinstalled boto3.
sys.path.insert(0, _BOTO3_TARGET_DIR)
import boto3

sagemaker_client = boto3.client("sagemaker")
|
||
|
||
def lambda_handler(event, context):
    """Start a SageMaker Autopilot job configured entirely from *event*.

    Args:
        event: Mapping that provides the job name, training data location,
            target attribute, output location, problem type, objective
            metric, completion limits, Autopilot mode and execution role ARN.
        context: Invocation context (unused).
    """
    job_name = event["AutopilotJobName"]
    training_data_source = {
        "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": event["TrainValDatasetS3Path"],
        }
    }
    input_data_config = [
        {
            "DataSource": training_data_source,
            "TargetAttributeName": event["TargetAttributeName"],
        }
    ]
    output_data_config = {"S3OutputPath": event["TrainingOutputS3Path"]}
    problem_type = event["ProblemType"]
    job_objective = {"MetricName": event["AutopilotObjectiveMetricName"]}
    completion_criteria = {
        "MaxCandidates": event["MaxCandidates"],
        "MaxRuntimePerTrainingJobInSeconds": event[
            "MaxRuntimePerTrainingJobInSeconds"
        ],
        "MaxAutoMLJobRuntimeInSeconds": event["MaxAutoMLJobRuntimeInSeconds"],
    }
    job_config = {
        "CompletionCriteria": completion_criteria,
        "Mode": event["AutopilotMode"],
    }
    sagemaker_client.create_auto_ml_job(
        AutoMLJobName=job_name,
        InputDataConfig=input_data_config,
        OutputDataConfig=output_data_config,
        ProblemType=problem_type,
        AutoMLJobObjective=job_objective,
        AutoMLJobConfig=job_config,
        RoleArn=event["AutopilotExecutionRoleArn"],
    )
Oops, something went wrong.