Fix broken links and cleanup (#3451)
Co-authored-by: A Yacat <[email protected]>
yacatab and A Yacat authored Jun 10, 2022
1 parent da58c29 commit 0373fbc
Showing 4 changed files with 13 additions and 36 deletions.
4 changes: 1 addition & 3 deletions sagemaker-pipeline-multi-model/README.md
@@ -54,7 +54,7 @@ The following IAM roles are required:
- AmazonS3FullAccess
- AWSGlueServiceRole

-[restate-project.ipynb](https://github.com/aws/amazon-sagemaker-samples/blob/main/sagemaker-pipeline-multi-model/restate-project.ipynb) has been tested in a SageMaker notebook using a kernel with Python 3.7 installed. The notebook's IAM role has the following managed policies attached:
+[restate-project.ipynb](restate-project.ipynb) has been tested in a SageMaker notebook using a kernel with Python 3.7 installed. The notebook's IAM role has the following managed policies attached:
- AmazonEC2ContainerRegistryFullAccess
- AmazonS3FullAccess
- AWSGlueServiceNotebookRole
@@ -79,8 +79,6 @@ This SageMaker notebook is attached with an IAM role with the following in-line
}
```

-See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
-
## License

This library is licensed under the MIT-0 License. See the LICENSE file.
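
Note: attaching the managed policies listed in the hunk above to the notebook's execution role can be scripted. A minimal boto3 sketch, assuming a hypothetical role name (`restate-notebook-role` is a placeholder; note that AWSGlueServiceNotebookRole lives under the `service-role/` path):

```python
import boto3

iam = boto3.client("iam")

# Placeholder role name; substitute the notebook's actual execution role.
role_name = "restate-notebook-role"

policy_arns = [
    "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess",
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
    "arn:aws:iam::aws:policy/service-role/AWSGlueServiceNotebookRole",
]

for arn in policy_arns:
    iam.attach_role_policy(RoleName=role_name, PolicyArn=arn)
```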
4 changes: 2 additions & 2 deletions sagemaker-pipeline-multi-model/restate-project.ipynb
@@ -13,7 +13,7 @@
"id": "0d3deddf",
"metadata": {},
"source": [
"Before proceeding, please see context of this notebook in [README.md](https://github.com/aws/amazon-sagemaker-samples/blob/main/sagemaker-pipeline-multi-model/README.md). This notebook has been tested in a SageMaker notebook that is using a kernel with Python 3.7 installed, e.g. conda_mxnet_latest_p37. Make sure you have created a SageMaker project outside of this notebook with the name `restate`. Recommendation is to create a SageMaker project using [SageMaker-provide MLOps template for model building, training, and deployment template](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-projects-templates-sm.html#sagemaker-projects-templates-code-commit). Note that this notebook will not create the SageMaker project for you. \n"
"Before proceeding, please see context of this notebook in [README.md](README.md). This notebook has been tested in a SageMaker notebook that is using a kernel with Python 3.7 installed, e.g. conda_mxnet_latest_p37. Make sure you have created a SageMaker project outside of this notebook with the name `restate`. Recommendation is to create a SageMaker project using [SageMaker-provide MLOps template for model building, training, and deployment template](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-projects-templates-sm.html#sagemaker-projects-templates-code-commit). Note that this notebook will not create the SageMaker project for you. \n"
]
},
{
@@ -163,7 +163,7 @@
"id": "d3930b84",
"metadata": {},
"source": [
"At this point, it is assumed that S3 bucket sagemaker-restate-`<AWS ACCOUNT ID>` and the necessary IAM roles are created. For the complete list of prerequisites, please see [README.md](https://github.com/aws/amazon-sagemaker-samples/blob/main/sagemaker-pipeline-multi-model/README.md). \n",
"At this point, it is assumed that S3 bucket sagemaker-restate-`<AWS ACCOUNT ID>` and the necessary IAM roles are created. For the complete list of prerequisites, please see [README.md](README.md). \n",
"\n",
"We move the raw data to S3 bucket sagemaker-restate-`<AWS ACCOUNT ID>`."
]
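
The cell above moves the raw data into the sagemaker-restate-`<AWS ACCOUNT ID>` bucket. A minimal sketch of that upload with boto3 (the local file path and object key are placeholders):

```python
import boto3

# Resolve the account-suffixed bucket name used throughout the sample.
account_id = boto3.client("sts").get_caller_identity()["Account"]
bucket = f"sagemaker-restate-{account_id}"

# "data/raw.csv" and "raw/raw.csv" are hypothetical paths.
boto3.client("s3").upload_file("data/raw.csv", bucket, "raw/raw.csv")
```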
@@ -104,7 +104,7 @@ def get_pipeline(
role=None,
default_bucket=None,
model_package_group_name="restatePackageGroup", # Choose any name
pipeline_name="restate-p-roj2jxb1j0eu", # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
pipeline_name="restate-p-XXXXXXXXX", # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
base_job_prefix="restate", # Choose any name
):
"""Gets a SageMaker ML Pipeline instance working with on RE data.
@@ -134,12 +134,15 @@
input_data = ParameterString(
name="InputDataUrl",
default_value=f"", # Change this to point to the s3 location of your raw input data.
# default_value=f"s3://sagemaker-project-p-3xbrq5pwzvlw/export-flow-02-23-52-23-354f2c00/output/pipelines-4y3vbujineuo-DataWranglerProcessi-A5QsWJSsSJ/05ba8da3-2ced-4ecb-aad9-22704674d567/default/part-00000-76487ef2-610f-413c-a489-9158b771ef51-c000TEMP.csv",
# default_value=None
)

-### ATHENA PIPELINE BEGIN
data_sources = []
+# Sagemaker session
+sess = sagemaker_session
+
+# You can configure this with your own bucket name, e.g.
+# bucket = "my-bucket"
+bucket = sess.default_bucket()

data_sources.append(
ProcessingInput(
@@ -151,29 +154,21 @@
athena_dataset_definition=AthenaDatasetDefinition(
catalog="AwsDataCatalog",
database="restate",
-# query_string="SELECT * FROM resvm.russia_3870",
query_string="SELECT * FROM restate.california_10",
output_s3_uri="s3://sagemaker-restate-240964962523/athena/data-wrangler",
output_s3_uri=f"s3://{bucket}/athena/",
output_format="PARQUET",
),
),
)
)

-# Sagemaker session
-sess = sagemaker_session
-
-# You can configure this with your own bucket name, e.g.
-# bucket = "my-bucket"
-bucket = sess.default_bucket()
print(f"Data Wrangler export storage bucket: {bucket}")

# unique flow export ID
flow_export_id = f"{time.strftime('%d-%H-%M-%S', time.gmtime())}-{str(uuid.uuid4())[:8]}"
flow_export_name = f"flow-{flow_export_id}"

# Output name is auto-generated from the select node's ID + output name from the flow file.
# output_name = "dc0bda28-f867-4501-9323-b3d571ae5c35.default"
output_name = "99ae1ec3-dd5f-453c-bfae-721dac423cd7.default"

s3_output_prefix = f"export-{flow_export_name}/output"
@@ -230,9 +225,6 @@
framework="data-wrangler", # we are using the Sagemaker built in xgboost algorithm
region=region,
)
# container_uri = "119527597002.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-data-wrangler-container:1.x"
# Pinned Data Wrangler Container URL.
# container_uri_pinned = "119527597002.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-data-wrangler-container:1.14.3"

# Processing Job Instance count and instance type.
instance_count = 2
@@ -276,8 +268,6 @@
job_arguments=[f"--output-config '{json.dumps(output_config)}'"],
)

-### ATHENA PIPELINE END
-
# Processing step for feature engineering
# this processor does not have awswrangler installed
sklearn_processor = SKLearnProcessor(
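
The `--output-config` job argument a few lines above serializes a mapping from the flow's output node to a content type. This diff does not show how `output_config` is built; in Data Wrangler export notebooks it typically looks like the sketch below, keyed by the `output_name` set earlier in this file (structure inferred, not confirmed by this commit):

```python
import json

# Keyed by the flow output node ("<node ID>.default"); the value picks the format.
output_config = {
    "99ae1ec3-dd5f-453c-bfae-721dac423cd7.default": {
        "content_type": "CSV",  # PARQUET is the other common choice
    }
}

job_arguments = [f"--output-config '{json.dumps(output_config)}'"]
```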
@@ -500,7 +490,6 @@
processor=xgb_script_eval,
inputs=[
ProcessingInput(
-# source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
source=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
destination="/opt/ml/processing/model",
),
@@ -541,22 +530,19 @@
xgb_eval_metrics = JsonGet(
step=xgb_step_eval,
property_file=xgb_evaluation_report,
-# json_path="binary_classification_metrics.accuracy.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
json_path="regression_metrics.r2s.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
)

dtree_eval_metrics = JsonGet(
step=dtree_step_eval,
property_file=dtree_evaluation_report,
-# json_path="binary_classification_metrics.accuracy.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
json_path="regression_metrics.r2s.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
)

# Register model step that will be conditionally executed
dtree_step_register = RegisterModel(
name="DTreeReg",
estimator=dtree_train,
-# model_data=dtree_step_train.properties.ModelArtifacts.S3ModelArtifacts,
model_data=dtree_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
content_types=["text/csv"],
response_types=["text/csv"],
@@ -571,7 +557,6 @@
xgb_step_register = RegisterModel(
name="XGBReg",
estimator=xgb_train,
-# model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
model_data=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
content_types=["text/csv"],
response_types=["text/csv"],
@@ -587,13 +572,11 @@
left=JsonGet(
step=dtree_step_eval,
property_file=dtree_evaluation_report,
-# json_path="binary_classification_metrics.accuracy.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
json_path="regression_metrics.r2s.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
),
right=JsonGet(
step=xgb_step_eval,
property_file=xgb_evaluation_report,
-# json_path="binary_classification_metrics.accuracy.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
json_path="regression_metrics.r2s.value", # This should follow the structure of your report_dict defined in the evaluate.py file.
), # You can change the threshold here
)
@@ -615,7 +598,6 @@
training_instance_type,
model_approval_status,
input_data
-# input_data_wr
],
pipeline_experiment_config=PipelineExperimentConfig(
pipeline_name + "-" + create_date, "restate-{}".format(create_date)
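
For context, `get_pipeline` above returns a `sagemaker.workflow.pipeline.Pipeline`. A hedged driver sketch (the module name, region, and role ARN are assumptions, and the leading parameters of `get_pipeline` are not shown in this diff):

```python
from pipeline import get_pipeline  # module name is an assumption

role_arn = "arn:aws:iam::111122223333:role/SageMakerExecutionRole"  # placeholder

pipe = get_pipeline(
    region="us-east-1",                   # assumed leading parameter
    role=role_arn,
    pipeline_name="restate-p-XXXXXXXXX",  # from the Studio UI, as noted above
)

pipe.upsert(role_arn=role_arn)  # create or update the pipeline definition
execution = pipe.start()        # kick off a run
```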
@@ -15,8 +15,6 @@
"name": "restate-california",
"catalogName": "AwsDataCatalog",
"databaseName": "restate",
"queryString": "SELECT * FROM \"restate\".\"california\"",
"s3OutputLocation": "s3://sagemaker-ap-southeast-1-240964962523/athena/",
"outputFormat": "parquet",
"workgroup": null,
"workgroupOutputLocation": null,
@@ -25,8 +23,7 @@
},
"trained_parameters": {
"query_execution_id": "ca8a88de-b2b8-403b-b841-f3a9c1a750a0",
"ctas_table_name": "sagemaker_tmp_5f97be7f253044558c96b79e9a8fad40",
"ctas_s3_output_location": "s3://sagemaker-ap-southeast-1-240964962523/athena/5f97be7f253044558c96b79e9a8fad40/data/"
"ctas_table_name": "sagemaker_tmp_5f97be7f253044558c96b79e9a8fad40"
},
"inputs": [],
"outputs": [
@@ -123,4 +120,4 @@
]
}
]
-}
+}
