From e8a877c628709d07f6c4046a3a356328d846d419 Mon Sep 17 00:00:00 2001 From: djarpin Date: Fri, 24 Nov 2017 23:21:34 -0800 Subject: [PATCH 1/4] Updated: data_distribution_types to use Python SDK for upload --- .../AmazonAIAlgorithmsIO_pb2.py | 101 ---- .../data_distribution_types/convert_data.py | 90 ---- .../data_distribution_types.ipynb | 44 +- .../data_distribution_types/record_pb2.py | 501 ------------------ 4 files changed, 31 insertions(+), 705 deletions(-) delete mode 100644 advanced_functionality/data_distribution_types/AmazonAIAlgorithmsIO_pb2.py delete mode 100644 advanced_functionality/data_distribution_types/convert_data.py delete mode 100644 advanced_functionality/data_distribution_types/record_pb2.py diff --git a/advanced_functionality/data_distribution_types/AmazonAIAlgorithmsIO_pb2.py b/advanced_functionality/data_distribution_types/AmazonAIAlgorithmsIO_pb2.py deleted file mode 100644 index ca6db7a35f..0000000000 --- a/advanced_functionality/data_distribution_types/AmazonAIAlgorithmsIO_pb2.py +++ /dev/null @@ -1,101 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: AmazonAIAlgorithmsIO.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='AmazonAIAlgorithmsIO.proto', - package='AmazonAIAlgorithmsIO', - syntax='proto2', - serialized_pb=_b('\n\x1a\x41mazonAIAlgorithmsIO.proto\x12\x14\x41mazonAIAlgorithmsIO\"\\\n\x06Record\x12\x10\n\x04keys\x18\x01 \x03(\x04\x42\x02\x10\x01\x12\x12\n\x06values\x18\x02 \x03(\x02\x42\x02\x10\x01\x12\r\n\x05label\x18\x03 \x01(\x01\x12\x0b\n\x03uid\x18\x04 \x01(\t\x12\x10\n\x08metadata\x18\x05 \x01(\t') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_RECORD = _descriptor.Descriptor( - name='Record', - full_name='AmazonAIAlgorithmsIO.Record', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='keys', full_name='AmazonAIAlgorithmsIO.Record.keys', index=0, - number=1, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='values', full_name='AmazonAIAlgorithmsIO.Record.values', index=1, - number=2, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='label', full_name='AmazonAIAlgorithmsIO.Record.label', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='uid', full_name='AmazonAIAlgorithmsIO.Record.uid', index=3, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='metadata', full_name='AmazonAIAlgorithmsIO.Record.metadata', index=4, - number=5, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=52, - serialized_end=144, -) - -DESCRIPTOR.message_types_by_name['Record'] = _RECORD - -Record = _reflection.GeneratedProtocolMessageType('Record', (_message.Message,), dict( - DESCRIPTOR = _RECORD, - __module__ = 'AmazonAIAlgorithmsIO_pb2' - # @@protoc_insertion_point(class_scope:AmazonAIAlgorithmsIO.Record) - )) -_sym_db.RegisterMessage(Record) - - -_RECORD.fields_by_name['keys'].has_options = True -_RECORD.fields_by_name['keys']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_RECORD.fields_by_name['values'].has_options = True -_RECORD.fields_by_name['values']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -# @@protoc_insertion_point(module_scope) diff --git a/advanced_functionality/data_distribution_types/convert_data.py b/advanced_functionality/data_distribution_types/convert_data.py deleted file mode 100644 index 419c440e51..0000000000 --- a/advanced_functionality/data_distribution_types/convert_data.py +++ /dev/null @@ -1,90 +0,0 @@ -import struct -import io -import boto3 -import sys - -import AmazonAIAlgorithmsIO_pb2 -from record_pb2 import Record - - -def write_recordio(f, data): - kmagic = 0xced7230a - length = len(data) - f.write(struct.pack('I', kmagic)) - f.write(struct.pack('I', length)) - upper_align = ((length + 3) >> 2) << 2 - padding = bytes([0x00 for _ in range(upper_align - length)]) - f.write(data) - f.write(padding) - - -def list_to_record_bytes(values, keys=None, label=None, feature_size=None): - record = Record() - - record.features['values'].float32_tensor.values.extend(values) - - if keys is not None: - if feature_size is None: - raise ValueError("For sparse tensors the feature size must be specified.") - - record.features['values'].float32_tensor.keys.extend(keys) - - if feature_size is not None: - record.features['values'].float32_tensor.shape.extend([feature_size]) - - if label is not None: - record.label['values'].float32_tensor.values.extend([label]) - - return record.SerializeToString() - - -def read_next(f): - kmagic = 0xced7230a - raw_bytes = f.read(4) - if not raw_bytes: - return - m = struct.unpack('I', raw_bytes)[0] - if m != kmagic: - raise ValueError("Incorrect encoding") - length = struct.unpack('I', f.read(4))[0] - upper_align = ((length + 3) >> 2) << 2 - data = f.read(upper_align) - return data[:length] - - -def to_proto(f, labels, vectors): - for label, vec in zip(labels, vectors): - record = AmazonAIAlgorithmsIO_pb2.Record() - record.values.extend(vec) - record.label = label - write_recordio(f, record.SerializeToString()) - - -def to_libsvm(f, labels, values): - f.write('\n'.join( - ['{} {}'.format(label, ' '.join(['{}:{}'.format(i + 1, el) for i, el in enumerate(vec)])) for label, vec in - zip(labels, values)])) - return f - - -def write_to_s3(fobj, bucket, key): - return boto3.Session().resource('s3').Bucket(bucket).Object(key).upload_fileobj(fobj) - 
- -def upload_to_s3(partition_name, partition, bucket): - labels = [t.tolist() for t in partition[1]] - vectors = [t.tolist() for t in partition[0]] - f = io.BytesIO() - to_proto(f, labels, vectors) - f.seek(0) - key = "{}/examples".format(partition_name) - url = 's3n://{}/{}'.format(bucket, key) - print('Writing to {}'.format(url)) - write_to_s3(f, bucket, key) - print('Done writing to {}'.format(url)) - - -def convert_data(partitions, bucket): - for partition_name, partition in partitions: - print('{}: {} {}'.format(partition_name, partition[0].shape, partition[1].shape)) - upload_to_s3(partition_name, partition, bucket) diff --git a/advanced_functionality/data_distribution_types/data_distribution_types.ipynb b/advanced_functionality/data_distribution_types/data_distribution_types.ipynb index 78f20a4156..3c21fbd530 100644 --- a/advanced_functionality/data_distribution_types/data_distribution_types.ipynb +++ b/advanced_functionality/data_distribution_types/data_distribution_types.ipynb @@ -31,7 +31,7 @@ "---\n", "# Setup\n", "\n", - "_This notebook was created and tested on an ml.m4xlarge notebook instance._\n", + "_This notebook was created and tested on an ml.m4.xlarge notebook instance._\n", "\n", "Let's start by specifying:\n", "\n", @@ -78,11 +78,12 @@ "import matplotlib.pyplot as plt\n", "from IPython.display import display\n", "import io\n", - "import convert_data\n", "import time\n", "import copy\n", "import json\n", - "import sys" + "import sys\n", + "import sagemaker.amazon.common as smac\n", + "import os" ] }, { @@ -160,7 +161,7 @@ "metadata": {}, "source": [ "We can see:\n", - "- `EventCode` is pretty unevently distributed, with some events making up 7%+ of the observations and others being a thousandth of a percent.\n", + "- `EventCode` is pretty unevenly distributed, with some events making up 7%+ of the observations and others being a thousandth of a percent.\n", "- `AvgTone` seems to be reasonably smoothly distributed, while `NumArticles` has a long tail, and `Actor` geo features have suspiciously large spikes near 0.\n", "\n", "Let's remove the (0, 0) lat-longs, one hot encode `EventCode`, and prepare our data for a machine learning model. 
For this example we'll keep things straightforward and try to predict `AvgTone`, using the other variables in our dataset as features.\n", @@ -193,12 +194,10 @@ "outputs": [], "source": [ "def write_to_s3(bucket, prefix, channel, file_prefix, X, y):\n", - " f = io.BytesIO()\n", - " feature_size = X.shape[1]\n", - " for features, target in zip(X, y):\n", - " convert_data.write_recordio(f, convert_data.list_to_record_bytes(features, label=target, feature_size=feature_size))\n", - " f.seek(0)\n", - " boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, channel, file_prefix + '.data')).upload_fileobj(f)\n", + " buf = io.BytesIO()\n", + " smac.write_numpy_to_dense_tensor(buf, X.astype('float32'), y.astype('float32'))\n", + " buf.seek(0)\n", + " boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, channel, file_prefix + '.data')).upload_fileobj(buf)\n", "\n", "def transform_gdelt(df, events=None):\n", " df = df[['AvgTone', 'EventCode', 'NumArticles', 'Actor1Geo_Lat', 'Actor1Geo_Long', 'Actor2Geo_Lat', 'Actor2Geo_Long']]\n", @@ -649,7 +648,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, because POST requests to our endpoint are limited to ~6MB, we'll setup a small function to split our test data up into mini-batches, loop through and invoke our endpoint to get predictions, and gather them into a single array." + "Next, because POST requests to our endpoint are limited to ~6MB, we'll setup a small function to split our test data up into mini-batches that are each about 5MB, loop through and invoke our endpoint to get predictions for those mini-batches, and gather them into a single array." ] }, { @@ -714,10 +713,28 @@ "\n", "Different algorithms can be expected to show variation in which distribution mechanism is most effective at achieving optimal compute spend per point of model accuracy. The message remains the same though, that the process of finding the right distribution type is another experiment in optimizing model training times." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### (Optional) Clean-up\n", + "\n", + "If you're ready to be done with this notebook, please uncomment and run the cell below. This will remove the hosted endpoints you created and avoid any charges from a stray instance being left on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#sm.delete_endpoint(EndpointName=sharded_endpoint)\n", + "#sm.delete_endpoint(EndpointName=replicated_endpoint)" + ] } ], "metadata": { - "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.", "kernelspec": { "display_name": "Environment (conda_python3)", "language": "python", @@ -734,7 +751,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" - } + }, + "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). 
You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." }, "nbformat": 4, "nbformat_minor": 2 diff --git a/advanced_functionality/data_distribution_types/record_pb2.py b/advanced_functionality/data_distribution_types/record_pb2.py deleted file mode 100644 index e49d21d030..0000000000 --- a/advanced_functionality/data_distribution_types/record_pb2.py +++ /dev/null @@ -1,501 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: src/ai_algorithms_protobuf_python/record.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='src/ai_algorithms_protobuf_python/record.proto', - package='aialgs.data', - syntax='proto2', - serialized_pb=_b('\n.src/ai_algorithms_protobuf_python/record.proto\x12\x0b\x61ialgs.data\"H\n\rFloat32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\"H\n\rFloat64Tensor\x12\x12\n\x06values\x18\x01 \x03(\x01\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\"F\n\x0bInt32Tensor\x12\x12\n\x06values\x18\x01 \x03(\x05\x42\x02\x10\x01\x12\x10\n\x04keys\x18\x02 \x03(\x04\x42\x02\x10\x01\x12\x11\n\x05shape\x18\x03 \x03(\x04\x42\x02\x10\x01\",\n\x05\x42ytes\x12\r\n\x05value\x18\x01 \x03(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x02 \x01(\t\"\xd3\x01\n\x05Value\x12\x34\n\x0e\x66loat32_tensor\x18\x02 \x01(\x0b\x32\x1a.aialgs.data.Float32TensorH\x00\x12\x34\n\x0e\x66loat64_tensor\x18\x03 \x01(\x0b\x32\x1a.aialgs.data.Float64TensorH\x00\x12\x30\n\x0cint32_tensor\x18\x07 \x01(\x0b\x32\x18.aialgs.data.Int32TensorH\x00\x12#\n\x05\x62ytes\x18\t \x01(\x0b\x32\x12.aialgs.data.BytesH\x00\x42\x07\n\x05value\"\xa9\x02\n\x06Record\x12\x33\n\x08\x66\x65\x61tures\x18\x01 \x03(\x0b\x32!.aialgs.data.Record.FeaturesEntry\x12-\n\x05label\x18\x02 \x03(\x0b\x32\x1e.aialgs.data.Record.LabelEntry\x12\x0b\n\x03uid\x18\x03 \x01(\t\x12\x10\n\x08metadata\x18\x04 \x01(\t\x12\x15\n\rconfiguration\x18\x05 \x01(\t\x1a\x43\n\rFeaturesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x1a@\n\nLabelEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.aialgs.data.Value:\x02\x38\x01\x42\x30\n com.amazonaws.aialgorithms.protoB\x0cRecordProtos') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_FLOAT32TENSOR = _descriptor.Descriptor( - name='Float32Tensor', - full_name='aialgs.data.Float32Tensor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='values', full_name='aialgs.data.Float32Tensor.values', index=0, - number=1, type=2, cpp_type=6, label=3, - has_default_value=False, 
default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='keys', full_name='aialgs.data.Float32Tensor.keys', index=1, - number=2, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='shape', full_name='aialgs.data.Float32Tensor.shape', index=2, - number=3, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=63, - serialized_end=135, -) - - -_FLOAT64TENSOR = _descriptor.Descriptor( - name='Float64Tensor', - full_name='aialgs.data.Float64Tensor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='values', full_name='aialgs.data.Float64Tensor.values', index=0, - number=1, type=1, cpp_type=5, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='keys', full_name='aialgs.data.Float64Tensor.keys', index=1, - number=2, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='shape', full_name='aialgs.data.Float64Tensor.shape', index=2, - number=3, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=137, - serialized_end=209, -) - - -_INT32TENSOR = _descriptor.Descriptor( - name='Int32Tensor', - full_name='aialgs.data.Int32Tensor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='values', full_name='aialgs.data.Int32Tensor.values', index=0, - number=1, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='keys', full_name='aialgs.data.Int32Tensor.keys', index=1, - number=2, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='shape', full_name='aialgs.data.Int32Tensor.shape', index=2, - number=3, type=4, cpp_type=4, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=211, - serialized_end=281, -) - - -_BYTES = _descriptor.Descriptor( - name='Bytes', - full_name='aialgs.data.Bytes', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='value', full_name='aialgs.data.Bytes.value', index=0, - number=1, type=12, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='content_type', full_name='aialgs.data.Bytes.content_type', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=283, - serialized_end=327, -) - - -_VALUE = _descriptor.Descriptor( - name='Value', - full_name='aialgs.data.Value', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='float32_tensor', full_name='aialgs.data.Value.float32_tensor', index=0, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='float64_tensor', full_name='aialgs.data.Value.float64_tensor', index=1, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='int32_tensor', full_name='aialgs.data.Value.int32_tensor', index=2, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bytes', full_name='aialgs.data.Value.bytes', index=3, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='value', full_name='aialgs.data.Value.value', - index=0, containing_type=None, fields=[]), - ], - serialized_start=330, - serialized_end=541, -) - - -_RECORD_FEATURESENTRY = _descriptor.Descriptor( - name='FeaturesEntry', - full_name='aialgs.data.Record.FeaturesEntry', - 
filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='key', full_name='aialgs.data.Record.FeaturesEntry.key', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='value', full_name='aialgs.data.Record.FeaturesEntry.value', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')), - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=708, - serialized_end=775, -) - -_RECORD_LABELENTRY = _descriptor.Descriptor( - name='LabelEntry', - full_name='aialgs.data.Record.LabelEntry', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='key', full_name='aialgs.data.Record.LabelEntry.key', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='value', full_name='aialgs.data.Record.LabelEntry.value', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')), - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=777, - serialized_end=841, -) - -_RECORD = _descriptor.Descriptor( - name='Record', - full_name='aialgs.data.Record', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='features', full_name='aialgs.data.Record.features', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='label', full_name='aialgs.data.Record.label', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='uid', full_name='aialgs.data.Record.uid', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='metadata', full_name='aialgs.data.Record.metadata', index=3, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - 
name='configuration', full_name='aialgs.data.Record.configuration', index=4, - number=5, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_RECORD_FEATURESENTRY, _RECORD_LABELENTRY, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=544, - serialized_end=841, -) - -_VALUE.fields_by_name['float32_tensor'].message_type = _FLOAT32TENSOR -_VALUE.fields_by_name['float64_tensor'].message_type = _FLOAT64TENSOR -_VALUE.fields_by_name['int32_tensor'].message_type = _INT32TENSOR -_VALUE.fields_by_name['bytes'].message_type = _BYTES -_VALUE.oneofs_by_name['value'].fields.append( - _VALUE.fields_by_name['float32_tensor']) -_VALUE.fields_by_name['float32_tensor'].containing_oneof = _VALUE.oneofs_by_name['value'] -_VALUE.oneofs_by_name['value'].fields.append( - _VALUE.fields_by_name['float64_tensor']) -_VALUE.fields_by_name['float64_tensor'].containing_oneof = _VALUE.oneofs_by_name['value'] -_VALUE.oneofs_by_name['value'].fields.append( - _VALUE.fields_by_name['int32_tensor']) -_VALUE.fields_by_name['int32_tensor'].containing_oneof = _VALUE.oneofs_by_name['value'] -_VALUE.oneofs_by_name['value'].fields.append( - _VALUE.fields_by_name['bytes']) -_VALUE.fields_by_name['bytes'].containing_oneof = _VALUE.oneofs_by_name['value'] -_RECORD_FEATURESENTRY.fields_by_name['value'].message_type = _VALUE -_RECORD_FEATURESENTRY.containing_type = _RECORD -_RECORD_LABELENTRY.fields_by_name['value'].message_type = _VALUE -_RECORD_LABELENTRY.containing_type = _RECORD -_RECORD.fields_by_name['features'].message_type = _RECORD_FEATURESENTRY -_RECORD.fields_by_name['label'].message_type = _RECORD_LABELENTRY -DESCRIPTOR.message_types_by_name['Float32Tensor'] = _FLOAT32TENSOR -DESCRIPTOR.message_types_by_name['Float64Tensor'] = _FLOAT64TENSOR -DESCRIPTOR.message_types_by_name['Int32Tensor'] = _INT32TENSOR -DESCRIPTOR.message_types_by_name['Bytes'] = _BYTES -DESCRIPTOR.message_types_by_name['Value'] = _VALUE -DESCRIPTOR.message_types_by_name['Record'] = _RECORD - -Float32Tensor = _reflection.GeneratedProtocolMessageType('Float32Tensor', (_message.Message,), dict( - DESCRIPTOR = _FLOAT32TENSOR, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Float32Tensor) - )) -_sym_db.RegisterMessage(Float32Tensor) - -Float64Tensor = _reflection.GeneratedProtocolMessageType('Float64Tensor', (_message.Message,), dict( - DESCRIPTOR = _FLOAT64TENSOR, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Float64Tensor) - )) -_sym_db.RegisterMessage(Float64Tensor) - -Int32Tensor = _reflection.GeneratedProtocolMessageType('Int32Tensor', (_message.Message,), dict( - DESCRIPTOR = _INT32TENSOR, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Int32Tensor) - )) -_sym_db.RegisterMessage(Int32Tensor) - -Bytes = _reflection.GeneratedProtocolMessageType('Bytes', (_message.Message,), dict( - DESCRIPTOR = _BYTES, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Bytes) - )) -_sym_db.RegisterMessage(Bytes) - -Value = _reflection.GeneratedProtocolMessageType('Value', (_message.Message,), dict( - DESCRIPTOR = _VALUE, - 
__module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Value) - )) -_sym_db.RegisterMessage(Value) - -Record = _reflection.GeneratedProtocolMessageType('Record', (_message.Message,), dict( - - FeaturesEntry = _reflection.GeneratedProtocolMessageType('FeaturesEntry', (_message.Message,), dict( - DESCRIPTOR = _RECORD_FEATURESENTRY, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Record.FeaturesEntry) - )) - , - - LabelEntry = _reflection.GeneratedProtocolMessageType('LabelEntry', (_message.Message,), dict( - DESCRIPTOR = _RECORD_LABELENTRY, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Record.LabelEntry) - )) - , - DESCRIPTOR = _RECORD, - __module__ = 'src.ai_algorithms_protobuf_python.record_pb2' - # @@protoc_insertion_point(class_scope:aialgs.data.Record) - )) -_sym_db.RegisterMessage(Record) -_sym_db.RegisterMessage(Record.FeaturesEntry) -_sym_db.RegisterMessage(Record.LabelEntry) - - -DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n com.amazonaws.aialgorithms.protoB\014RecordProtos')) -_FLOAT32TENSOR.fields_by_name['values'].has_options = True -_FLOAT32TENSOR.fields_by_name['values']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_FLOAT32TENSOR.fields_by_name['keys'].has_options = True -_FLOAT32TENSOR.fields_by_name['keys']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_FLOAT32TENSOR.fields_by_name['shape'].has_options = True -_FLOAT32TENSOR.fields_by_name['shape']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_FLOAT64TENSOR.fields_by_name['values'].has_options = True -_FLOAT64TENSOR.fields_by_name['values']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_FLOAT64TENSOR.fields_by_name['keys'].has_options = True -_FLOAT64TENSOR.fields_by_name['keys']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_FLOAT64TENSOR.fields_by_name['shape'].has_options = True -_FLOAT64TENSOR.fields_by_name['shape']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_INT32TENSOR.fields_by_name['values'].has_options = True -_INT32TENSOR.fields_by_name['values']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_INT32TENSOR.fields_by_name['keys'].has_options = True -_INT32TENSOR.fields_by_name['keys']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_INT32TENSOR.fields_by_name['shape'].has_options = True -_INT32TENSOR.fields_by_name['shape']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) -_RECORD_FEATURESENTRY.has_options = True -_RECORD_FEATURESENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')) -_RECORD_LABELENTRY.has_options = True -_RECORD_LABELENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')) -# @@protoc_insertion_point(module_scope) From 8c56b27183a9ffaf64651b980d5ce117e307a7af Mon Sep 17 00:00:00 2001 From: djarpin Date: Fri, 24 Nov 2017 23:39:55 -0800 Subject: [PATCH 2/4] Updated: kmeans_byom from feedback --- .../kmeans_bring_your_own_model.ipynb | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git 
a/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb b/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb index 7a1aac4b75..3dd2ea9244 100644 --- a/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb +++ b/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb @@ -33,10 +33,12 @@ "---\n", "## Setup\n", "\n", + "_This notebook was created and tested on an ml.m4.xlarge notebook instance._\n", + "\n", "Let's start by specifying:\n", "\n", - "- The IAM role arn used to give learning and hosting access to your data. See the documentation for how to create these. Note, if more than one role is required for notebook instances, training, and/or hosting, please replace the boto call with a the appropriate full IAM role arn string.\n", - "- The S3 bucket and prefix where you'll be storing your model data." + "- The S3 bucket and prefix that you want to use for training and model data. This should be within the same region as the Notebook Instance, training, and hosting.\n", + "- The IAM role arn used to give training and hosting access to your data. See the documentation for how to create these. Note, if more than one role is required for notebook instances, training, and/or hosting, please replace the boto regexp with a the appropriate full IAM role arn string(s)." ] }, { @@ -47,12 +49,15 @@ }, "outputs": [], "source": [ - "import boto3\n", - "\n", - "role = boto3.client('iam').list_instance_profiles()['InstanceProfiles'][0]['Roles'][0]['Arn']\n", - "\n", "bucket = ''\n", - "prefix = 'sagemaker/kmeans_byom'" + "prefix = 'sagemaker/kmeans_byom'\n", + " \n", + "# Define IAM role\n", + "import boto3\n", + "import re\n", + " \n", + "assumed_role = boto3.client('sts').get_caller_identity()['Arn']\n", + "role = re.sub(r'^(.+)sts::(\\d+):assumed-role/(.+?)/.*$', r'\\1iam::\\2:role/\\3', assumed_role)" ] }, { @@ -100,7 +105,7 @@ "\n", "### Data\n", "\n", - "For simplicity, we'll utilize the MNIST handwritten digit dataset." + "For simplicity, we'll utilize the MNIST dataset. This includes roughly 70K 28 x 28 pixel images of handwritten digits from 0 to 9. More detail can be found [here](https://en.wikipedia.org/wiki/MNIST_database)." ] }, { @@ -351,10 +356,18 @@ "\n", "This notebook showed how to seed a pre-existing model in an already built container. This functionality could be replicated with other Amazon SageMaker Algorithms, as well as the TensorFlow and MXNet containers. Although this is certainly an easy method to bring your own model, it is not likely to provide the flexibility of a bringing your own scoring container. Please refer to other example notebooks which show how to dockerize your own training and scoring could which could be modified appropriately to your use case." ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sm.delete_endpoint(EndpointName=kmeans_endpoint)" + ] } ], "metadata": { - "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License.", "kernelspec": { "display_name": "Environment (conda_mxnet_p36)", "language": "python", @@ -371,7 +384,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" - } + }, + "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." }, "nbformat": 4, "nbformat_minor": 2 From ae5d8fe8b67570ccdfee93787572187eae367d7b Mon Sep 17 00:00:00 2001 From: djarpin Date: Fri, 24 Nov 2017 23:41:28 -0800 Subject: [PATCH 3/4] Updated: kmeans_byom markdown typo --- .../kmeans_bring_your_own_model.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb b/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb index 3dd2ea9244..d568cb017c 100644 --- a/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb +++ b/advanced_functionality/kmeans_bring_your_own_model/kmeans_bring_your_own_model.ipynb @@ -354,7 +354,7 @@ "\n", "## Extensions\n", "\n", - "This notebook showed how to seed a pre-existing model in an already built container. This functionality could be replicated with other Amazon SageMaker Algorithms, as well as the TensorFlow and MXNet containers. Although this is certainly an easy method to bring your own model, it is not likely to provide the flexibility of a bringing your own scoring container. Please refer to other example notebooks which show how to dockerize your own training and scoring could which could be modified appropriately to your use case." + "This notebook showed how to seed a pre-existing model in an already built container. This functionality could be replicated with other Amazon SageMaker Algorithms, as well as the TensorFlow and MXNet containers. Although this is certainly an easy method to bring your own model, it is not likely to provide the flexibility of a bringing your own scoring container. Please refer to other example notebooks which show how to dockerize your own training and scoring container which could be modified appropriately to your use case." 
] }, { From eebf64606612d5386c11b6c6bafce35fdaecd238 Mon Sep 17 00:00:00 2001 From: djarpin Date: Sat, 25 Nov 2017 16:47:57 -0800 Subject: [PATCH 4/4] Updated: All files for changes to platform --- .../r_bring_your_own/Dockerfile | 2 +- .../r_bring_your_own/README.md | 2 - .../r_bring_your_own/build_and_push.sh | 56 ---- .../r_bring_your_own/iris.csv | 151 +++++++++++ .../r_bring_your_own/mars.R | 9 + .../r_bring_your_own/plumber.R | 11 +- .../r_bring_your_own/r_bring_your_own.ipynb | 242 +++++++++++++----- 7 files changed, 343 insertions(+), 130 deletions(-) delete mode 100644 advanced_functionality/r_bring_your_own/build_and_push.sh create mode 100644 advanced_functionality/r_bring_your_own/iris.csv diff --git a/advanced_functionality/r_bring_your_own/Dockerfile b/advanced_functionality/r_bring_your_own/Dockerfile index b178469fb4..8f441f5113 100644 --- a/advanced_functionality/r_bring_your_own/Dockerfile +++ b/advanced_functionality/r_bring_your_own/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:16.04 -MAINTAINER David Arpin +MAINTAINER Amazon SageMaker Examples RUN apt-get -y update && apt-get install -y --no-install-recommends \ wget \ diff --git a/advanced_functionality/r_bring_your_own/README.md b/advanced_functionality/r_bring_your_own/README.md index 39774b4b40..9666dfe06d 100644 --- a/advanced_functionality/r_bring_your_own/README.md +++ b/advanced_functionality/r_bring_your_own/README.md @@ -6,6 +6,4 @@ This folder contains one notebook and several helper files: *Dockerfile:* is the necessary configuration for building a docker container that calls the `mars.R` script. -*build_and_push.sh:* is a short shell script that will build and publish the algorithm's Docker container to AWS ECR. Running `source build_and_push.sh rmars` from the shell of a system with Docker and the proper credentials will create an ECR container that aligns to `r_bring_your_own.ipynb`. - *r_bring_your_own.ipynb:* is a notebook that calls the custom container once built and pushed into ECR. diff --git a/advanced_functionality/r_bring_your_own/build_and_push.sh b/advanced_functionality/r_bring_your_own/build_and_push.sh deleted file mode 100644 index b5950d2271..0000000000 --- a/advanced_functionality/r_bring_your_own/build_and_push.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash - -image=$1 - -if [ "$image" == "" ] -then - echo "Usage: $0 " - exit 1 -fi - -account=$(aws sts get-caller-identity --output text | awk '{print $1}') - -fullname="${account}.dkr.ecr.us-west-2.amazonaws.com/${image}:latest" - -aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 - -if [ $? -ne 0 ] -then - aws ecr create-repository --repository-name "${image}" > /dev/null - - policy=/tmp/ecr-repo-policy-$$.json - cat <<'EOF' > ${policy} -{ - "Version": "2008-10-17", - "Statement": [ - { - "Sid": "IMAccessRole", - "Effect": "Allow", - "Principal": { - "AWS": [ - "arn:aws:iam::920080109247:root", - "arn:aws:iam::786604636886:root" - ] - }, - "Action": [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability" - ] - } - ] -} -EOF - function cleanup { - rm -f ${policy} - } - trap cleanup EXIT TERM INT - - aws ecr set-repository-policy --repository-name "${image}" --policy-text file://${policy} > /dev/null -fi - -`aws ecr get-login --region us-west-2 | sed -e 's/ -e *[^ ]*//g'` - -docker build -t ${fullname} . 
- -docker push ${fullname} diff --git a/advanced_functionality/r_bring_your_own/iris.csv b/advanced_functionality/r_bring_your_own/iris.csv new file mode 100644 index 0000000000..8b6393099a --- /dev/null +++ b/advanced_functionality/r_bring_your_own/iris.csv @@ -0,0 +1,151 @@ +Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species +5.1,3.5,1.4,0.2,setosa +4.9,3,1.4,0.2,setosa +4.7,3.2,1.3,0.2,setosa +4.6,3.1,1.5,0.2,setosa +5,3.6,1.4,0.2,setosa +5.4,3.9,1.7,0.4,setosa +4.6,3.4,1.4,0.3,setosa +5,3.4,1.5,0.2,setosa +4.4,2.9,1.4,0.2,setosa +4.9,3.1,1.5,0.1,setosa +5.4,3.7,1.5,0.2,setosa +4.8,3.4,1.6,0.2,setosa +4.8,3,1.4,0.1,setosa +4.3,3,1.1,0.1,setosa +5.8,4,1.2,0.2,setosa +5.7,4.4,1.5,0.4,setosa +5.4,3.9,1.3,0.4,setosa +5.1,3.5,1.4,0.3,setosa +5.7,3.8,1.7,0.3,setosa +5.1,3.8,1.5,0.3,setosa +5.4,3.4,1.7,0.2,setosa +5.1,3.7,1.5,0.4,setosa +4.6,3.6,1,0.2,setosa +5.1,3.3,1.7,0.5,setosa +4.8,3.4,1.9,0.2,setosa +5,3,1.6,0.2,setosa +5,3.4,1.6,0.4,setosa +5.2,3.5,1.5,0.2,setosa +5.2,3.4,1.4,0.2,setosa +4.7,3.2,1.6,0.2,setosa +4.8,3.1,1.6,0.2,setosa +5.4,3.4,1.5,0.4,setosa +5.2,4.1,1.5,0.1,setosa +5.5,4.2,1.4,0.2,setosa +4.9,3.1,1.5,0.2,setosa +5,3.2,1.2,0.2,setosa +5.5,3.5,1.3,0.2,setosa +4.9,3.6,1.4,0.1,setosa +4.4,3,1.3,0.2,setosa +5.1,3.4,1.5,0.2,setosa +5,3.5,1.3,0.3,setosa +4.5,2.3,1.3,0.3,setosa +4.4,3.2,1.3,0.2,setosa +5,3.5,1.6,0.6,setosa +5.1,3.8,1.9,0.4,setosa +4.8,3,1.4,0.3,setosa +5.1,3.8,1.6,0.2,setosa +4.6,3.2,1.4,0.2,setosa +5.3,3.7,1.5,0.2,setosa +5,3.3,1.4,0.2,setosa +7,3.2,4.7,1.4,versicolor +6.4,3.2,4.5,1.5,versicolor +6.9,3.1,4.9,1.5,versicolor +5.5,2.3,4,1.3,versicolor +6.5,2.8,4.6,1.5,versicolor +5.7,2.8,4.5,1.3,versicolor +6.3,3.3,4.7,1.6,versicolor +4.9,2.4,3.3,1,versicolor +6.6,2.9,4.6,1.3,versicolor +5.2,2.7,3.9,1.4,versicolor +5,2,3.5,1,versicolor +5.9,3,4.2,1.5,versicolor +6,2.2,4,1,versicolor +6.1,2.9,4.7,1.4,versicolor +5.6,2.9,3.6,1.3,versicolor +6.7,3.1,4.4,1.4,versicolor +5.6,3,4.5,1.5,versicolor +5.8,2.7,4.1,1,versicolor +6.2,2.2,4.5,1.5,versicolor +5.6,2.5,3.9,1.1,versicolor +5.9,3.2,4.8,1.8,versicolor +6.1,2.8,4,1.3,versicolor +6.3,2.5,4.9,1.5,versicolor +6.1,2.8,4.7,1.2,versicolor +6.4,2.9,4.3,1.3,versicolor +6.6,3,4.4,1.4,versicolor +6.8,2.8,4.8,1.4,versicolor +6.7,3,5,1.7,versicolor +6,2.9,4.5,1.5,versicolor +5.7,2.6,3.5,1,versicolor +5.5,2.4,3.8,1.1,versicolor +5.5,2.4,3.7,1,versicolor +5.8,2.7,3.9,1.2,versicolor +6,2.7,5.1,1.6,versicolor +5.4,3,4.5,1.5,versicolor +6,3.4,4.5,1.6,versicolor +6.7,3.1,4.7,1.5,versicolor +6.3,2.3,4.4,1.3,versicolor +5.6,3,4.1,1.3,versicolor +5.5,2.5,4,1.3,versicolor +5.5,2.6,4.4,1.2,versicolor +6.1,3,4.6,1.4,versicolor +5.8,2.6,4,1.2,versicolor +5,2.3,3.3,1,versicolor +5.6,2.7,4.2,1.3,versicolor +5.7,3,4.2,1.2,versicolor +5.7,2.9,4.2,1.3,versicolor +6.2,2.9,4.3,1.3,versicolor +5.1,2.5,3,1.1,versicolor +5.7,2.8,4.1,1.3,versicolor +6.3,3.3,6,2.5,virginica +5.8,2.7,5.1,1.9,virginica +7.1,3,5.9,2.1,virginica +6.3,2.9,5.6,1.8,virginica +6.5,3,5.8,2.2,virginica +7.6,3,6.6,2.1,virginica +4.9,2.5,4.5,1.7,virginica +7.3,2.9,6.3,1.8,virginica +6.7,2.5,5.8,1.8,virginica +7.2,3.6,6.1,2.5,virginica +6.5,3.2,5.1,2,virginica +6.4,2.7,5.3,1.9,virginica +6.8,3,5.5,2.1,virginica +5.7,2.5,5,2,virginica +5.8,2.8,5.1,2.4,virginica +6.4,3.2,5.3,2.3,virginica +6.5,3,5.5,1.8,virginica +7.7,3.8,6.7,2.2,virginica +7.7,2.6,6.9,2.3,virginica +6,2.2,5,1.5,virginica +6.9,3.2,5.7,2.3,virginica +5.6,2.8,4.9,2,virginica +7.7,2.8,6.7,2,virginica +6.3,2.7,4.9,1.8,virginica +6.7,3.3,5.7,2.1,virginica +7.2,3.2,6,1.8,virginica +6.2,2.8,4.8,1.8,virginica 
+6.1,3,4.9,1.8,virginica +6.4,2.8,5.6,2.1,virginica +7.2,3,5.8,1.6,virginica +7.4,2.8,6.1,1.9,virginica +7.9,3.8,6.4,2,virginica +6.4,2.8,5.6,2.2,virginica +6.3,2.8,5.1,1.5,virginica +6.1,2.6,5.6,1.4,virginica +7.7,3,6.1,2.3,virginica +6.3,3.4,5.6,2.4,virginica +6.4,3.1,5.5,1.8,virginica +6,3,4.8,1.8,virginica +6.9,3.1,5.4,2.1,virginica +6.7,3.1,5.6,2.4,virginica +6.9,3.1,5.1,2.3,virginica +5.8,2.7,5.1,1.9,virginica +6.8,3.2,5.9,2.3,virginica +6.7,3.3,5.7,2.5,virginica +6.7,3,5.2,2.3,virginica +6.3,2.5,5,1.9,virginica +6.5,3,5.2,2,virginica +6.2,3.4,5.4,2.3,virginica +5.9,3,5.1,1.8,virginica diff --git a/advanced_functionality/r_bring_your_own/mars.R b/advanced_functionality/r_bring_your_own/mars.R index afa15b5847..4ee0e77fa3 100644 --- a/advanced_functionality/r_bring_your_own/mars.R +++ b/advanced_functionality/r_bring_your_own/mars.R @@ -1,3 +1,12 @@ +# Copyright [first edit year]-[latest edit year] Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + # Bring in library that contains multivariate adaptive regression splines (MARS) library(mda) diff --git a/advanced_functionality/r_bring_your_own/plumber.R b/advanced_functionality/r_bring_your_own/plumber.R index 4cf6f041f2..5d015a126d 100644 --- a/advanced_functionality/r_bring_your_own/plumber.R +++ b/advanced_functionality/r_bring_your_own/plumber.R @@ -1,10 +1,17 @@ # plumber.R +# Copyright [first edit year]-[latest edit year] Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. #' Ping to show server is there -#' @post /ping +#' @get /ping function() { - list(status='200', code='200')} + return('')} #' Parse input and return prediction from model diff --git a/advanced_functionality/r_bring_your_own/r_bring_your_own.ipynb b/advanced_functionality/r_bring_your_own/r_bring_your_own.ipynb index 11f5b6f8a9..763aed17a6 100644 --- a/advanced_functionality/r_bring_your_own/r_bring_your_own.ipynb +++ b/advanced_functionality/r_bring_your_own/r_bring_your_own.ipynb @@ -34,28 +34,52 @@ "---\n", "## Preparation\n", "\n", - "Let's start by defining the region, bucket, and prefix information we'll use." + "_This notebook was created and tested on an ml.m4.xlarge notebook instance._\n", + "\n", + "Let's start by specifying:\n", + "\n", + "- The S3 bucket and prefix that you want to use for training and model data. This should be within the same region as the Notebook Instance, training, and hosting.\n", + "- The IAM role arn used to give training and hosting access to your data. See the documentation for how to create these. 
Note, if more than one role is required for notebook instances, training, and/or hosting, please replace the boto regexp with a the appropriate full IAM role arn string(s)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "isConfigCell": true }, "outputs": [], "source": [ - "import os\n", + "bucket = ''\n", + "prefix = 'sagemaker/r_byo'\n", + " \n", + "# Define IAM role\n", "import boto3\n", + "import re\n", + " \n", + "assumed_role = boto3.client('sts').get_caller_identity()['Arn']\n", + "role = re.sub(r'^(.+)sts::(\\d+):assumed-role/(.+?)/.*$', r'\\1iam::\\2:role/\\3', assumed_role)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we'll import the libraries we'll need for the remainder of the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import time\n", "import json\n", - "\n", - "os.environ['AWS_DEFAULT_REGION'] = 'us-west-2'\n", - "role = boto3.client('iam').list_instance_profiles()['InstanceProfiles'][0]['Roles'][0]['Arn']\n", - "\n", - "bucket = ''\n", - "prefix = ''" + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" ] }, { @@ -65,11 +89,11 @@ "---\n", "## Code\n", "\n", - "For this example, we'll need 4 supporting code files.\n", + "For this example, we'll need 3 supporting code files.\n", "\n", "### Fit\n", "\n", - "`mars.R` creates functions to fit and serve our model. The algorithm we've chose to use is [Multivariate Adaptive Regression Splines (MARS)](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_splines). This is a suitable example as it's a unique and powerful algorithm, but isn't as broadly used as Amazon SageMarker algorithm, and it isn't available in Python's scikit-learn library. R's repository of packages is filled with algorithms that share these same criteria. " + "`mars.R` creates functions to fit and serve our model. The algorithm we've chosen to use is [Multivariate Adaptive Regression Splines](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_splines). This is a suitable example as it's a unique and powerful algorithm, but isn't as broadly used as Amazon SageMarker algorithms, and it isn't available in Python's scikit-learn library. R's repository of packages is filled with algorithms that share these same criteria. " ] }, { @@ -78,7 +102,7 @@ "source": [ "_The top of the code is devoted to setup. Bringing in the libraries we'll need and setting up the file paths as detailed in Amazon SageMaker documentation on bringing your own container._\n", "\n", - "```R\n", + "```\n", "# Bring in library that contains multivariate adaptive regression splines (MARS)\n", "library(mda)\n", "\n", @@ -166,7 +190,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "_Finally, a bit of logic to determine if, based on the options passed to calling this script, we are using the container for training or hosting._\n", + "_Finally, a bit of logic to determine if, based on the options passed when Amazon SageMaker Training or Hosting call this script, we are using the container to train an algorithm or host a model._\n", "\n", "```\n", "# Run at start-up\n", @@ -192,7 +216,7 @@ "source": [ "Per the Amazon SageMaker documentation, our service needs to accept post requests to ping and invocations. 
plumber specifies this with custom comments, followed by functions that take specific arguments.\n", "\n", - "Here invocations does most of the work, ingesting our trained model, handling the http request body, and producing a CSV output of predictions.\n", + "Here invocations does most of the work, ingesting our trained model, handling the HTTP request body, and producing a CSV output of predictions.\n", "\n", "```\n", "# plumber.R\n", @@ -204,7 +228,7 @@ " list(status='200', code='200')}\n", "\n", "\n", - "#' Echo the parameter that was sent in\n", + "#' Parse input and return the prediction from the model\n", "#' @param req The http request sent\n", "#' @post /invocations\n", "function(req) {\n", @@ -262,16 +286,59 @@ "metadata": {}, "source": [ "### Publish\n", - "Now, to publish this container to ECR, we run the `build_and_push.sh` script using `source build_and_push.sh rmars` from the terminal. This code sets up permissions and naming with minimal effort. We'll skip the details for the sake of brevity.\n", + "Now, to publish this container to ECR, we'll run the comands below.\n", + "\n", + "This command will take several minutes to run the first time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sh\n", + "\n", + "# The name of our algorithm\n", + "algorithm_name=rmars\n", + "\n", + "#set -e # stop if anything fails\n", + "\n", + "account=$(aws sts get-caller-identity --query Account --output text)\n", + "\n", + "# Get the region defined in the current configuration (default to us-west-2 if none defined)\n", + "region=$(aws configure get region)\n", + "region=${region:-us-west-2}\n", + "\n", + "fullname=\"${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest\"\n", + "\n", + "# If the repository doesn't exist in ECR, create it.\n", "\n", + "aws ecr describe-repositories --repository-names \"${algorithm_name}\" > /dev/null 2>&1\n", + "\n", + "if [ $? -ne 0 ]\n", + "then\n", + " aws ecr create-repository --repository-name \"${algorithm_name}\" > /dev/null\n", + "fi\n", + "\n", + "# Get the login command from ECR and execute it directly\n", + "$(aws ecr get-login --region ${region} --no-include-email)\n", + "\n", + "# Build the docker image locally with the image name and then push it to ECR\n", + "# with the full name.\n", + "docker build -t ${algorithm_name} .\n", + "docker tag ${algorithm_name} ${fullname}\n", + "\n", + "docker push ${fullname}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "---\n", "## Data\n", - "For this illustrative example, we'll use the simple `iris` dataset which can be brought into R using:\n", - "\n", - "```R\n", - "data(iris)\n", - "write.csv(iris, file='iris.csv', row.names=FALSE)\n", - "```\n", + "For this illustrative example, we'll simply use `iris`. This a classic, but small, dataset used to test supervised learning algorithms. Typically the goal is to predict one of three flower species based on various measurements of the flowers' attributes. Further detail can be found [here](https://en.wikipedia.org/wiki/Iris_flower_data_set).\n", "\n", "Then let's copy the data to S3." ] @@ -279,9 +346,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "train_file = 'iris.csv'\n", @@ -303,6 +368,24 @@ "## Train\n", "\n", "Now, let's setup the information needed to train a Multivariate Adaptive Regression Splines (MARS) model on iris data. 
In this case, we'll predict `Sepal.Length` rather than the more typical classification of `Species` to show how factors might be included in a model and limit the case to regression.\n", + "\n", + "First, we'll get our region and account information so that we can point to the ECR container we just created." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "region = boto3.Session().region_name\n", + "account = boto3.client('sts').get_caller_identity().get('Account')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", "- Specify the role to use\n", "- Give the training job a name\n", @@ -317,9 +400,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "r_job = 'r-byo-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", @@ -330,12 +411,12 @@ " \"RoleArn\": role,\n", " \"TrainingJobName\": r_job,\n", " \"AlgorithmSpecification\": {\n", - " \"TrainingImage\": \"345362745630.dkr.ecr.us-west-2.amazonaws.com/rmars:latest\",\n", + " \"TrainingImage\": '{}.dkr.ecr.{}.amazonaws.com/rmars:latest'.format(account, region),\n", " \"TrainingInputMode\": \"File\"\n", " },\n", " \"ResourceConfig\": {\n", " \"InstanceCount\": 1,\n", - " \"InstanceType\": \"ml.c4.xlarge\",\n", + " \"InstanceType\": \"ml.m4.xlarge\",\n", " \"VolumeSizeInGB\": 10\n", " },\n", " \"InputDataConfig\": [\n", @@ -369,27 +450,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's kick off our training job on EASE, using the parameters we just created. Because training is managed (AWS takes care of spinning up and spinning down the hardware), we don't have to wait for our job to finish to continue, but for this case, let's setup a waiter so we can monitor the status of our training." + "Now let's kick off our training job on Amazon SageMaker Training, using the parameters we just created. Because training is managed (AWS takes care of spinning up and spinning down the hardware), we don't have to wait for our job to finish to continue, but for this case, let's set up a waiter so we can monitor the status of our training."
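If the named waiter used below isn't available in a particular boto3 release, a plain polling loop is an equivalent fallback. This is only a sketch; it assumes the `sm` client and the `r_job` name defined in the adjacent cells.

```python
import time

# Fallback status polling (assumes `sm` and `r_job` are defined as in the surrounding cells).
while True:
    status = sm.describe_training_job(TrainingJobName=r_job)['TrainingJobStatus']
    print(status)
    if status in ('Completed', 'Failed', 'Stopped'):
        break
    time.sleep(60)
```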
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%%time\n", "\n", - "region = boto3.Session().region_name\n", - "sm = boto3.client(service_name='sagemaker',\n", - " endpoint_url='https://im.{}.amazonaws.com'.format(region))\n", + "sm = boto3.client('sagemaker')\n", "sm.create_training_job(**r_training_params)\n", "\n", "status = sm.describe_training_job(TrainingJobName=r_job)['TrainingJobStatus']\n", "print(status)\n", "sm.get_waiter('TrainingJob_Created').wait(TrainingJobName=r_job)\n", + "status = sm.describe_training_job(TrainingJobName=r_job)['TrainingJobStatus']\n", + "print(\"Training job ended with status: \" + status)\n", "if status == 'Failed':\n", " message = sm.describe_training_job(TrainingJobName=r_job)['FailureReason']\n", " print('Training failed with the following error: {}'.format(message))\n", @@ -409,13 +488,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "r_hosting_container = {\n", - " 'Image': \"345362745630.dkr.ecr.us-west-2.amazonaws.com/rmars:latest\",\n", + " 'Image': '{}.dkr.ecr.{}.amazonaws.com/rmars:latest'.format(account, region),\n", " 'ModelDataUrl': sm.describe_training_job(TrainingJobName=r_job)['ModelArtifacts']['S3ModelArtifacts']\n", "}\n", "\n", @@ -437,9 +514,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "r_endpoint_config = 'r-endpoint-config-' + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", @@ -447,7 +522,7 @@ "create_endpoint_config_response = sm.create_endpoint_config(\n", " EndpointConfigName=r_endpoint_config,\n", " ProductionVariants=[{\n", - " 'InstanceType': 'ml.c4.xlarge',\n", + " 'InstanceType': 'ml.m4.xlarge',\n", " 'InitialInstanceCount': 1,\n", " 'ModelName': r_job,\n", " 'VariantName': 'AllTraffic'}])\n", @@ -465,9 +540,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%%time\n", @@ -483,13 +556,16 @@ "status = resp['EndpointStatus']\n", "print(\"Status: \" + status)\n", "\n", - "sm.get_waiter('Endpoint_Created').wait(EndpointName=r_endpoint)\n", + "try:\n", + " sm.get_waiter('Endpoint_Created').wait(EndpointName=r_endpoint)\n", + "finally:\n", + " resp = sm.describe_endpoint(EndpointName=r_endpoint)\n", + " status = resp['EndpointStatus']\n", + " print(\"Arn: \" + resp['EndpointArn'])\n", + " print(\"Status: \" + status)\n", "\n", - "print(\"Arn: \" + resp['EndpointArn'])\n", - "print(\"Status: \" + status)\n", - "\n", - "if status != 'InService':\n", - " raise Exception('Endpoint creation did not succeed')" + " if status != 'InService':\n", + " raise Exception('Endpoint creation did not succeed')" ] }, { @@ -498,35 +574,45 @@ "source": [ "---\n", "## Predict\n", - "To confirm our endpoints are working properly, let's try to invoke the endpoint." + "To confirm our endpoints are working properly, let's try to invoke the endpoint.\n", + "\n", + "_Note: The payload we're passing in the request is a CSV string with a header record, followed by multiple new lines. It also contains text columns, which the serving code converts to the set of indicator variables needed for our model predictions. 
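To make the shape of that request concrete, the snippet below simply prints the header record and the first few rows of the same kind of payload. It is illustrative only and assumes the local `iris.csv` file used in the next cell.

```python
import pandas as pd

# Illustrative only: preview the CSV payload, including the text Species column
# that the serving code expands into indicator variables.
iris = pd.read_csv('iris.csv')  # assumes iris.csv is in the working directory
payload_preview = iris.drop(['Sepal.Length'], axis=1).to_csv(index=False)
print('\n'.join(payload_preview.split('\n')[:4]))
```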
Again, this is not a best practice for highly optimized code; however, it showcases the flexibility of bringing your own algorithm._" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "runtime = boto3.Session().client(service_name='sagemaker-runtime',\n", - " endpoint_url=\"https://maeveruntime.prod.{}.ml-platform.aws.a2z.com\".format(region))\n", + "iris = pd.read_csv('iris.csv')\n", + "\n", + "runtime = boto3.Session().client('sagemaker-runtime')\n", "\n", - "# TODO Get this payload from an actual file\n", - "payload = 'Sepal.Width,Petal.Length,Petal.Width,Species\\n3.5,1.4,0.2,setosa\\n3,1.4,0.2,setosa\\n3.2,1.3,0.2,setosa\\n3.1,1.5,0.2,setosa\\n6,1.4,0.2,setosa\\n3.9,1.7,0.4,setosa'\n", + "payload = iris.drop(['Sepal.Length'], axis=1).to_csv(index=False)\n", "response = runtime.invoke_endpoint(EndpointName=r_endpoint,\n", " ContentType='text/csv',\n", " Body=payload)\n", "\n", "result = json.loads(response['Body'].read().decode())\n", - "result" + "result " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "_Note: The payload we're passing in the request is a CSV string with a header record, followed by multiple new lines. It also contains text columns, which the serving code converts to the set of indicator variables needed for our model predictions. Again, this is not a best practice for highly optimized code, however, it showcases the flexibility of bringing your own algorithm._" + "We can see the result is a CSV of predictions for our target variable. Let's compare them to the actuals to see how our model did." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.scatter(iris['Sepal.Length'], np.fromstring(result[0], sep=','))\n", + "plt.show()" ] }, { @@ -540,14 +626,31 @@ "\n", "Other extensions could include setting up the R algorithm to train in parallel. Although R is not the easiest language to build distributed applications on top of, this is possible. In addition, running multiple versions of training simultaneously would allow for parallelized grid (or random) search for optimal hyperparameter settings. This would more fully realize the benefits of managed training." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### (Optional) Clean-up\n", + "\n", + "If you're ready to be done with this notebook, please uncomment and run the cell below. This will remove the hosted endpoint you created and avoid any charges from a stray instance being left on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#sm.delete_endpoint(EndpointName=r_endpoint)" + ] } ], "metadata": { - "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License.", "kernelspec": { - "display_name": "Python 3", + "display_name": "Environment (conda_python3)", "language": "python", - "name": "python3" + "name": "conda_python3" }, "language_info": { "codemirror_mode": { @@ -559,8 +662,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" - } + "version": "3.6.3" + }, + "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." }, "nbformat": 4, "nbformat_minor": 2