From d66cdba1d333d1118d450ff47c25df63bb7e5018 Mon Sep 17 00:00:00 2001
From: smatsumoto78
Date: Tue, 3 Jul 2018 15:25:08 +0900
Subject: [PATCH 1/9] Fix undefined variable reference

`args.tensorflow_version_tag` is a typo, and it causes a runtime error.
`tensorflow_version_tag` is the intended variable name.
---
 hyperparameter_tuning/keras_bring_your_own/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hyperparameter_tuning/keras_bring_your_own/main.py b/hyperparameter_tuning/keras_bring_your_own/main.py
index 8234c0e2bd..16e8a0beb7 100644
--- a/hyperparameter_tuning/keras_bring_your_own/main.py
+++ b/hyperparameter_tuning/keras_bring_your_own/main.py
@@ -80,7 +80,7 @@ def upload_training_data():
 
     tensorflow_version_tag = get_tensorflow_version_tag(args.tf_version, args.instance_type)
 
-    image_name = get_image_name(args.ecr_repository, args.tensorflow_version_tag)
+    image_name = get_image_name(args.ecr_repository, tensorflow_version_tag)
 
     build_image(image_name, tensorflow_version_tag)

From 9102d5e5f41fa0fc80b6ed523ac24bb42ee78c13 Mon Sep 17 00:00:00 2001
From: JonathanTaws
Date: Tue, 17 Jul 2018 18:21:59 +0200
Subject: [PATCH 2/9] Removed attachedModel in cleanup to avoid error

---
 sagemaker-spark/pyspark_mnist/pyspark_mnist_kmeans.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sagemaker-spark/pyspark_mnist/pyspark_mnist_kmeans.ipynb b/sagemaker-spark/pyspark_mnist/pyspark_mnist_kmeans.ipynb
index 19129981ac..436f6edfca 100644
--- a/sagemaker-spark/pyspark_mnist/pyspark_mnist_kmeans.ipynb
+++ b/sagemaker-spark/pyspark_mnist/pyspark_mnist_kmeans.ipynb
@@ -505,7 +505,7 @@
     "     resource_cleanup.deleteResources(model.getCreatedResources())\n",
     "\n",
     "# Don't forget to include any models or pipeline models that you created in the notebook\n",
-    "models = [initialModel, attachedModel, retrievedModel, modelFromJob]\n",
+    "models = [initialModel, retrievedModel, modelFromJob]\n",
     "\n",
     "# Delete regular SageMakerModels\n",
     "for m in models:\n",

From 0fbb6d19a21f2bbaac4e34829e1a4369ccada1ce Mon Sep 17 00:00:00 2001
From: Farhan Tejani
Date: Mon, 30 Jul 2018 22:05:00 +0000
Subject: [PATCH 3/9] Updated example to support latest TF framework

---
 .../source_dir/resnet_cifar_10.py                 | 15 +++++++--------
 ...nsorflow_resnet_cifar10_with_tensorboard.ipynb |  7 ++-----
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/source_dir/resnet_cifar_10.py b/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/source_dir/resnet_cifar_10.py
index 8912b5db14..9a7bae38cb 100644
--- a/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/source_dir/resnet_cifar_10.py
+++ b/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/source_dir/resnet_cifar_10.py
@@ -133,13 +133,13 @@ def _input_from_files(mode, batch_size, data_dir):
     if mode == tf.estimator.ModeKeys.TRAIN:
         dataset = dataset.repeat()
 
-    dataset = dataset.map(_dataset_parser, num_threads=1,
-                          output_buffer_size=2 * batch_size)
+    dataset = dataset.map(_dataset_parser)
+    dataset.prefetch(2 * batch_size)
 
     # For training, preprocess the image and shuffle.
     if mode == tf.estimator.ModeKeys.TRAIN:
-        dataset = dataset.map(_train_preprocess_fn, num_threads=1,
-                              output_buffer_size=2 * batch_size)
+        dataset = dataset.map(_train_preprocess_fn)
+        dataset.prefetch(2 * batch_size)
 
     # Ensure that the capacity is sufficiently large to provide good random
     # shuffling.
@@ -148,9 +148,8 @@ def _input_from_files(mode, batch_size, data_dir): # Subtract off the mean and divide by the variance of the pixels. dataset = dataset.map( - lambda image, label: (tf.image.per_image_standardization(image), label), - num_threads=1, - output_buffer_size=2 * batch_size) + lambda image, label: (tf.image.per_image_standardization(image), label)) + dataset.prefetch(2 * batch_size) # Batch results by up to batch_size, and then fetch the tuple from the # iterator. @@ -203,7 +202,7 @@ def _dataset_parser(value): def _record_dataset(filenames): """Returns an input pipeline Dataset from `filenames`.""" record_bytes = HEIGHT * WIDTH * DEPTH + 1 - return tf.contrib.data.FixedLengthRecordDataset(filenames, record_bytes) + return tf.data.FixedLengthRecordDataset(filenames, record_bytes) def _filenames(mode, data_dir): diff --git a/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb b/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb index 680d5bd30f..9ccc00b86a 100644 --- a/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb +++ b/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb @@ -118,7 +118,6 @@ "estimator = TensorFlow(entry_point='resnet_cifar_10.py',\n", " source_dir=source_dir,\n", " role=role,\n", - " framework_version='1.6',\n", " hyperparameters={'throttle_secs': 30},\n", " training_steps=1000, evaluation_steps=100,\n", " train_instance_count=2, train_instance_type='ml.c4.xlarge', \n", @@ -146,9 +145,7 @@ }, { "cell_type": "markdown", - "metadata": { - "collapsed": true - }, + "metadata": {}, "source": [ "# Deploy the trained model to prepare for predictions\n", "\n", @@ -225,7 +222,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.11" + "version": "2.7.14" }, "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
}, From 5d9c69a55646614080b97265ebe9a42ef62c14a6 Mon Sep 17 00:00:00 2001 From: Andrew Packer Date: Thu, 30 Aug 2018 11:32:21 -0700 Subject: [PATCH 4/9] Update KMS encryption notebook with steps for batch transformation --- .../handling_kms_encrypted_data.ipynb | 202 ++++++++++-------- 1 file changed, 114 insertions(+), 88 deletions(-) diff --git a/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb b/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb index 7ba3d7470a..338f5666ef 100644 --- a/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb +++ b/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb @@ -50,7 +50,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "isConfigCell": true }, "outputs": [], @@ -74,7 +73,7 @@ "bucket='' # put your s3 bucket name here, and create s3 bucket\n", "prefix = 'sagemaker/DEMO-kms'\n", "# customize to your bucket where you have stored the data\n", - "bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region,bucket)" + "bucket_path = 's3://{}'.format(bucket)" ] }, { @@ -93,9 +92,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_boston\n", @@ -116,15 +113,13 @@ "source": [ "### Data preprocessing\n", "\n", - "Now that we have the dataset, we need to split it into *train*, *validation*, and *test* datasets which we can use to evaluate the accuracy of the machine learning algorithm. We randomly split the dataset into 60% training, 20% validation and 20% test. Note that SageMaker Xgboost, expects the label column to be the first one in the datasets. So, we'll move the median value column (`MEDV`) from the last to the first position within the `write_file` method below. " + "Now that we have the dataset, we need to split it into *train*, *validation*, and *test* datasets which we can use to evaluate the accuracy of the machine learning algorithm. We'll also create a test dataset file with the labels removed so it can be fed into a batch transform job. We randomly split the dataset into 60% training, 20% validation and 20% test. Note that SageMaker Xgboost, expects the label column to be the first one in the datasets. So, we'll move the median value column (`MEDV`) from the last to the first position within the `write_file` method below. 
" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", @@ -135,37 +130,36 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "def write_file(X, y, fname):\n", + "def write_file(X, y, fname, include_labels=True):\n", " feature_names = boston['feature_names']\n", " data = pd.DataFrame(X, columns=feature_names)\n", - " target = pd.DataFrame(y, columns={'MEDV'})\n", - " data['MEDV'] = y\n", - " # bring this column to the front before writing the files\n", - " cols = data.columns.tolist()\n", - " cols = cols[-1:] + cols[:-1]\n", - " data = data[cols]\n", + " if include_labels:\n", + " target = pd.DataFrame(y, columns={'MEDV'})\n", + " data['MEDV'] = y\n", + " # bring this column to the front before writing the files\n", + " cols = data.columns.tolist()\n", + " cols = cols[-1:] + cols[:-1]\n", + " data = data[cols]\n", " data.to_csv(fname, header=False, index=False)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "train_file = 'train.csv'\n", "validation_file = 'val.csv'\n", "test_file = 'test.csv'\n", + "test_no_labels_file = 'test_no_labels.csv'\n", "write_file(X_train, y_train, train_file)\n", "write_file(X_val, y_val, validation_file)\n", - "write_file(X_test, y_test, test_file)" + "write_file(X_test, y_test, test_file)\n", + "write_file(X_test, y_test, test_no_labels_file, False)" ] }, { @@ -178,9 +172,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "s3 = boto3.client('s3')\n", @@ -207,7 +199,19 @@ " ServerSideEncryption='aws:kms',\n", " SSEKMSKeyId=kms_key_id)\n", "\n", - "print(\"Done uploading the validation dataset\")" + "print(\"Done uploading the validation dataset\")\n", + "\n", + "data_test = open(test_no_labels_file, 'rb')\n", + "key_test = '{}/test/{}'.format(prefix,test_no_labels_file)\n", + "\n", + "print(\"Put object...\")\n", + "s3.put_object(Bucket=bucket,\n", + " Key=key_test,\n", + " Body=data_test,\n", + " ServerSideEncryption='aws:kms',\n", + " SSEKMSKeyId=kms_key_id)\n", + "\n", + "print(\"Done uploading the test dataset\")" ] }, { @@ -222,9 +226,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from sagemaker.amazon.amazon_estimator import get_image_uri\n", @@ -234,9 +236,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%%time\n", @@ -334,9 +334,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%%time\n", @@ -375,9 +373,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from time import gmtime, strftime\n", @@ -401,15 +397,13 @@ "metadata": {}, "source": [ "### Create endpoint\n", - "Lastly, create the endpoint that serves up the model, through specifying the name and configuration defined above. The end result is an endpoint that can be validated and incorporated into production applications. This takes 9-11 minutes to complete." 
+ "Create the endpoint that serves up the model, through specifying the name and configuration defined above. The end result is an endpoint that can be validated and incorporated into production applications. This takes 9-11 minutes to complete." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%%time\n", @@ -449,15 +443,13 @@ "metadata": {}, "source": [ "## Validate the model for use\n", - "Finally, you can now validate the model for use. They can obtain the endpoint from the client library using the result from previous operations, and generate classifications from the trained model using that endpoint.\n" + "You can now validate the model for use. Obtain the endpoint from the client library using the result from previous operations, and run a single prediction on the trained model using that endpoint.\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "runtime_client = boto3.client('runtime.sagemaker')" @@ -466,87 +458,121 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import sys\n", "import math\n", "def do_predict(data, endpoint_name, content_type):\n", - " payload = ''.join(data)\n", " response = runtime_client.invoke_endpoint(EndpointName=endpoint_name, \n", " ContentType=content_type, \n", - " Body=payload)\n", + " Body=data)\n", " result = response['Body'].read()\n", " result = result.decode(\"utf-8\")\n", - " result = result.split(',')\n", " return result\n", "\n", - "def batch_predict(data, batch_size, endpoint_name, content_type):\n", - " items = len(data)\n", - " arrs = []\n", - " \n", - " for offset in range(0, items, batch_size):\n", - " if offset+batch_size < items:\n", - " results = do_predict(data[offset:(offset+batch_size)], endpoint_name, content_type)\n", - " arrs.extend(results)\n", - " else:\n", - " arrs.extend(do_predict(data[offset:items], endpoint_name, content_type))\n", - " sys.stdout.write('.')\n", - " return(arrs)" + "# pull the first item from the test dataset\n", + "with open('test.csv') as f:\n", + " first_line = f.readline()\n", + " features = first_line.split(',')[1:]\n", + " feature_str = ','.join(features)\n", + "\n", + "prediction = do_predict(feature_str, endpoint_name, 'text/csv')\n", + "print('Prediction: ' + prediction)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The following helps us calculate the Median Absolute Percent Error (MdAPE) on the batch dataset. Note that the intent of this example is not to produce the most accurate regressor but to demonstrate how to handle KMS encrypted data with SageMaker. " + "### (Optional) Delete the Endpoint\n", + "\n", + "If you're ready to be done with this notebook, please run the delete_endpoint line in the cell below. This will remove the hosted endpoint you created and avoid any charges from a stray instance being left on." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "import json\n", - "import numpy as np\n", - "\n", - "\n", - "with open('test.csv') as f:\n", - " lines = f.readlines()\n", + "client.delete_endpoint(EndpointName=endpoint_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run batch prediction using batch transform\n", + "Create a transform job to do batch prediction using the trained model. Similar to the training section above, the execution role assumed by this notebook must have permissions to encrypt and decrypt data with the KMS key (`kms_key_id`) used for S3 server-side encryption." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transform_job_name = 'DEMO-xgboost-batch-prediction' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", + "print(\"Transform job\", transform_job_name)\n", "\n", - "#remove the labels\n", - "labels = [line.split(',')[0] for line in lines]\n", - "features = [line.split(',')[1:] for line in lines]\n", + "transform_params = \\\n", + "{\n", + " \"TransformJobName\": transform_job_name,\n", + " \"ModelName\": model_name,\n", + " \"TransformInput\": {\n", + " \"ContentType\": \"text/csv\",\n", + " \"DataSource\": {\n", + " \"S3DataSource\": {\n", + " \"S3DataType\": \"S3Prefix\",\n", + " \"S3Uri\": bucket_path + \"/\"+ prefix + '/test'\n", + " }\n", + " },\n", + " \"SplitType\": \"Line\"\n", + " },\n", + " \"TransformOutput\": {\n", + " \"AssembleWith\": \"Line\",\n", + " \"S3OutputPath\": bucket_path + \"/\"+ prefix + '/predict'\n", + " },\n", + " \"TransformResources\": {\n", + " \"InstanceCount\": 1,\n", + " \"InstanceType\": \"ml.c4.xlarge\"\n", + " }\n", + "}\n", "\n", - "features_str = [','.join(row) for row in features]\n", - "preds = batch_predict(features_str, 100, endpoint_name, 'text/csv')\n", - "print('\\n Median Absolute Percent Error (MdAPE) = ', np.median(np.abs(np.asarray(labels, dtype=float) - np.asarray(preds, dtype=float)) / np.asarray(labels, dtype=float)))" + "client.create_transform_job(**transform_params)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### (Optional) Delete the Endpoint\n", + "### Evaluate the batch predictions\n", "\n", - "If you're ready to be done with this notebook, please run the delete_endpoint line in the cell below. This will remove the hosted endpoint you created and avoid any charges from a stray instance being left on." + "The following helps us calculate the Median Absolute Percent Error (MdAPE) on the batch prediction output in S3. Note that the intent of this example is not to produce the most accurate regressor but to demonstrate how to handle KMS encrypted data with SageMaker." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "client.delete_endpoint(EndpointName=endpoint_name)" + "print(\"Downloading prediction object...\")\n", + "s3.download_file(Bucket=bucket,\n", + " Key=prefix + '/predict/' + test_no_labels_file + '.out',\n", + " Filename='./predictions.csv')\n", + "\n", + "# load predictions\n", + "with open('predictions.csv') as f:\n", + " preds = f.readlines()\n", + "\n", + "# extract labels from test file\n", + "with open('test.csv') as f:\n", + " test_lines = f.readlines()\n", + "labels = [line.split(',')[0] for line in test_lines]\n", + "\n", + "print('\\nMedian Absolute Percent Error (MdAPE) = ', np.median(np.abs(np.asarray(labels, dtype=float) - np.asarray(preds, dtype=float)) / np.asarray(labels, dtype=float)))" ] } ], From e63247f37d300209d121ebe19ecdcea7ce8a0fa3 Mon Sep 17 00:00:00 2001 From: Yu Date: Thu, 6 Sep 2018 13:45:24 -0700 Subject: [PATCH 5/9] Update tensorflow related notebooks to use tensorflow 1.10.0 --- .../tensorflow_mnist/hpo_tensorflow_mnist.ipynb | 1 + .../tensorflow_abalone_age_predictor_using_keras.ipynb | 2 +- .../tensorflow_abalone_age_predictor_using_layers.ipynb | 2 +- .../tensorflow_batch_transform_mnist.ipynb | 2 +- .../tensorflow_distributed_mnist.ipynb | 2 +- .../tensorflow_local_mode_mnist.ipynb | 2 +- .../tensorflow_iris_dnn_classifier_using_estimators.ipynb | 2 +- .../tensorflow_keras_cifar10/tensorflow_keras_CIFAR10.ipynb | 2 +- .../tensorflow_pipemode_example.ipynb | 2 +- .../tensorflow_resnet_cifar10_with_tensorboard.ipynb | 2 +- 10 files changed, 10 insertions(+), 9 deletions(-) diff --git a/hyperparameter_tuning/tensorflow_mnist/hpo_tensorflow_mnist.ipynb b/hyperparameter_tuning/tensorflow_mnist/hpo_tensorflow_mnist.ipynb index 120ba51def..49ed204808 100644 --- a/hyperparameter_tuning/tensorflow_mnist/hpo_tensorflow_mnist.ipynb +++ b/hyperparameter_tuning/tensorflow_mnist/hpo_tensorflow_mnist.ipynb @@ -176,6 +176,7 @@ "source": [ "estimator = TensorFlow(entry_point='mnist.py',\n", " role=role,\n", + " framework_version='1.10.0',\n", " training_steps=1000, \n", " evaluation_steps=100,\n", " train_instance_count=1,\n", diff --git a/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_keras/tensorflow_abalone_age_predictor_using_keras.ipynb b/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_keras/tensorflow_abalone_age_predictor_using_keras.ipynb index 89a5626b83..7fe551c331 100644 --- a/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_keras/tensorflow_abalone_age_predictor_using_keras.ipynb +++ b/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_keras/tensorflow_abalone_age_predictor_using_keras.ipynb @@ -461,7 +461,7 @@ "\n", "abalone_estimator = TensorFlow(entry_point='abalone.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " training_steps= 100, \n", " evaluation_steps= 100,\n", " hyperparameters={'learning_rate': 0.001},\n", diff --git a/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_layers/tensorflow_abalone_age_predictor_using_layers.ipynb b/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_layers/tensorflow_abalone_age_predictor_using_layers.ipynb index c0fb53fc35..4b0c5ac589 100644 --- a/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_layers/tensorflow_abalone_age_predictor_using_layers.ipynb +++ 
b/sagemaker-python-sdk/tensorflow_abalone_age_predictor_using_layers/tensorflow_abalone_age_predictor_using_layers.ipynb @@ -506,7 +506,7 @@ "\n", "abalone_estimator = TensorFlow(entry_point='abalone.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " training_steps= 100, \n", " evaluation_steps= 100,\n", " hyperparameters={'learning_rate': 0.001},\n", diff --git a/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_batch_transform_mnist.ipynb b/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_batch_transform_mnist.ipynb index 3f58bf8992..4418abfeb1 100644 --- a/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_batch_transform_mnist.ipynb +++ b/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_batch_transform_mnist.ipynb @@ -117,7 +117,7 @@ "\n", "mnist_estimator = TensorFlow(entry_point='mnist.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " training_steps=1000, \n", " evaluation_steps=100,\n", " train_instance_count=2,\n", diff --git a/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_distributed_mnist.ipynb b/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_distributed_mnist.ipynb index e90973fc7e..90285e4f7a 100644 --- a/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_distributed_mnist.ipynb +++ b/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_distributed_mnist.ipynb @@ -149,7 +149,7 @@ "\n", "mnist_estimator = TensorFlow(entry_point='mnist.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " training_steps=1000, \n", " evaluation_steps=100,\n", " train_instance_count=2,\n", diff --git a/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_local_mode_mnist.ipynb b/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_local_mode_mnist.ipynb index dbf1bc9002..bf3be2b44d 100644 --- a/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_local_mode_mnist.ipynb +++ b/sagemaker-python-sdk/tensorflow_distributed_mnist/tensorflow_local_mode_mnist.ipynb @@ -165,7 +165,7 @@ "\n", "mnist_estimator = TensorFlow(entry_point='mnist.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " training_steps=10, \n", " evaluation_steps=10,\n", " train_instance_count=2,\n", diff --git a/sagemaker-python-sdk/tensorflow_iris_dnn_classifier_using_estimators/tensorflow_iris_dnn_classifier_using_estimators.ipynb b/sagemaker-python-sdk/tensorflow_iris_dnn_classifier_using_estimators/tensorflow_iris_dnn_classifier_using_estimators.ipynb index 9f8eed4673..81688fbc4b 100644 --- a/sagemaker-python-sdk/tensorflow_iris_dnn_classifier_using_estimators/tensorflow_iris_dnn_classifier_using_estimators.ipynb +++ b/sagemaker-python-sdk/tensorflow_iris_dnn_classifier_using_estimators/tensorflow_iris_dnn_classifier_using_estimators.ipynb @@ -300,7 +300,7 @@ "\n", "iris_estimator = TensorFlow(entry_point='iris_dnn_classifier.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " output_path=model_artifacts_location,\n", " code_location=custom_code_upload_location,\n", " train_instance_count=1,\n", diff --git a/sagemaker-python-sdk/tensorflow_keras_cifar10/tensorflow_keras_CIFAR10.ipynb b/sagemaker-python-sdk/tensorflow_keras_cifar10/tensorflow_keras_CIFAR10.ipynb index 9eaddf8245..2cd6346401 100644 --- a/sagemaker-python-sdk/tensorflow_keras_cifar10/tensorflow_keras_CIFAR10.ipynb +++ 
b/sagemaker-python-sdk/tensorflow_keras_cifar10/tensorflow_keras_CIFAR10.ipynb @@ -216,7 +216,7 @@ "\n", "estimator = TensorFlow(entry_point='cifar10_cnn.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " hyperparameters={'learning_rate': 1e-4, 'decay':1e-6},\n", " training_steps=1000, evaluation_steps=100,\n", " train_instance_count=1, train_instance_type='ml.c4.xlarge')\n", diff --git a/sagemaker-python-sdk/tensorflow_pipemode_example/tensorflow_pipemode_example.ipynb b/sagemaker-python-sdk/tensorflow_pipemode_example/tensorflow_pipemode_example.ipynb index eb7e30659a..dadfb6b6df 100644 --- a/sagemaker-python-sdk/tensorflow_pipemode_example/tensorflow_pipemode_example.ipynb +++ b/sagemaker-python-sdk/tensorflow_pipemode_example/tensorflow_pipemode_example.ipynb @@ -132,7 +132,7 @@ "\n", "tensorflow = TensorFlow(entry_point='pipemode.py',\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " input_mode='Pipe',\n", " output_path=model_artifacts_location,\n", " code_location=custom_code_upload_location,\n", diff --git a/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb b/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb index 35b8269eeb..9d242f7634 100644 --- a/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb +++ b/sagemaker-python-sdk/tensorflow_resnet_cifar10_with_tensorboard/tensorflow_resnet_cifar10_with_tensorboard.ipynb @@ -118,7 +118,7 @@ "estimator = TensorFlow(entry_point='resnet_cifar_10.py',\n", " source_dir=source_dir,\n", " role=role,\n", - " framework_version='1.9',\n", + " framework_version='1.10.0',\n", " hyperparameters={'throttle_secs': 30},\n", " training_steps=1000, evaluation_steps=100,\n", " train_instance_count=2, train_instance_type='ml.c4.xlarge', \n", From 34821d894bb1a5b3fdf4b9668f44d409746ecfd8 Mon Sep 17 00:00:00 2001 From: Andrew Packer Date: Mon, 10 Sep 2018 12:38:27 -0700 Subject: [PATCH 6/9] Update pandas/numpy usage in kms example Add polling after creation of transform job --- .../handling_kms_encrypted_data.ipynb | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb b/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb index 338f5666ef..0c7412898e 100644 --- a/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb +++ b/advanced_functionality/handling_kms_encrypted_data/handling_kms_encrypted_data.ipynb @@ -137,12 +137,7 @@ " feature_names = boston['feature_names']\n", " data = pd.DataFrame(X, columns=feature_names)\n", " if include_labels:\n", - " target = pd.DataFrame(y, columns={'MEDV'})\n", - " data['MEDV'] = y\n", - " # bring this column to the front before writing the files\n", - " cols = data.columns.tolist()\n", - " cols = cols[-1:] + cols[:-1]\n", - " data = data[cols]\n", + " data.insert(0, 'MEDV', y)\n", " data.to_csv(fname, header=False, index=False)" ] }, @@ -513,6 +508,7 @@ "metadata": {}, "outputs": [], "source": [ + "%%time\n", "transform_job_name = 'DEMO-xgboost-batch-prediction' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", "print(\"Transform job\", transform_job_name)\n", "\n", @@ -540,7 +536,18 @@ " }\n", "}\n", "\n", - "client.create_transform_job(**transform_params)" + 
"client.create_transform_job(**transform_params)\n", + "\n", + "while True:\n", + " response = client.describe_transform_job(TransformJobName=transform_job_name)\n", + " status = response['TransformJobStatus']\n", + " if status == 'InProgress':\n", + " time.sleep(15)\n", + " elif status == 'Completed':\n", + " print(\"Transform job completed!\")\n", + " break\n", + " else:\n", + " print(\"Unexpected transform job status: \" + status)" ] }, { @@ -563,16 +570,8 @@ " Key=prefix + '/predict/' + test_no_labels_file + '.out',\n", " Filename='./predictions.csv')\n", "\n", - "# load predictions\n", - "with open('predictions.csv') as f:\n", - " preds = f.readlines()\n", - "\n", - "# extract labels from test file\n", - "with open('test.csv') as f:\n", - " test_lines = f.readlines()\n", - "labels = [line.split(',')[0] for line in test_lines]\n", - "\n", - "print('\\nMedian Absolute Percent Error (MdAPE) = ', np.median(np.abs(np.asarray(labels, dtype=float) - np.asarray(preds, dtype=float)) / np.asarray(labels, dtype=float)))" + "preds = np.loadtxt('predictions.csv')\n", + "print('\\nMedian Absolute Percent Error (MdAPE) = ', np.median(np.abs(y_test - preds) / y_test))" ] } ], From 949c38bf780ef2a8c4729aa178c99e7bee9c736b Mon Sep 17 00:00:00 2001 From: Gurumurthy Swaminathan Date: Thu, 13 Sep 2018 11:06:56 -0700 Subject: [PATCH 7/9] multi-label notebook and high-level api notebooks Added multi-label notebook and high-level notebooks for existing ones --- .../Image-classification-fulltraining.ipynb | 365 +++++++++++++ .../Image-classification-lst-format.ipynb | 429 +++++++++++++++ .../Image-classification-multilabel-lst.ipynb | 504 ++++++++++++++++++ ...age-classification-transfer-learning.ipynb | 358 +++++++++++++ .../imageclassification_highlevel/README.md | 11 + 5 files changed, 1667 insertions(+) create mode 100644 introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-fulltraining.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-lst-format.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-multilabel-lst.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-transfer-learning.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_highlevel/README.md diff --git a/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-fulltraining.ipynb b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-fulltraining.ipynb new file mode 100644 index 0000000000..e5717866b8 --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-fulltraining.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# End-to-End Multiclass Image Classification Example\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites and Preprocessing](#Prequisites-and-Preprocessing)\n", + " 1. [Permissions and environment variables](#Permissions-and-environment-variables)\n", + " 2. [Prepare the data](#Prepare-the-data)\n", + "3. [Training the model](#Training-the-model)\n", + " 1. [Training parameters](#Training-parameters)\n", + " 2. [Start the training](#Start-the-training)\n", + "4. 
[Inference](#Inference)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of distributed image classification algorithm. In this demo, we will use the Amazon sagemaker image classification algorithm to train on the [caltech-256 dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites and Preprocessing\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket=sess.default_bucket()\n", + "prefix = 'ic-fulltraining'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data preparation\n", + "Download the data and transfer to S3 for use in training. In this demo, we are using [Caltech-256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) dataset, which contains 30608 images of 256 objects. For the training and validation data, we follow the splitting scheme in this MXNet [example](https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/data/caltech256.sh). In particular, it randomly selects 60 images per class for training, and uses the remaining data for validation. The algorithm takes `RecordIO` file as input. The user can also provide the image files as input, which will be converted into `RecordIO` format using MXNet's [im2rec](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec) tool. It takes around 50 seconds to converted the entire Caltech-256 dataset (~1.2GB) on a p2.xlarge instance. However, for this demo, we will use record io format. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os \n", + "import urllib.request\n", + "import boto3\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + " \n", + "def upload_to_s3(channel, file):\n", + " s3 = boto3.resource('s3')\n", + " data = open(file, \"rb\")\n", + " key = channel + '/' + file\n", + " s3.Bucket(bucket).put_object(Key=key, Body=data)\n", + "\n", + "\n", + "# caltech-256\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "\n", + "# upload the lst files to train and validation channels\n", + "!aws s3 cp caltech-256-60-train.rec $s3train --quiet\n", + "!aws s3 cp caltech-256-60-val.rec $s3validation --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "Once we have the data available in the correct format for training, the next step is to actually train the model using the data. After setting training parameters, we kick off training, and poll for status until training is completed.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the model\n", + "\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. 
The number of channels should be the same as the actual image.\n",
+    "* **num_classes**: This is the number of output classes for the new dataset. Imagenet was trained with 1000 output classes but the number of output classes can be changed for fine-tuning. For caltech, we use 257 because it has 256 object categories + 1 clutter class.\n",
+    "* **num_training_samples**: This is the total number of training samples. It is set to 15420 for the caltech dataset with the current split.\n",
+    "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run.\n",
+    "* **epochs**: Number of training epochs.\n",
+    "* **learning_rate**: Learning rate for training.\n",
+    "* **top_k**: Report the top-k accuracy during training.\n",
+    "* **precision_dtype**: Training datatype precision (default: float32). If set to 'float16', the training will be done in mixed_precision mode and will be faster than float32 mode\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ic.set_hyperparameters(num_layers=18,\n",
+    "                       image_shape = \"3,224,224\",\n",
+    "                       num_classes=257,\n",
+    "                       num_training_samples=15420,\n",
+    "                       mini_batch_size=128,\n",
+    "                       epochs=5,\n",
+    "                       learning_rate=0.01,\n",
+    "                       top_k=2,\n",
+    "                       precision_dtype='float32')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Input data specification\n",
+    "Set the data type and channels used for training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n",
+    "                        content_type='application/x-recordio', s3_data_type='S3Prefix')\n",
+    "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n",
+    "                             content_type='application/x-recordio', s3_data_type='S3Prefix')\n",
+    "\n",
+    "data_channels = {'train': train_data, 'validation': validation_data}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Start the training\n",
+    "Start training by calling the fit method in the estimator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "ic.fit(inputs=data_channels, logs=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Inference\n",
+    "\n",
+    "***\n",
+    "\n",
+    "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of a given image. 
You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg\n", + "file_name = '/tmp/test.jpg'\n", + "# test image\n", + "from IPython.display import Image\n", + "Image(file_name) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "\n", + "Evaluate the image through the network for inteference. The network outputs class probabilities and typically, one selects the class with the maximum probability as the final class output.\n", + "\n", + "**Note:** The output class detected by the network may not be accurate in this example. To limit the time taken and cost of training, we have trained the model only for 5 epochs. If the network is trained for more epochs (say 20), then the output class will be more accurate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "\n", + "with open(file_name, 'rb') as f:\n", + " payload = f.read()\n", + " payload = bytearray(payload)\n", + " \n", + "ic_classifier.content_type = 'application/x-image'\n", + "result = json.loads(ic_classifier.predict(payload))\n", + "# the result will output the probabilities for all classes\n", + "# find the class with maximum probability and print the class index\n", + "index = np.argmax(result)\n", + "object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 'killer-whale', 
'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 'raccoon', 'radio-telescope', 'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter']\n", + "print(\"Result: label - \" + object_categories[index] + \", probability - \" + str(result[index]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean up\n", + "\n", + "\n", + "When we're done with the endpoint, we can just delete it and the backing instances will be released. Uncomment and run the following cell to delete the endpoint and model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier.delete_endpoint()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
+ }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-lst-format.ipynb b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-lst-format.ipynb new file mode 100644 index 0000000000..6a790c4528 --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-lst-format.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image classification training with image format demo\n", + "\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites and Preprocessing](#Prequisites-and-Preprocessing)\n", + " 1. [Permissions and environment variables](#Permissions-and-environment-variables)\n", + " 2. [Prepare the data](#Prepare-the-data)\n", + "3. [Fine-tuning The Image Classification Model](#Fine-tuning-the-Image-classification-model)\n", + " 1. [Training parameters](#Training-parameters)\n", + " 2. [Start the training](#Start-the-training)\n", + "4. [Inference](#Inference)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of the image classification algorithm training with image format. In this demo, we will use the Amazon sagemaker image classification algorithm in transfer learning mode to fine-tune a pre-trained model (trained on imagenet data) to learn to classify a new dataset. In particular, the pre-trained model will be fine-tuned using [caltech-256 dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites and Preprocessing\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket=sess.default_bucket() \n", + "prefix = 'ic-lstformat'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Prepare the data\n", + "The caltech 256 dataset consist of images from 257 categories (the last one being a clutter category) and has 30k images with a minimum of 80 images and a maximum of about 800 images per category. \n", + "\n", + "The image classification algorithm can take two types of input formats. 
The first is a [RecordIO format](https://mxnet.incubator.apache.org/tutorials/basic/record_io.html) (content type: application/x-recordio) and the other is a [lst format](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec) (content type: application/x-image). Files for both these formats are available at http://data.dmlc.ml/mxnet/data/caltech-256/. In this example, we will use the lst format for training and use the training/validation split [specified here](http://data.dmlc.ml/mxnet/data/caltech-256/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + "\n", + "# Caltech-256 image files\n", + "download('http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar')\n", + "!tar -xf 256_ObjectCategories.tar\n", + "\n", + "# Tool for creating lst file\n", + "download('https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "mkdir -p caltech_256_train_60\n", + "for i in 256_ObjectCategories/*; do\n", + " c=`basename $i`\n", + " mkdir -p caltech_256_train_60/$c\n", + " for j in `ls $i/*.jpg | shuf | head -n 60`; do\n", + " mv $j caltech_256_train_60/$c/\n", + " done\n", + "done\n", + "\n", + "python im2rec.py --list --recursive caltech-256-60-train caltech_256_train_60/\n", + "python im2rec.py --list --recursive caltech-256-60-val 256_ObjectCategories/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A .lst file is a tab-separated file with three columns that contains a list of image files. The first column specifies the image index, the second column specifies the class label index for the image, and the third column specifies the relative path of the image file. The image index in the first column should be unique across all of the images. Here we make an image list file using the [im2rec](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py) tool from MXNet. You can also create the .lst file in your own way. An example of .lst file is shown as follows. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!head -n 3 ./caltech-256-60-train.lst > example.lst\n", + "f = open('example.lst','r')\n", + "lst_content = f.read()\n", + "print(lst_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you are bringing your own image files to train, please ensure that the .lst file follows the same format as described above. In order to train with the lst format interface, passing the lst file for both training and validation in the appropriate format is mandatory. Once we have the data available in the correct format for training, the next step is to upload the image and .lst file to S3 bucket." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "s3train_lst = 's3://{}/{}/train_lst/'.format(bucket, prefix)\n", + "s3validation_lst = 's3://{}/{}/validation_lst/'.format(bucket, prefix)\n", + "\n", + "# upload the image files to train and validation channels\n", + "!aws s3 cp caltech_256_train_60 $s3train --recursive --quiet\n", + "!aws s3 cp 256_ObjectCategories $s3validation --recursive --quiet\n", + "\n", + "# upload the lst files to train_lst and validation_lst channels\n", + "!aws s3 cp caltech-256-60-train.lst $s3train_lst --quiet\n", + "!aws s3 cp caltech-256-60-val.lst $s3validation_lst --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have all the data stored in S3 bucket. The image and lst files will be converted to RecordIO file internelly by the image classification algorithm. But if you want do the conversion, the following cell shows how to do it using the [im2rec](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py) tool. Note that this is just an example of creating RecordIO files. We are **_not_** using them for training in this notebook. More details on creating RecordIO files can be found in this [tutorial](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "python im2rec.py --resize 256 --quality 90 --num-thread 16 caltech-256-60-val 256_ObjectCategories/\n", + "python im2rec.py --resize 256 --quality 90 --num-thread 16 caltech-256-60-train caltech_256_train_60/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After you created the RecordIO files, you can upload them to the train and validation channels for training. To train with RecordIO format, you can follow \"[Image-classification-fulltraining.ipynb](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-fulltraining.ipynb)\" and \"[Image-classification-transfer-learning.ipynb](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning.ipynb)\". Again, we will **_not_** use the RecordIO file for the training. The following sections will only show you how to train a model with images and list files." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training the model, we need to setup the training parameters. The next section will explain the parameters in detail." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fine-tuning the Image Classification Model\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. 
These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **use_pretrained_model**: Set to 1 to use pretrained model for transfer learning.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. The number of channels should be same as the actual image.\n", + "* **num_classes**: This is the number of output classes for the new dataset. Imagenet was trained with 1000 output classes but the number of output classes can be changed for fine-tuning. For caltech, we use 257 because it has 256 object categories + 1 clutter class.\n", + "* **num_training_samples**: This is the total number of training samples. It is set to 15240 for caltech dataset with the current split.\n", + "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run.\n", + "* **epochs**: Number of training epochs.\n", + "* **learning_rate**: Learning rate for training.\n", + "* **top_k**: Report the top-k accuracy during training.\n", + "* **resize**: Resize the image before using it for training. The images are resized so that the shortest side is of this parameter. If the parameter is not set, then the training data is used as such without resizing.\n", + "* **precision_dtype**: Training datatype precision (default: float32). 
If set to 'float16', the training will be done in mixed_precision mode and will be faster than float32 mode.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "isConfigCell": true + }, + "outputs": [], + "source": [ + "ic.set_hyperparameters(num_layers=18,\n", + " use_pretrained_model=1,\n", + " image_shape = \"3,224,224\",\n", + " num_classes=257,\n", + " mini_batch_size=128,\n", + " epochs=2,\n", + " learning_rate=0.01,\n", + " top_k=2,\n", + " num_training_samples=15420,\n", + " resize = 256,\n", + " precision_dtype='float32')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input data specification\n", + "Set the data type and channels used for training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "train_data_lst = sagemaker.session.s3_input(s3train_lst, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "validation_data_lst = sagemaker.session.s3_input(s3validation_lst, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "\n", + "data_channels = {'train': train_data, 'validation': validation_data, \n", + " 'train_lst': train_data_lst, 'validation_lst': validation_data_lst}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start the training\n", + "Start training by calling the fit method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic.fit(inputs=data_channels, logs=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "\n", + "***\n", + "\n", + "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of an input image. 
You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg\n", + "file_name = '/tmp/test.jpg'\n", + "# test image\n", + "from IPython.display import Image\n", + "Image(file_name) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "\n", + "with open(file_name, 'rb') as f:\n", + " payload = f.read()\n", + " payload = bytearray(payload)\n", + " \n", + "ic_classifier.content_type = 'application/x-image'\n", + "result = json.loads(ic_classifier.predict(payload))\n", + "# the result will output the probabilities for all classes\n", + "# find the class with maximum probability and print the class index\n", + "index = np.argmax(result)\n", + "object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 'killer-whale', 'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 'raccoon', 'radio-telescope', 
'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter']\n", + "print(\"Result: label - \" + object_categories[index] + \", probability - \" + str(result[index]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Clean up\n", + "\n", + "When we're done with the endpoint, we can just delete it and the backing instances will be released. Uncomment and run the following cell to delete the endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#ic_classifier.delete_endpoint()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-multilabel-lst.ipynb b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-multilabel-lst.ipynb new file mode 100644 index 0000000000..1e645043de --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-multilabel-lst.ipynb @@ -0,0 +1,504 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image classification multi-label classification\n", + "\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites](#Prequisites)\n", + "3. [Data Preparation](#Data-Preparation)\n", + "3. [Multi-label Training](#Multi-label-Training)\n", + "4. [Inference](#Inference)\n", + "5. 
[Clean-up](#Clean-up)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of multi-label classification using the Sagemaker 1P image classification algorithm. In this demo, we will use the Amazon sagemaker image classification algorithm in transfer learning mode to fine-tune a pre-trained model (trained on imagenet data) to learn to classify a new multi-label dataset. In particular, the pre-trained model will be fine-tuned using [MS-COCO](http://cocodataset.org/#overview) dataset. \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket = sess.default_bucket()\n", + "prefix = 'ic-multilabel'\n", + "\n", + "print('using bucket %s'%bucket)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation\n", + "MS COCO is a large-scale dataset for multiple computer vision tasks, including object detection, segmentation, and captioning. In this notebook, we will use the object detection dataset to construct the multi-label dataset for classification. We will use the 2017 validation set from MS-COCO dataset to train multi-label classifier. MS-COCO dataset consist of images from 80 categories. We will choose 5 categories out of 80 and train the model to learn to classify these 5 categories. These are: \n", + "\n", + "1. Person\n", + "2. Bicycle\n", + "3. Car\n", + "4. Motorcycle\n", + "5. Airplane\n", + "\n", + "An image can contain objects of multiple categories. We first create a dataset with these 5 categories. COCO is a very large dataset, and the purpose of this notebook is to show how multi-label classification works. So, instead we’ll take what COCO calls their validation dataset from 2017, and use this as our only data. We then split this dataset into a train and holdout dataset for fine tuning the model and testing our final accuracy\n", + "\n", + "The image classification algorithm can take two types of input formats. The first is a [recordio format](https://mxnet.incubator.apache.org/tutorials/basic/record_io.html) and the other is a [lst format](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec). We will use the lst file format for training. 
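As a purely illustrative preview of the multi-label .lst file that the cells below construct: each row carries the image index first, then one 0/1 indicator per class (Person, Bicycle, Car, Motorcycle, Airplane), and finally the relative image path, all tab-separated. The file names and label values here are made up.

```
0	1	0	1	0	0	000000123456.jpg
1	1	1	0	0	0	000000234567.jpg
2	0	0	1	0	1	000000345678.jpg
```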
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataset License\n", + "\n", + "The annotations in this dataset belong to the COCO Consortium and are licensed under a Creative Commons Attribution 4.0 License. The COCO Consortium does not own the copyright of the images. Use of the images must abide by the Flickr Terms of Use. The users of the images accept full responsibility for the use of the dataset, including but not limited to the use of any copies of copyrighted images that they may create from the dataset. Before you use this data for any other purpose than this example, you should understand the data license, described at http://cocodataset.org/#termsofuse\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + "\n", + "# MSCOCO validation image files\n", + "download('http://images.cocodataset.org/zips/val2017.zip')\n", + "download('http://images.cocodataset.org/annotations/annotations_trainval2017.zip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "unzip -qo val2017.zip\n", + "unzip -qo annotations_trainval2017.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install tools\n", + "\n", + "We need pycocotools to parse the annotations for the MSCOCO dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%bash\n", + "pip -q install pycocotools" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parse the annotation to create lst file\n", + "Use pycocotools to parse the annotation and create the lst file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pycocotools.coco import COCO\n", + "import numpy as np\n", + "import os\n", + "\n", + "annFile='./annotations/instances_val2017.json'\n", + "coco=COCO(annFile)\n", + "\n", + "catIds = coco.getCatIds()\n", + "image_ids_of_cats = []\n", + "for cat in catIds:\n", + " image_ids_of_cats.append(coco.getImgIds(catIds=cat))\n", + "\n", + "image_ids = []\n", + "labels = []\n", + "# use only the first 5 classes\n", + "# obtain image ids and labels for images with these 5 classes\n", + "cats = [1, 2, 3, 4, 5]\n", + "for ind_cat in cats:\n", + " for image_id in image_ids_of_cats[ind_cat-1]:\n", + " if image_id in image_ids:\n", + " labels[image_ids.index(image_id)][ind_cat-1] = 1\n", + " else:\n", + " image_ids.append(image_id)\n", + " labels.append(np.zeros(len(cats), dtype=np.int))\n", + " labels[-1][ind_cat-1] = 1\n", + "# Construct the lst file from the image ids and labels\n", + "# The first column is the image index, the last is the image filename\n", + "# and the second to last but one are the labels\n", + "with open('image.lst', 'w') as fp:\n", + " sum_labels = labels[0]\n", + " for ind, image_id in enumerate(image_ids):\n", + " coco_img = coco.loadImgs(image_id)\n", + " image_path = os.path.join(coco_img[0]['file_name'])\n", + " label_h = labels[ind]\n", + " sum_labels += label_h\n", + " fp.write(str(ind) + '\\t')\n", + " for j in label_h:\n", + " fp.write(str(j) + '\\t')\n", + " fp.write(image_path)\n", + " fp.write('\\n')\n", + 
" fp.close()\n", + "print(sum_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create training and validation set\n", + "Create training and validation set by splitting the lst file. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "shuf image.lst > im.lst\n", + "head -n 2500 im.lst > mscocoval2017train.lst\n", + "tail -n +2501 im.lst > mscocoval2017val.lst\n", + "head mscocoval2017train.lst\n", + "wc -l mscocoval2017train.lst\n", + "wc -l mscocoval2017val.lst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload the data\n", + "Upload the data onto the s3 bucket. The images are uploaded onto train and validation bucket. The lst files are uploaded to train_lst and validation_lst folders. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "s3train_lst = 's3://{}/{}/train_lst/'.format(bucket, prefix)\n", + "s3validation_lst = 's3://{}/{}/validation_lst/'.format(bucket, prefix)\n", + "\n", + "# upload the image files to train and validation channels\n", + "!aws s3 cp val2017 $s3train --recursive --quiet\n", + "!aws s3 cp val2017 $s3validation --recursive --quiet\n", + "\n", + "# upload the lst files to train_lst and validation_lst channels\n", + "!aws s3 cp mscocoval2017train.lst $s3train_lst --quiet\n", + "!aws s3 cp mscocoval2017val.lst $s3validation_lst --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multi-label Training\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "multilabel_ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Algorithm parameters\n", + "\n", + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. 
We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **use_pretrained_model**: Set to 1 to use pretrained model for transfer learning.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. The number of channels should be same as the actual image.\n", + "* **num_classes**: This is the number of output classes for the dataset. We use 5 classes from MSCOCO and hence it is set to 5\n", + "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run\n", + "* **resize**: Resize the image before using it for training. The images are resized so that the shortest side is of this parameter. If the parameter is not set, then the training data is used as such without resizing.\n", + "* **epochs**: Number of training epochs\n", + "* **learning_rate**: Learning rate for training\n", + "* **num_training_samples**: This is the total number of training samples. It is set to 2500 for COCO dataset with the current split\n", + "* **use_weighted_loss**: This parameter is used to balance the influence of the positive and negative samples within the dataset.\n", + "* **augmentation_type**: This parameter determines the type of augmentation used for training. It can take on three values, 'crop', 'crop_color' and 'crop_color_transform'\n", + "* **precision_dtype**: The data type precision used during training. Using ``float16`` can lead to faster training with minimal drop in accuracy, paritcularly on P3 machines. By default, the parameter is set to ``float32``\n", + "* **multi_label**: Set multi_label to 1 for multi-label processing\n", + "\n", + "You can find a detailed description of all the algorithm parameters at https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "multilabel_ic.set_hyperparameters(num_layers=18,\n", + " use_pretrained_model=1,\n", + " image_shape = \"3,224,224\",\n", + " num_classes=5,\n", + " mini_batch_size=128,\n", + " resize=256,\n", + " epochs=5,\n", + " learning_rate=0.001,\n", + " num_training_samples=2500,\n", + " use_weighted_loss=1,\n", + " augmentation_type = 'crop_color_transform',\n", + " precision_dtype='float32',\n", + " multi_label=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Input data specification\n", + "Set the data type and channels used for training. In this training, we use application/x-image content type that require individual images and lst file for data input. In addition, Sagemaker image classification algorithm supports application/x-recordio format which can be used for larger datasets. 
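For comparison only, and not run in this notebook: if the data had instead been packed into RecordIO files and uploaded to the same `s3train` and `s3validation` prefixes, the channel setup would shrink to two channels with the RecordIO content type. A sketch under that assumption:

```python
# Hypothetical RecordIO variant of the input channels (not used in this notebook).
train_data_rec = sagemaker.session.s3_input(s3train, distribution='FullyReplicated',
                                            content_type='application/x-recordio',
                                            s3_data_type='S3Prefix')
validation_data_rec = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated',
                                                 content_type='application/x-recordio',
                                                 s3_data_type='S3Prefix')
data_channels_rec = {'train': train_data_rec, 'validation': validation_data_rec}
```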
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "train_data_lst = sagemaker.session.s3_input(s3train_lst, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "validation_data_lst = sagemaker.session.s3_input(s3validation_lst, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "data_channels = {'train': train_data, 'validation': validation_data, 'train_lst': train_data_lst, \n", + " 'validation_lst': validation_data_lst}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start the training\n", + "Start training by calling the fit method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "multilabel_ic.fit(inputs=data_channels, logs=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "\n", + "***\n", + "\n", + "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of the image. You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = multilabel_ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline \n", + "!wget -q -O /tmp/test.jpg https://images.pexels.com/photos/763398/pexels-photo-763398.jpeg\n", + " \n", + "import cv2\n", + "# resize image size for inference\n", + "file_name = '/tmp/test.jpg'\n", + "im = cv2.imread(file_name)\n", + "im = cv2.resize(im, (600, 400))\n", + "cv2.imwrite(file_name, im)\n", + "\n", + "# display test image\n", + "from IPython.display import Image, display\n", + "img = Image(file_name) \n", + "display(img)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "\n", + "Evaluate the image through the network for inference. The network outputs class probabilities for all the classes. As can be seen from this example, the network output is pretty good even with training for only 5 epochs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "with open(file_name, 'rb') as image:\n", + " f = image.read()\n", + " b = bytearray(f)\n", + "ic_classifier.content_type = 'application/x-image'\n", + "results = ic_classifier.predict(b)\n", + "prob = json.loads(results)\n", + "classes = ['Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane']\n", + "for idx, val in enumerate(classes):\n", + " print('%s:%f '%(classes[idx], prob[idx]), end='')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up\n", + "You can use the following command to delete the endpoint. 
The endpoint that is created above is persistent and would consume resources till it is deleted. It is good to delete the endpoint when it is not used" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier.delete_endpoint()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-transfer-learning.ipynb b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-transfer-learning.ipynb new file mode 100644 index 0000000000..adc9f85c5d --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_highlevel/Image-classification-transfer-learning.ipynb @@ -0,0 +1,358 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image classification transfer learning demo\n", + "\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites and Preprocessing](#Prequisites-and-Preprocessing)\n", + "3. [Fine-tuning the Image classification model](#Fine-tuning-the-Image-classification-model)\n", + "4. [Training parameters](#Training-parameters)\n", + "5. [Start the training](#Start-the-training)\n", + "6. [Inference](#Inference)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of distributed image classification algorithm in transfer learning mode. In this demo, we will use the Amazon sagemaker image classification algorithm in transfer learning mode to fine-tune a pre-trained model (trained on imagenet data) to learn to classify a new dataset. In particular, the pre-trained model will be fine-tuned using [caltech-256 dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites and Preprocessing\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. 
This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket = sess.default_bucket()\n", + "prefix = 'ic-transfer-learning'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fine-tuning the Image classification model\n", + "\n", + "The caltech 256 dataset consist of images from 257 categories (the last one being a clutter category) and has 30k images with a minimum of 80 images and a maximum of about 800 images per category. \n", + "\n", + "The image classification algorithm can take two types of input formats. The first is a [recordio format](https://mxnet.incubator.apache.org/tutorials/basic/record_io.html) and the other is a [lst format](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec). Files for both these formats are available at http://data.dmlc.ml/mxnet/data/caltech-256/. In this example, we will use the recordio format for training and use the training/validation split [specified here](http://data.dmlc.ml/mxnet/data/caltech-256/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "import boto3\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + " \n", + "def upload_to_s3(channel, file):\n", + " s3 = boto3.resource('s3')\n", + " data = open(file, \"rb\")\n", + " key = channel + '/' + file\n", + " s3.Bucket(bucket).put_object(Key=key, Body=data)\n", + "\n", + "\n", + "# # caltech-256\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')\n", + "upload_to_s3('validation', 'caltech-256-60-val.rec')\n", + "upload_to_s3('train', 'caltech-256-60-train.rec')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "\n", + "# upload the lst files to train and validation channels\n", + "!aws s3 cp caltech-256-60-train.rec $s3train --quiet\n", + "!aws s3 cp caltech-256-60-val.rec $s3validation --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once we have the data available in the correct format for training, the next step is to actually train the model using the data. Before training the model, we need to setup the training parameters. The next section will explain the parameters in detail." 
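As an optional sanity check that is not part of the original flow, you can list what actually landed under the two channel prefixes before launching the training job. A small sketch, reusing the `bucket` and `prefix` values defined earlier:

```python
# Optional: confirm the .rec files are where the training job will look for them.
import boto3

s3 = boto3.client('s3')
for channel in ['train', 'validation']:
    resp = s3.list_objects_v2(Bucket=bucket, Prefix='{}/{}/'.format(prefix, channel))
    for obj in resp.get('Contents', []):
        print('{} ({} bytes)'.format(obj['Key'], obj['Size']))
```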
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **use_pretrained_model**: Set to 1 to use pretrained model for transfer learning.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. The number of channels should be same as the actual image.\n", + "* **num_classes**: This is the number of output classes for the new dataset. Imagenet was trained with 1000 output classes but the number of output classes can be changed for fine-tuning. For caltech, we use 257 because it has 256 object categories + 1 clutter class.\n", + "* **num_training_samples**: This is the total number of training samples. It is set to 15240 for caltech dataset with the current split.\n", + "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run.\n", + "* **epochs**: Number of training epochs.\n", + "* **learning_rate**: Learning rate for training.\n", + "* **precision_dtype**: Training datatype precision (default: float32). 
If set to 'float16', the training will be done in mixed_precision mode and will be faster than float32 mode\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "isConfigCell": true + }, + "outputs": [], + "source": [ + "ic.set_hyperparameters(num_layers=18,\n", + " use_pretrained_model=1,\n", + " image_shape = \"3,224,224\",\n", + " num_classes=257,\n", + " num_training_samples=15420,\n", + " mini_batch_size=128,\n", + " epochs=2,\n", + " learning_rate=0.01,\n", + " precision_dtype='float32')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input data specification\n", + "Set the data type and channels used for training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n", + " content_type='application/x-recordio', s3_data_type='S3Prefix')\n", + "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n", + " content_type='application/x-recordio', s3_data_type='S3Prefix')\n", + "\n", + "data_channels = {'train': train_data, 'validation': validation_data}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start the training\n", + "Start training by calling the fit method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic.fit(inputs=data_channels, logs=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "\n", + "***\n", + "\n", + "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of the image. You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg\n", + "file_name = '/tmp/test.jpg'\n", + "# test image\n", + "from IPython.display import Image\n", + "Image(file_name) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "\n", + "Evaluate the image through the network for inteference. The network outputs class probabilities and typically, one selects the class with the maximum probability as the final class output.\n", + "\n", + "**Note:** The output class detected by the network may not be accurate in this example. To limit the time taken and cost of training, we have trained the model only for a couple of epochs. If the network is trained for more epochs (say 20), then the output class will be more accurate." 
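If you would like to inspect more than the single most probable class, a small sketch like the following prints the five highest-scoring categories. It assumes the `result` and `object_categories` variables defined in the prediction cell below have already been evaluated.

```python
# Hedged sketch: show the top-5 predicted classes rather than only the argmax.
import numpy as np

top5 = np.argsort(result)[::-1][:5]
for i in top5:
    print('{}: {:.4f}'.format(object_categories[i], result[i]))
```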
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "\n", + "with open(file_name, 'rb') as f:\n", + " payload = f.read()\n", + " payload = bytearray(payload)\n", + " \n", + "ic_classifier.content_type = 'application/x-image'\n", + "result = json.loads(ic_classifier.predict(payload))\n", + "# the result will output the probabilities for all classes\n", + "# find the class with maximum probability and print the class index\n", + "index = np.argmax(result)\n", + "object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 'killer-whale', 'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 'raccoon', 'radio-telescope', 'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 
'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter']\n", + "print(\"Result: label - \" + object_categories[index] + \", probability - \" + str(result[index]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean up\n", + "\n", + "When we're done with the endpoint, we can just delete it and the backing instances will be released. Run the following cell to delete the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier.delete_endpoint()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_highlevel/README.md b/introduction_to_amazon_algorithms/imageclassification_highlevel/README.md new file mode 100644 index 0000000000..2637256b30 --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_highlevel/README.md @@ -0,0 +1,11 @@ +### SageMaker Image classification full training +This notebook `ImageClassification_FullTraining.ipynb` demos an end-2-end system for image classification training using resnet model. Caltech-256 dataset is used as a sample dataset. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. + +### SageMaker Image classification transfer learning +This notebook `Imageclassification-transfer-learning.ipynb` demos an end-2-end system for image classification fine-tuning using a pre-trained resnet model on imagenet dataset. Caltech-256 dataset is used as a transfer learning dataset. The network re-initializes the output layer with the number of classes in the Caltech dataset and retrains the layer while at the same time fine-tuning the other layers. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. 
+ +### SageMaker Image classification lst format +This notebook `Imageclassification-lst-format.ipynb` demos an end-2-end system for image classification training with image and list files. Caltech-256 dataset is used as a transfer learning dataset. The network re-initializes the output layer with the number of classes in the Caltech dataset and retrains the layer while at the same time fine-tuning the other layers. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. + +### SageMaker Image classification multi-label training +This notebook `Imageclassification-multilabel-lst.ipynb` demos an end-2-end system for image classification training on multi-label datasets using image and list files. MSCOCO dataset is used to create a multi-label dataset using the first 5 categories. It shows how the lst file can be generated for multi-label datasets and used for training. The network re-initializes the fully-connected layer and retrains the layer while at the same time fine-tuning the other layers. The network shows the use of multi_label parameter as well as the use_weighted_loss parameter. Various other parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. From fe2a8c452f54e0fe0d2f58ab3df532d1e99edf37 Mon Sep 17 00:00:00 2001 From: Gurumurthy Swaminathan Date: Thu, 13 Sep 2018 11:06:56 -0700 Subject: [PATCH 8/9] multi-label notebook and high-level api notebooks Added multi-label notebook and high-level notebooks for existing ones --- ...lassification-fulltraining-highlevel.ipynb | 365 +++++++++++++ ...-classification-lst-format-highlevel.ipynb | 429 +++++++++++++++ ...fication-transfer-learning-highlevel.ipynb | 358 +++++++++++++ .../imageclassification_caltech/README.md | 11 +- .../Image-classification-multilabel-lst.ipynb | 504 ++++++++++++++++++ .../README.md | 2 + 6 files changed, 1668 insertions(+), 1 deletion(-) create mode 100644 introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-fulltraining-highlevel.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-lst-format-highlevel.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning-highlevel.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/Image-classification-multilabel-lst.ipynb create mode 100644 introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/README.md diff --git a/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-fulltraining-highlevel.ipynb b/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-fulltraining-highlevel.ipynb new file mode 100644 index 0000000000..e5717866b8 --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-fulltraining-highlevel.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# End-to-End Multiclass Image Classification Example\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites and Preprocessing](#Prequisites-and-Preprocessing)\n", + " 1. 
[Permissions and environment variables](#Permissions-and-environment-variables)\n", + " 2. [Prepare the data](#Prepare-the-data)\n", + "3. [Training the model](#Training-the-model)\n", + " 1. [Training parameters](#Training-parameters)\n", + " 2. [Start the training](#Start-the-training)\n", + "4. [Inference](#Inference)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of distributed image classification algorithm. In this demo, we will use the Amazon sagemaker image classification algorithm to train on the [caltech-256 dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites and Preprocessing\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket=sess.default_bucket()\n", + "prefix = 'ic-fulltraining'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data preparation\n", + "Download the data and transfer to S3 for use in training. In this demo, we are using [Caltech-256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/) dataset, which contains 30608 images of 256 objects. For the training and validation data, we follow the splitting scheme in this MXNet [example](https://github.com/apache/incubator-mxnet/blob/master/example/image-classification/data/caltech256.sh). In particular, it randomly selects 60 images per class for training, and uses the remaining data for validation. The algorithm takes `RecordIO` file as input. The user can also provide the image files as input, which will be converted into `RecordIO` format using MXNet's [im2rec](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec) tool. It takes around 50 seconds to converted the entire Caltech-256 dataset (~1.2GB) on a p2.xlarge instance. However, for this demo, we will use record io format. 
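The cells below download RecordIO files that have already been converted, so no conversion is needed in this demo. If you were starting from raw image folders instead, one rough way to drive im2rec from Python is sketched here; it assumes `im2rec.py` and a `256_ObjectCategories/` image folder have been downloaded (as in the lst-format notebook) and is not run in this notebook.

```python
# Not run here: pack raw image folders into a .lst listing and a RecordIO file.
import subprocess

# 1) build the .lst listing of images and labels
subprocess.run(['python', 'im2rec.py', '--list', '--recursive',
                'caltech-256-60-train', '256_ObjectCategories/'], check=True)
# 2) resize/encode the listed images into a .rec file
subprocess.run(['python', 'im2rec.py', '--resize', '256', '--quality', '90',
                '--num-thread', '16', 'caltech-256-60-train', '256_ObjectCategories/'],
               check=True)
```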
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os \n", + "import urllib.request\n", + "import boto3\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + " \n", + "def upload_to_s3(channel, file):\n", + " s3 = boto3.resource('s3')\n", + " data = open(file, \"rb\")\n", + " key = channel + '/' + file\n", + " s3.Bucket(bucket).put_object(Key=key, Body=data)\n", + "\n", + "\n", + "# caltech-256\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "\n", + "# upload the lst files to train and validation channels\n", + "!aws s3 cp caltech-256-60-train.rec $s3train --quiet\n", + "!aws s3 cp caltech-256-60-val.rec $s3validation --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "Once we have the data available in the correct format for training, the next step is to actually train the model using the data. After setting training parameters, we kick off training, and poll for status until training is completed.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the model\n", + "\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. 
The number of channels should be the same as the actual image.\n",
+    "* **num_classes**: This is the number of output classes for the new dataset. ImageNet was trained with 1000 output classes, but the number of output classes can be changed for fine-tuning. For caltech, we use 257 because it has 256 object categories + 1 clutter class.\n",
+    "* **num_training_samples**: This is the total number of training samples. It is set to 15420 for the caltech dataset with the current split (60 images for each of the 257 classes).\n",
+    "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run.\n",
+    "* **epochs**: Number of training epochs.\n",
+    "* **learning_rate**: Learning rate for training.\n",
+    "* **top_k**: Report the top-k accuracy during training.\n",
+    "* **precision_dtype**: Training datatype precision (default: float32). If set to 'float16', the training will be done in mixed_precision mode and will be faster than float32 mode.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ic.set_hyperparameters(num_layers=18,\n",
+    "                       image_shape = \"3,224,224\",\n",
+    "                       num_classes=257,\n",
+    "                       num_training_samples=15420,\n",
+    "                       mini_batch_size=128,\n",
+    "                       epochs=5,\n",
+    "                       learning_rate=0.01,\n",
+    "                       top_k=2,\n",
+    "                       precision_dtype='float32')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Input data specification\n",
+    "Set the data type and channels used for training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n",
+    "                                        content_type='application/x-recordio', s3_data_type='S3Prefix')\n",
+    "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n",
+    "                                             content_type='application/x-recordio', s3_data_type='S3Prefix')\n",
+    "\n",
+    "data_channels = {'train': train_data, 'validation': validation_data}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Start the training\n",
+    "Start training by calling the fit method in the estimator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "ic.fit(inputs=data_channels, logs=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Inference\n",
+    "\n",
+    "***\n",
+    "\n",
+    "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of the image. 
You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg\n", + "file_name = '/tmp/test.jpg'\n", + "# test image\n", + "from IPython.display import Image\n", + "Image(file_name) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "\n", + "Evaluate the image through the network for inteference. The network outputs class probabilities and typically, one selects the class with the maximum probability as the final class output.\n", + "\n", + "**Note:** The output class detected by the network may not be accurate in this example. To limit the time taken and cost of training, we have trained the model only for 5 epochs. If the network is trained for more epochs (say 20), then the output class will be more accurate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "\n", + "with open(file_name, 'rb') as f:\n", + " payload = f.read()\n", + " payload = bytearray(payload)\n", + " \n", + "ic_classifier.content_type = 'application/x-image'\n", + "result = json.loads(ic_classifier.predict(payload))\n", + "# the result will output the probabilities for all classes\n", + "# find the class with maximum probability and print the class index\n", + "index = np.argmax(result)\n", + "object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 'killer-whale', 
'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 'raccoon', 'radio-telescope', 'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter']\n", + "print(\"Result: label - \" + object_categories[index] + \", probability - \" + str(result[index]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean up\n", + "\n", + "\n", + "When we're done with the endpoint, we can just delete it and the backing instances will be released. Uncomment and run the following cell to delete the endpoint and model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier.delete_endpoint()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
+ }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-lst-format-highlevel.ipynb b/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-lst-format-highlevel.ipynb new file mode 100644 index 0000000000..6a790c4528 --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-lst-format-highlevel.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image classification training with image format demo\n", + "\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites and Preprocessing](#Prequisites-and-Preprocessing)\n", + " 1. [Permissions and environment variables](#Permissions-and-environment-variables)\n", + " 2. [Prepare the data](#Prepare-the-data)\n", + "3. [Fine-tuning The Image Classification Model](#Fine-tuning-the-Image-classification-model)\n", + " 1. [Training parameters](#Training-parameters)\n", + " 2. [Start the training](#Start-the-training)\n", + "4. [Inference](#Inference)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of the image classification algorithm training with image format. In this demo, we will use the Amazon sagemaker image classification algorithm in transfer learning mode to fine-tune a pre-trained model (trained on imagenet data) to learn to classify a new dataset. In particular, the pre-trained model will be fine-tuned using [caltech-256 dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites and Preprocessing\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket=sess.default_bucket() \n", + "prefix = 'ic-lstformat'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Prepare the data\n", + "The caltech 256 dataset consist of images from 257 categories (the last one being a clutter category) and has 30k images with a minimum of 80 images and a maximum of about 800 images per category. \n", + "\n", + "The image classification algorithm can take two types of input formats. 
The first is a [RecordIO format](https://mxnet.incubator.apache.org/tutorials/basic/record_io.html) (content type: application/x-recordio) and the other is a [lst format](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec) (content type: application/x-image). Files for both these formats are available at http://data.dmlc.ml/mxnet/data/caltech-256/. In this example, we will use the lst format for training and use the training/validation split [specified here](http://data.dmlc.ml/mxnet/data/caltech-256/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + "\n", + "# Caltech-256 image files\n", + "download('http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar')\n", + "!tar -xf 256_ObjectCategories.tar\n", + "\n", + "# Tool for creating lst file\n", + "download('https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "mkdir -p caltech_256_train_60\n", + "for i in 256_ObjectCategories/*; do\n", + " c=`basename $i`\n", + " mkdir -p caltech_256_train_60/$c\n", + " for j in `ls $i/*.jpg | shuf | head -n 60`; do\n", + " mv $j caltech_256_train_60/$c/\n", + " done\n", + "done\n", + "\n", + "python im2rec.py --list --recursive caltech-256-60-train caltech_256_train_60/\n", + "python im2rec.py --list --recursive caltech-256-60-val 256_ObjectCategories/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A .lst file is a tab-separated file with three columns that contains a list of image files. The first column specifies the image index, the second column specifies the class label index for the image, and the third column specifies the relative path of the image file. The image index in the first column should be unique across all of the images. Here we make an image list file using the [im2rec](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py) tool from MXNet. You can also create the .lst file in your own way. An example of .lst file is shown as follows. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!head -n 3 ./caltech-256-60-train.lst > example.lst\n", + "f = open('example.lst','r')\n", + "lst_content = f.read()\n", + "print(lst_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you are bringing your own image files to train, please ensure that the .lst file follows the same format as described above. In order to train with the lst format interface, passing the lst file for both training and validation in the appropriate format is mandatory. Once we have the data available in the correct format for training, the next step is to upload the image and .lst file to S3 bucket." 
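+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before uploading, it can be useful to sanity-check the generated .lst files. The next cell is a small optional check (it assumes the two .lst files created by the `im2rec.py --list` commands above are present in the working directory): it verifies that every row has three tab-separated columns and that the class labels fall in the expected range."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check on the .lst files generated above: every row should\n",
+    "# have three tab-separated fields (index, class label, relative image path)\n",
+    "# and the class labels should fall inside the expected range.\n",
+    "import csv\n",
+    "\n",
+    "def check_lst(path, num_classes=257):\n",
+    "    with open(path) as f:\n",
+    "        rows = list(csv.reader(f, delimiter='\\t'))\n",
+    "    assert all(len(row) == 3 for row in rows), 'unexpected number of columns'\n",
+    "    labels = [float(row[1]) for row in rows]\n",
+    "    assert min(labels) >= 0 and max(labels) < num_classes, 'label out of range'\n",
+    "    print('{}: {} images, {} distinct labels'.format(path, len(rows), len(set(labels))))\n",
+    "\n",
+    "check_lst('caltech-256-60-train.lst')\n",
+    "check_lst('caltech-256-60-val.lst')"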
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Four channels: train, validation, train_lst, and validation_lst\n",
+    "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n",
+    "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n",
+    "s3train_lst = 's3://{}/{}/train_lst/'.format(bucket, prefix)\n",
+    "s3validation_lst = 's3://{}/{}/validation_lst/'.format(bucket, prefix)\n",
+    "\n",
+    "# upload the image files to train and validation channels\n",
+    "!aws s3 cp caltech_256_train_60 $s3train --recursive --quiet\n",
+    "!aws s3 cp 256_ObjectCategories $s3validation --recursive --quiet\n",
+    "\n",
+    "# upload the lst files to train_lst and validation_lst channels\n",
+    "!aws s3 cp caltech-256-60-train.lst $s3train_lst --quiet\n",
+    "!aws s3 cp caltech-256-60-val.lst $s3validation_lst --quiet"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we have all the data stored in the S3 bucket. The image and .lst files will be converted to RecordIO files internally by the image classification algorithm. But if you want to do the conversion yourself, the following cell shows how to do it using the [im2rec](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py) tool. Note that this is just an example of creating RecordIO files. We are **_not_** using them for training in this notebook. More details on creating RecordIO files can be found in this [tutorial](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "python im2rec.py --resize 256 --quality 90 --num-thread 16 caltech-256-60-val 256_ObjectCategories/\n",
+    "python im2rec.py --resize 256 --quality 90 --num-thread 16 caltech-256-60-train caltech_256_train_60/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After you have created the RecordIO files, you can upload them to the train and validation channels for training. To train with RecordIO format, you can follow \"[Image-classification-fulltraining.ipynb](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-fulltraining.ipynb)\" and \"[Image-classification-transfer-learning.ipynb](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning.ipynb)\". Again, we will **_not_** use the RecordIO files for the training. The following sections will only show you how to train a model with images and list files."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before training the model, we need to set up the training parameters. The next section will explain the parameters in detail."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Fine-tuning the Image Classification Model\n",
+    "Now that we are done with all the setup that is needed, we are ready to fine-tune our image classification model. To begin, let us create a ``sagemaker.estimator.Estimator`` object. This estimator will launch the training job.\n",
+    "### Training parameters\n",
+    "There are two kinds of parameters that need to be set for training. The first is the set of parameters for the training job. 
These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **use_pretrained_model**: Set to 1 to use pretrained model for transfer learning.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. The number of channels should be same as the actual image.\n", + "* **num_classes**: This is the number of output classes for the new dataset. Imagenet was trained with 1000 output classes but the number of output classes can be changed for fine-tuning. For caltech, we use 257 because it has 256 object categories + 1 clutter class.\n", + "* **num_training_samples**: This is the total number of training samples. It is set to 15240 for caltech dataset with the current split.\n", + "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run.\n", + "* **epochs**: Number of training epochs.\n", + "* **learning_rate**: Learning rate for training.\n", + "* **top_k**: Report the top-k accuracy during training.\n", + "* **resize**: Resize the image before using it for training. The images are resized so that the shortest side is of this parameter. If the parameter is not set, then the training data is used as such without resizing.\n", + "* **precision_dtype**: Training datatype precision (default: float32). 
If set to 'float16', the training will be done in mixed_precision mode and will be faster than float32 mode.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "isConfigCell": true
+   },
+   "outputs": [],
+   "source": [
+    "ic.set_hyperparameters(num_layers=18,\n",
+    "                       use_pretrained_model=1,\n",
+    "                       image_shape = \"3,224,224\",\n",
+    "                       num_classes=257,\n",
+    "                       mini_batch_size=128,\n",
+    "                       epochs=2,\n",
+    "                       learning_rate=0.01,\n",
+    "                       top_k=2,\n",
+    "                       num_training_samples=15420,\n",
+    "                       resize = 256,\n",
+    "                       precision_dtype='float32')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Input data specification\n",
+    "Set the data type and channels used for training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n",
+    "                                        content_type='application/x-image', s3_data_type='S3Prefix')\n",
+    "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n",
+    "                                             content_type='application/x-image', s3_data_type='S3Prefix')\n",
+    "train_data_lst = sagemaker.session.s3_input(s3train_lst, distribution='FullyReplicated', \n",
+    "                                            content_type='application/x-image', s3_data_type='S3Prefix')\n",
+    "validation_data_lst = sagemaker.session.s3_input(s3validation_lst, distribution='FullyReplicated', \n",
+    "                                                 content_type='application/x-image', s3_data_type='S3Prefix')\n",
+    "\n",
+    "data_channels = {'train': train_data, 'validation': validation_data, \n",
+    "                 'train_lst': train_data_lst, 'validation_lst': validation_data_lst}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Start the training\n",
+    "Start training by calling the fit method in the estimator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ic.fit(inputs=data_channels, logs=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Inference\n",
+    "\n",
+    "***\n",
+    "\n",
+    "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of the image. 
You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg\n", + "file_name = '/tmp/test.jpg'\n", + "# test image\n", + "from IPython.display import Image\n", + "Image(file_name) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "\n", + "with open(file_name, 'rb') as f:\n", + " payload = f.read()\n", + " payload = bytearray(payload)\n", + " \n", + "ic_classifier.content_type = 'application/x-image'\n", + "result = json.loads(ic_classifier.predict(payload))\n", + "# the result will output the probabilities for all classes\n", + "# find the class with maximum probability and print the class index\n", + "index = np.argmax(result)\n", + "object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 'killer-whale', 'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 'raccoon', 'radio-telescope', 
'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter']\n", + "print(\"Result: label - \" + object_categories[index] + \", probability - \" + str(result[index]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Clean up\n", + "\n", + "When we're done with the endpoint, we can just delete it and the backing instances will be released. Uncomment and run the following cell to delete the endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#ic_classifier.delete_endpoint()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning-highlevel.ipynb b/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning-highlevel.ipynb new file mode 100644 index 0000000000..adc9f85c5d --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_caltech/Image-classification-transfer-learning-highlevel.ipynb @@ -0,0 +1,358 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image classification transfer learning demo\n", + "\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites and Preprocessing](#Prequisites-and-Preprocessing)\n", + "3. [Fine-tuning the Image classification model](#Fine-tuning-the-Image-classification-model)\n", + "4. 
[Training parameters](#Training-parameters)\n", + "5. [Start the training](#Start-the-training)\n", + "6. [Inference](#Inference)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of distributed image classification algorithm in transfer learning mode. In this demo, we will use the Amazon sagemaker image classification algorithm in transfer learning mode to fine-tune a pre-trained model (trained on imagenet data) to learn to classify a new dataset. In particular, the pre-trained model will be fine-tuned using [caltech-256 dataset](http://www.vision.caltech.edu/Image_Datasets/Caltech256/). \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites and Preprocessing\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket = sess.default_bucket()\n", + "prefix = 'ic-transfer-learning'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fine-tuning the Image classification model\n", + "\n", + "The caltech 256 dataset consist of images from 257 categories (the last one being a clutter category) and has 30k images with a minimum of 80 images and a maximum of about 800 images per category. \n", + "\n", + "The image classification algorithm can take two types of input formats. The first is a [recordio format](https://mxnet.incubator.apache.org/tutorials/basic/record_io.html) and the other is a [lst format](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec). Files for both these formats are available at http://data.dmlc.ml/mxnet/data/caltech-256/. In this example, we will use the recordio format for training and use the training/validation split [specified here](http://data.dmlc.ml/mxnet/data/caltech-256/)." 
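+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional check that is not needed for training, the next cell shows one way to peek inside one of the RecordIO files using MXNet (available on the `conda_mxnet_p36` kernel used by this notebook). It is guarded with an existence check, so it only prints record contents once the download cell below has fetched the `.rec` files; before that it simply reports that the file is missing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: once the next cell has downloaded the .rec files, re-run this cell\n",
+    "# to read a few records and confirm what the RecordIO file contains\n",
+    "# (a packed header carrying the class label, plus the encoded image bytes).\n",
+    "import os\n",
+    "import mxnet as mx\n",
+    "\n",
+    "rec_file = 'caltech-256-60-val.rec'\n",
+    "if os.path.exists(rec_file):\n",
+    "    record_iter = mx.recordio.MXRecordIO(rec_file, 'r')\n",
+    "    for _ in range(3):\n",
+    "        header, img_bytes = mx.recordio.unpack(record_iter.read())\n",
+    "        print('label: {}, encoded image: {} bytes'.format(header.label, len(img_bytes)))\n",
+    "else:\n",
+    "    print('{} not downloaded yet, run the download cell below first'.format(rec_file))"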
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "import boto3\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + " \n", + "def upload_to_s3(channel, file):\n", + " s3 = boto3.resource('s3')\n", + " data = open(file, \"rb\")\n", + " key = channel + '/' + file\n", + " s3.Bucket(bucket).put_object(Key=key, Body=data)\n", + "\n", + "\n", + "# # caltech-256\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec')\n", + "download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec')\n", + "upload_to_s3('validation', 'caltech-256-60-val.rec')\n", + "upload_to_s3('train', 'caltech-256-60-train.rec')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "\n", + "# upload the lst files to train and validation channels\n", + "!aws s3 cp caltech-256-60-train.rec $s3train --quiet\n", + "!aws s3 cp caltech-256-60-val.rec $s3validation --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once we have the data available in the correct format for training, the next step is to actually train the model using the data. Before training the model, we need to setup the training parameters. The next section will explain the parameters in detail." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. 
We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **use_pretrained_model**: Set to 1 to use pretrained model for transfer learning.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. It should be no larger than the actual image size. The number of channels should be same as the actual image.\n", + "* **num_classes**: This is the number of output classes for the new dataset. Imagenet was trained with 1000 output classes but the number of output classes can be changed for fine-tuning. For caltech, we use 257 because it has 256 object categories + 1 clutter class.\n", + "* **num_training_samples**: This is the total number of training samples. It is set to 15240 for caltech dataset with the current split.\n", + "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size where N is the number of hosts on which training is run.\n", + "* **epochs**: Number of training epochs.\n", + "* **learning_rate**: Learning rate for training.\n", + "* **precision_dtype**: Training datatype precision (default: float32). If set to 'float16', the training will be done in mixed_precision mode and will be faster than float32 mode\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "isConfigCell": true + }, + "outputs": [], + "source": [ + "ic.set_hyperparameters(num_layers=18,\n", + " use_pretrained_model=1,\n", + " image_shape = \"3,224,224\",\n", + " num_classes=257,\n", + " num_training_samples=15420,\n", + " mini_batch_size=128,\n", + " epochs=2,\n", + " learning_rate=0.01,\n", + " precision_dtype='float32')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input data specification\n", + "Set the data type and channels used for training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n", + " content_type='application/x-recordio', s3_data_type='S3Prefix')\n", + "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n", + " content_type='application/x-recordio', s3_data_type='S3Prefix')\n", + "\n", + "data_channels = {'train': train_data, 'validation': validation_data}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start the training\n", + "Start training by calling the fit method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic.fit(inputs=data_channels, logs=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "\n", + "***\n", + "\n", + "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of the image. 
You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -O /tmp/test.jpg http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg\n", + "file_name = '/tmp/test.jpg'\n", + "# test image\n", + "from IPython.display import Image\n", + "Image(file_name) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "\n", + "Evaluate the image through the network for inteference. The network outputs class probabilities and typically, one selects the class with the maximum probability as the final class output.\n", + "\n", + "**Note:** The output class detected by the network may not be accurate in this example. To limit the time taken and cost of training, we have trained the model only for a couple of epochs. If the network is trained for more epochs (say 20), then the output class will be more accurate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "\n", + "with open(file_name, 'rb') as f:\n", + " payload = f.read()\n", + " payload = bytearray(payload)\n", + " \n", + "ic_classifier.content_type = 'application/x-image'\n", + "result = json.loads(ic_classifier.predict(payload))\n", + "# the result will output the probabilities for all classes\n", + "# find the class with maximum probability and print the class index\n", + "index = np.argmax(result)\n", + "object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 
'killer-whale', 'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 'raccoon', 'radio-telescope', 'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter']\n", + "print(\"Result: label - \" + object_categories[index] + \", probability - \" + str(result[index]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean up\n", + "\n", + "When we're done with the endpoint, we can just delete it and the backing instances will be released. Run the following cell to delete the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier.delete_endpoint()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_mxnet_p36", + "language": "python", + "name": "conda_mxnet_p36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
+ }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/introduction_to_amazon_algorithms/imageclassification_caltech/README.md b/introduction_to_amazon_algorithms/imageclassification_caltech/README.md index 703b4a4fc4..27158c1043 100644 --- a/introduction_to_amazon_algorithms/imageclassification_caltech/README.md +++ b/introduction_to_amazon_algorithms/imageclassification_caltech/README.md @@ -1,8 +1,17 @@ ### SageMaker Image classification full training -This notebook `ImageClassification_FullTraining.ipynb` demos an end-2-end system for image classification training using resnet model. Caltech-256 dataset is used as a sample dataset. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. +This notebook `ImageClassification-fulltraining.ipynb` demos an end-2-end system for image classification training using resnet model. Caltech-256 dataset is used as a sample dataset. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. ### SageMaker Image classification transfer learning This notebook `Imageclassification-transfer-learning.ipynb` demos an end-2-end system for image classification fine-tuning using a pre-trained resnet model on imagenet dataset. Caltech-256 dataset is used as a transfer learning dataset. The network re-initializes the output layer with the number of classes in the Caltech dataset and retrains the layer while at the same time fine-tuning the other layers. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. ### SageMaker Image classification lst format This notebook `Imageclassification-lst-format.ipynb` demos an end-2-end system for image classification training with image and list files. Caltech-256 dataset is used as a transfer learning dataset. The network re-initializes the output layer with the number of classes in the Caltech dataset and retrains the layer while at the same time fine-tuning the other layers. Various parameters such as network depth (number of layers), batch_size, learning_rate, etc., can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference. 
+ +### SageMaker Image classification full training highlevel +This notebook `ImageClassification-fulltraining-highlevel.ipynb` is similar to the `ImageClassification-fulltraining.ipynb` but using Sagemaker high-level APIs + +### SageMaker Image classification transfer learning highlevel +This notebook `Imageclassification-transfer-learning-highlevel.ipynb` is similar to the `ImageClassification-transfer-learning.ipynb` but using Sagemaker high-level APIs + +### SageMaker Image classification lst format highlevel +This notebook `Imageclassification-lst-format-highlevel.ipynb` is similar to the `ImageClassification-lst-format.ipynb` but using Sagemaker high-level APIs diff --git a/introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/Image-classification-multilabel-lst.ipynb b/introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/Image-classification-multilabel-lst.ipynb new file mode 100644 index 0000000000..1e645043de --- /dev/null +++ b/introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/Image-classification-multilabel-lst.ipynb @@ -0,0 +1,504 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image classification multi-label classification\n", + "\n", + "1. [Introduction](#Introduction)\n", + "2. [Prerequisites](#Prequisites)\n", + "3. [Data Preparation](#Data-Preparation)\n", + "3. [Multi-label Training](#Multi-label-Training)\n", + "4. [Inference](#Inference)\n", + "5. [Clean-up](#Clean-up)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "Welcome to our end-to-end example of multi-label classification using the Sagemaker 1P image classification algorithm. In this demo, we will use the Amazon sagemaker image classification algorithm in transfer learning mode to fine-tune a pre-trained model (trained on imagenet data) to learn to classify a new multi-label dataset. In particular, the pre-trained model will be fine-tuned using [MS-COCO](http://cocodataset.org/#overview) dataset. \n", + "\n", + "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prequisites\n", + "\n", + "### Permissions and environment variables\n", + "\n", + "Here we set up the linkage and authentication to AWS services. There are three parts to this:\n", + "\n", + "* The roles used to give learning and hosting access to your data. 
This will automatically be obtained from the role used to start the notebook\n", + "* The S3 bucket that you want to use for training and model data\n", + "* The Amazon sagemaker image classification docker image which need not be changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import sagemaker\n", + "from sagemaker import get_execution_role\n", + "\n", + "role = get_execution_role()\n", + "print(role)\n", + "\n", + "sess = sagemaker.Session()\n", + "bucket = sess.default_bucket()\n", + "prefix = 'ic-multilabel'\n", + "\n", + "print('using bucket %s'%bucket)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.amazon.amazon_estimator import get_image_uri\n", + "\n", + "training_image = get_image_uri(sess.boto_region_name, 'image-classification', repo_version=\"latest\")\n", + "print (training_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation\n", + "MS COCO is a large-scale dataset for multiple computer vision tasks, including object detection, segmentation, and captioning. In this notebook, we will use the object detection dataset to construct the multi-label dataset for classification. We will use the 2017 validation set from MS-COCO dataset to train multi-label classifier. MS-COCO dataset consist of images from 80 categories. We will choose 5 categories out of 80 and train the model to learn to classify these 5 categories. These are: \n", + "\n", + "1. Person\n", + "2. Bicycle\n", + "3. Car\n", + "4. Motorcycle\n", + "5. Airplane\n", + "\n", + "An image can contain objects of multiple categories. We first create a dataset with these 5 categories. COCO is a very large dataset, and the purpose of this notebook is to show how multi-label classification works. So, instead we’ll take what COCO calls their validation dataset from 2017, and use this as our only data. We then split this dataset into a train and holdout dataset for fine tuning the model and testing our final accuracy\n", + "\n", + "The image classification algorithm can take two types of input formats. The first is a [recordio format](https://mxnet.incubator.apache.org/tutorials/basic/record_io.html) and the other is a [lst format](https://mxnet.incubator.apache.org/how_to/recordio.html?highlight=im2rec). We will use the lst file format for training. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataset License\n", + "\n", + "The annotations in this dataset belong to the COCO Consortium and are licensed under a Creative Commons Attribution 4.0 License. The COCO Consortium does not own the copyright of the images. Use of the images must abide by the Flickr Terms of Use. The users of the images accept full responsibility for the use of the dataset, including but not limited to the use of any copies of copyrighted images that they may create from the dataset. 
Before you use this data for any other purpose than this example, you should understand the data license, described at http://cocodataset.org/#termsofuse\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "\n", + "def download(url):\n", + " filename = url.split(\"/\")[-1]\n", + " if not os.path.exists(filename):\n", + " urllib.request.urlretrieve(url, filename)\n", + "\n", + "\n", + "# MSCOCO validation image files\n", + "download('http://images.cocodataset.org/zips/val2017.zip')\n", + "download('http://images.cocodataset.org/annotations/annotations_trainval2017.zip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "unzip -qo val2017.zip\n", + "unzip -qo annotations_trainval2017.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install tools\n", + "\n", + "We need pycocotools to parse the annotations for the MSCOCO dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%bash\n", + "pip -q install pycocotools" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parse the annotation to create lst file\n", + "Use pycocotools to parse the annotation and create the lst file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pycocotools.coco import COCO\n", + "import numpy as np\n", + "import os\n", + "\n", + "annFile='./annotations/instances_val2017.json'\n", + "coco=COCO(annFile)\n", + "\n", + "catIds = coco.getCatIds()\n", + "image_ids_of_cats = []\n", + "for cat in catIds:\n", + " image_ids_of_cats.append(coco.getImgIds(catIds=cat))\n", + "\n", + "image_ids = []\n", + "labels = []\n", + "# use only the first 5 classes\n", + "# obtain image ids and labels for images with these 5 classes\n", + "cats = [1, 2, 3, 4, 5]\n", + "for ind_cat in cats:\n", + " for image_id in image_ids_of_cats[ind_cat-1]:\n", + " if image_id in image_ids:\n", + " labels[image_ids.index(image_id)][ind_cat-1] = 1\n", + " else:\n", + " image_ids.append(image_id)\n", + " labels.append(np.zeros(len(cats), dtype=np.int))\n", + " labels[-1][ind_cat-1] = 1\n", + "# Construct the lst file from the image ids and labels\n", + "# The first column is the image index, the last is the image filename\n", + "# and the second to last but one are the labels\n", + "with open('image.lst', 'w') as fp:\n", + " sum_labels = labels[0]\n", + " for ind, image_id in enumerate(image_ids):\n", + " coco_img = coco.loadImgs(image_id)\n", + " image_path = os.path.join(coco_img[0]['file_name'])\n", + " label_h = labels[ind]\n", + " sum_labels += label_h\n", + " fp.write(str(ind) + '\\t')\n", + " for j in label_h:\n", + " fp.write(str(j) + '\\t')\n", + " fp.write(image_path)\n", + " fp.write('\\n')\n", + " fp.close()\n", + "print(sum_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create training and validation set\n", + "Create training and validation set by splitting the lst file. 
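The optional cell below first takes a quick look at the combined image.lst file."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following cell is a small optional sanity check (it assumes `image.lst` was written by the cell above): it confirms that every row carries an index, one 0/1 flag for each of the 5 classes, and the image file name, and it prints how many positive examples each class has."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional check on image.lst before splitting it: every row should contain\n",
+    "# an index, one 0/1 flag per class (5 classes here) and the image file name,\n",
+    "# all separated by tabs.\n",
+    "num_label_columns = 5\n",
+    "with open('image.lst') as f:\n",
+    "    rows = [line.strip('\\n').split('\\t') for line in f if line.strip()]\n",
+    "assert all(len(row) == num_label_columns + 2 for row in rows), 'unexpected number of columns'\n",
+    "label_totals = [sum(int(row[1 + i]) for row in rows) for i in range(num_label_columns)]\n",
+    "print('{} images, positive labels per class: {}'.format(len(rows), label_totals))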
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "shuf image.lst > im.lst\n", + "head -n 2500 im.lst > mscocoval2017train.lst\n", + "tail -n +2501 im.lst > mscocoval2017val.lst\n", + "head mscocoval2017train.lst\n", + "wc -l mscocoval2017train.lst\n", + "wc -l mscocoval2017val.lst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload the data\n", + "Upload the data onto the s3 bucket. The images are uploaded onto train and validation bucket. The lst files are uploaded to train_lst and validation_lst folders. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Four channels: train, validation, train_lst, and validation_lst\n", + "s3train = 's3://{}/{}/train/'.format(bucket, prefix)\n", + "s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)\n", + "s3train_lst = 's3://{}/{}/train_lst/'.format(bucket, prefix)\n", + "s3validation_lst = 's3://{}/{}/validation_lst/'.format(bucket, prefix)\n", + "\n", + "# upload the image files to train and validation channels\n", + "!aws s3 cp val2017 $s3train --recursive --quiet\n", + "!aws s3 cp val2017 $s3validation --recursive --quiet\n", + "\n", + "# upload the lst files to train_lst and validation_lst channels\n", + "!aws s3 cp mscocoval2017train.lst $s3train_lst --quiet\n", + "!aws s3 cp mscocoval2017val.lst $s3validation_lst --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multi-label Training\n", + "Now that we are done with all the setup that is needed, we are ready to train our object detector. To begin, let us create a ``sageMaker.estimator.Estimator`` object. This estimator will launch the training job.\n", + "\n", + "### Training parameters\n", + "There are two kinds of parameters that need to be set for training. The first one are the parameters for the training job. These include:\n", + "\n", + "* **Training instance count**: This is the number of instances on which to run the training. When the number of instances is greater than one, then the image classification algorithm will run in distributed settings. \n", + "* **Training instance type**: This indicates the type of machine on which to run the training. Typically, we use GPU instances for these training \n", + "* **Output path**: This the s3 folder in which the training output is stored" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", + "multilabel_ic = sagemaker.estimator.Estimator(training_image,\n", + " role, \n", + " train_instance_count=1, \n", + " train_instance_type='ml.p2.xlarge',\n", + " train_volume_size = 50,\n", + " train_max_run = 360000,\n", + " input_mode= 'File',\n", + " output_path=s3_output_location,\n", + " sagemaker_session=sess)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Algorithm parameters\n", + "\n", + "Apart from the above set of parameters, there are hyperparameters that are specific to the algorithm. These are:\n", + "\n", + "* **num_layers**: The number of layers (depth) for the network. We use 18 in this samples but other values such as 50, 152 can be used.\n", + "* **use_pretrained_model**: Set to 1 to use pretrained model for transfer learning.\n", + "* **image_shape**: The input image dimensions,'num_channels, height, width', for the network. 
+ "* **num_classes**: This is the number of output classes for the dataset. We use 5 classes from MSCOCO, so it is set to 5.\n",
+ "* **mini_batch_size**: The number of training samples used for each mini batch. In distributed training, the number of training samples used per batch will be N * mini_batch_size, where N is the number of hosts on which training is run.\n",
+ "* **resize**: Resize the image before using it for training. The images are resized so that the shortest side has the length given by this parameter. If the parameter is not set, then the training data is used as such without resizing.\n",
+ "* **epochs**: Number of training epochs.\n",
+ "* **learning_rate**: Learning rate for training.\n",
+ "* **num_training_samples**: This is the total number of training samples. It is set to 2500 for the COCO dataset with the current split.\n",
+ "* **use_weighted_loss**: This parameter is used to balance the influence of the positive and negative samples within the dataset.\n",
+ "* **augmentation_type**: This parameter determines the type of augmentation used for training. It can take one of three values: 'crop', 'crop_color', and 'crop_color_transform'.\n",
+ "* **precision_dtype**: The data type precision used during training. Using ``float16`` can lead to faster training with a minimal drop in accuracy, particularly on P3 machines. By default, the parameter is set to ``float32``.\n",
+ "* **multi_label**: Set multi_label to 1 for multi-label processing.\n",
+ "\n",
+ "You can find a detailed description of all the algorithm parameters at https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "multilabel_ic.set_hyperparameters(num_layers=18,\n",
+ "                                  use_pretrained_model=1,\n",
+ "                                  image_shape=\"3,224,224\",\n",
+ "                                  num_classes=5,\n",
+ "                                  mini_batch_size=128,\n",
+ "                                  resize=256,\n",
+ "                                  epochs=5,\n",
+ "                                  learning_rate=0.001,\n",
+ "                                  num_training_samples=2500,\n",
+ "                                  use_weighted_loss=1,\n",
+ "                                  augmentation_type='crop_color_transform',\n",
+ "                                  precision_dtype='float32',\n",
+ "                                  multi_label=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Input data specification\n",
+ "Set the data type and channels used for training. In this training, we use the application/x-image content type, which requires individual images and a lst file for data input. In addition, the SageMaker image classification algorithm supports the application/x-recordio format, which can be used for larger datasets."
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = sagemaker.session.s3_input(s3train, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "validation_data = sagemaker.session.s3_input(s3validation, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "train_data_lst = sagemaker.session.s3_input(s3train_lst, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "validation_data_lst = sagemaker.session.s3_input(s3validation_lst, distribution='FullyReplicated', \n", + " content_type='application/x-image', s3_data_type='S3Prefix')\n", + "data_channels = {'train': train_data, 'validation': validation_data, 'train_lst': train_data_lst, \n", + " 'validation_lst': validation_data_lst}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start the training\n", + "Start training by calling the fit method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "multilabel_ic.fit(inputs=data_channels, logs=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "\n", + "***\n", + "\n", + "A trained model does nothing on its own. We now want to use the model to perform inference. For this example, that means predicting the class of the image. You can deploy the created model by using the deploy method in the estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ic_classifier = multilabel_ic.deploy(initial_instance_count = 1,\n", + " instance_type = 'ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download test image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline \n", + "!wget -q -O /tmp/test.jpg https://images.pexels.com/photos/763398/pexels-photo-763398.jpeg\n", + " \n", + "import cv2\n", + "# resize image size for inference\n", + "file_name = '/tmp/test.jpg'\n", + "im = cv2.imread(file_name)\n", + "im = cv2.resize(im, (600, 400))\n", + "cv2.imwrite(file_name, im)\n", + "\n", + "# display test image\n", + "from IPython.display import Image, display\n", + "img = Image(file_name) \n", + "display(img)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "\n", + "Evaluate the image through the network for inference. The network outputs class probabilities for all the classes. As can be seen from this example, the network output is pretty good even with training for only 5 epochs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "with open(file_name, 'rb') as image:\n", + " f = image.read()\n", + " b = bytearray(f)\n", + "ic_classifier.content_type = 'application/x-image'\n", + "results = ic_classifier.predict(b)\n", + "prob = json.loads(results)\n", + "classes = ['Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane']\n", + "for idx, val in enumerate(classes):\n", + " print('%s:%f '%(classes[idx], prob[idx]), end='')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up\n", + "You can use the following command to delete the endpoint. 
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Clean up\n",
+ "You can use the following command to delete the endpoint. The endpoint created above is persistent and will continue to consume resources until it is deleted. It is good practice to delete the endpoint when it is no longer in use."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ic_classifier.delete_endpoint()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "conda_mxnet_p36",
+ "language": "python",
+ "name": "conda_mxnet_p36"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ },
+ "notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/README.md b/introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/README.md
new file mode 100644
index 0000000000..61df22af6d
--- /dev/null
+++ b/introduction_to_amazon_algorithms/imageclassification_mscoco_multi_label/README.md
@@ -0,0 +1,2 @@
+### SageMaker Image classification multi-label training
+This notebook `Imageclassification-multilabel-lst.ipynb` demonstrates an end-to-end example of image classification training on multi-label datasets using image and lst files. The MSCOCO dataset is used to create a multi-label dataset from its first 5 categories. It shows how the lst file can be generated for multi-label datasets and used for training. The network re-initializes the fully-connected layer and retrains it while fine-tuning the other layers. The notebook demonstrates the use of the multi_label parameter as well as the use_weighted_loss parameter. Various other parameters, such as network depth (number of layers), batch_size, and learning_rate, can be varied in the training. Once the training is complete, the notebook shows how to host the trained model for inference.

From 6fabfba03b120a8c94b1d28f6c9331582b1dbe4e Mon Sep 17 00:00:00 2001
From: djarpin
Date: Mon, 17 Sep 2018 14:55:20 -0700
Subject: [PATCH 9/9] Updated: notebook name from HPO to tune

---
 .../{hpo_r_bring_your_own.ipynb => tune_r_bring_your_own.ipynb} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename hyperparameter_tuning/r_bring_your_own/{hpo_r_bring_your_own.ipynb => tune_r_bring_your_own.ipynb} (100%)

diff --git a/hyperparameter_tuning/r_bring_your_own/hpo_r_bring_your_own.ipynb b/hyperparameter_tuning/r_bring_your_own/tune_r_bring_your_own.ipynb
similarity index 100%
rename from hyperparameter_tuning/r_bring_your_own/hpo_r_bring_your_own.ipynb
rename to hyperparameter_tuning/r_bring_your_own/tune_r_bring_your_own.ipynb