From d3abc6afde0dc7c1b76127b6f3cb598d5af1654e Mon Sep 17 00:00:00 2001 From: Suraj Kota Date: Thu, 18 Aug 2022 17:52:40 -0700 Subject: [PATCH 1/3] update pytorch DLC version to 1.11 The notebook fails with the current PyTorch 1.8 container. I think it's a problem with the torchvision version installed in the container. ``` AlgorithmError: ExecuteUserScriptError: Command "/opt/conda/bin/python3.6 mnist.py --backend gloo --epochs 1" INFO:__main__:Initialized the distributed environment: 'gloo' backend on 2 nodes. Current host rank is 0. Number of gpus: 0 INFO:__main__:Get train data loader Traceback (most recent call last): File "mnist.py", line 257, in train(parser.parse_args()) File "mnist.py", line 114, in train train_loader = _get_train_data_loader(args.batch_size, args.data_dir, is_distributed, **kwargs) File "mnist.py", line 48, in _get_train_data_loader [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] File "/opt/conda/lib/python3.6/site-packages/torchvision/datasets/mnist.py", line 83, in __init__ ' You can use download=True to download it') RuntimeError: Dataset not found. 
You can use download=True to download it, exit code: 1 ``` --- sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb b/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb index 4c78865757..8adb1993b1 100644 --- a/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb +++ b/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb @@ -204,8 +204,8 @@ "\n", "estimator = PyTorch(entry_point='mnist.py',\n", " role=role,\n", - " py_version='py3',\n", - " framework_version='1.8.0',\n", + " py_version='py38',\n", + " framework_version='1.11.0',\n", " instance_count=2,\n", " instance_type='ml.c5.2xlarge',\n", " hyperparameters={\n", From bcf330e629d261995378d049a2995dc898dac601 Mon Sep 17 00:00:00 2001 From: Suraj Kota Date: Fri, 19 Aug 2022 01:28:59 +0000 Subject: [PATCH 2/3] formatting --- .../pytorch_mnist/pytorch_mnist.ipynb | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb b/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb index 8adb1993b1..39cbc5eda9 100644 --- a/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb +++ b/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb @@ -69,7 +69,7 @@ "sagemaker_session = sagemaker.Session()\n", "\n", "bucket = sagemaker_session.default_bucket()\n", - "prefix = 'sagemaker/DEMO-pytorch-mnist'\n", + "prefix = \"sagemaker/DEMO-pytorch-mnist\"\n", "\n", "role = sagemaker.get_execution_role()" ] @@ -111,14 +111,16 @@ "from torchvision.datasets import MNIST\n", "from torchvision import transforms\n", "\n", - "MNIST.mirrors = [\"https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/MNIST/\"]\n", + "MNIST.mirrors = [\n", + " \"https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/MNIST/\"\n", + "]\n", "\n", "MNIST(\n", - " 'data',\n", + " \"data\",\n", " download=True,\n", " 
transform=transforms.Compose(\n", " [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]\n", - " )\n", + " ),\n", ")" ] }, @@ -144,8 +146,8 @@ } ], "source": [ - "inputs = sagemaker_session.upload_data(path='data', bucket=bucket, key_prefix=prefix)\n", - "print('input spec (in this case, just an S3 path): {}'.format(inputs))" + "inputs = sagemaker_session.upload_data(path=\"data\", bucket=bucket, key_prefix=prefix)\n", + "print(\"input spec (in this case, just an S3 path): {}\".format(inputs))" ] }, { @@ -202,16 +204,15 @@ "source": [ "from sagemaker.pytorch import PyTorch\n", "\n", - "estimator = PyTorch(entry_point='mnist.py',\n", - " role=role,\n", - " py_version='py38',\n", - " framework_version='1.11.0',\n", - " instance_count=2,\n", - " instance_type='ml.c5.2xlarge',\n", - " hyperparameters={\n", - " 'epochs': 1,\n", - " 'backend': 'gloo'\n", - " })" + "estimator = PyTorch(\n", + " entry_point=\"mnist.py\",\n", + " role=role,\n", + " py_version=\"py38\",\n", + " framework_version=\"1.11.0\",\n", + " instance_count=2,\n", + " instance_type=\"ml.c5.2xlarge\",\n", + " hyperparameters={\"epochs\": 1, \"backend\": \"gloo\"},\n", + ")" ] }, { @@ -532,7 +533,7 @@ } ], "source": [ - "estimator.fit({'training': inputs})" + "estimator.fit({\"training\": inputs})" ] }, { @@ -562,7 +563,7 @@ } ], "source": [ - "predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')" + "predictor = estimator.deploy(initial_instance_count=1, instance_type=\"ml.m4.xlarge\")" ] }, { @@ -600,16 +601,20 @@ "metadata": {}, "outputs": [], "source": [ - "import gzip \n", + "import gzip\n", "import numpy as np\n", "import random\n", "import os\n", "\n", - "data_dir = 'data/MNIST/raw'\n", + "data_dir = \"data/MNIST/raw\"\n", "with gzip.open(os.path.join(data_dir, \"t10k-images-idx3-ubyte.gz\"), \"rb\") as f:\n", - " images = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28, 28).astype(np.float32)\n", + " images = (\n", + " 
np.frombuffer(f.read(), np.uint8, offset=16)\n", + " .reshape(-1, 28, 28)\n", + " .astype(np.float32)\n", + " )\n", "\n", - "mask = random.sample(range(len(images)), 16) # randomly select some of the test images\n", + "mask = random.sample(range(len(images)), 16) # randomly select some of the test images\n", "mask = np.array(mask, dtype=np.int)\n", "data = images[mask]" ] @@ -710,9 +715,7 @@ "metadata": {}, "outputs": [], "source": [ - "sagemaker_session.delete_endpoint(\n", - " endpoint_name = predictor.endpoint_name\n", - ")" + "sagemaker_session.delete_endpoint(endpoint_name=predictor.endpoint_name)" ] } ], From 35701de5d3b54b2240c7407ae16c4bb315888a05 Mon Sep 17 00:00:00 2001 From: Suraj Kota Date: Fri, 19 Aug 2022 01:35:20 +0000 Subject: [PATCH 3/3] l = 100 --- sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb b/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb index 39cbc5eda9..7c91e84339 100644 --- a/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb +++ b/sagemaker-python-sdk/pytorch_mnist/pytorch_mnist.ipynb @@ -111,9 +111,7 @@ "from torchvision.datasets import MNIST\n", "from torchvision import transforms\n", "\n", - "MNIST.mirrors = [\n", - " \"https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/MNIST/\"\n", - "]\n", + "MNIST.mirrors = [\"https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/MNIST/\"]\n", "\n", "MNIST(\n", " \"data\",\n", @@ -608,11 +606,7 @@ "\n", "data_dir = \"data/MNIST/raw\"\n", "with gzip.open(os.path.join(data_dir, \"t10k-images-idx3-ubyte.gz\"), \"rb\") as f:\n", - " images = (\n", - " np.frombuffer(f.read(), np.uint8, offset=16)\n", - " .reshape(-1, 28, 28)\n", - " .astype(np.float32)\n", - " )\n", + " images = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28, 28).astype(np.float32)\n", "\n", "mask = random.sample(range(len(images)), 16) # randomly select 
some of the test images\n", "mask = np.array(mask, dtype=np.int)\n",