From 77d23e5e471e1b4dfc92a38f4565a82902bbde9b Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Fri, 16 Oct 2020 19:17:47 -0700 Subject: [PATCH 01/10] simple adapatation of mnist example from pytorch --- examples/mnist_template.py | 152 +++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 examples/mnist_template.py diff --git a/examples/mnist_template.py b/examples/mnist_template.py new file mode 100644 index 0000000..304ed1b --- /dev/null +++ b/examples/mnist_template.py @@ -0,0 +1,152 @@ +from __future__ import print_function +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +from torch.optim.lr_scheduler import StepLR + +###### Hydra Block ###### +from omegaconf import MISSING +import hydra +from hydra.core.config_store import ConfigStore +from dataclasses import dataclass + +# config schema imports +from config.torch.optim import AdadeltaConf +from config.torch.optim.lr_scheduler import StepLRConf + +@dataclass +class CommonArgparseArgs: + stuff: int = 1 + +@dataclass +class ExportedArgparseArgs: + epochs: int = 14 + batch_size: int = 64 + test_batch_size: int = 1000 + no_cuda: bool = False + save_model: bool = False + dry_run: bool = False + log_interval: int = 10 + seed: int = 1 + +@dataclass +class MNISTStepLRConf(StepLRConf): + step_size: int = 1 + +@dataclass +class MNISTNetConf: + args: ExportedArgparseArgs + optim: AdadeltaConf + scheduler: MNISTStepLRConf + +cs = ConfigStore.instance() +cs.store(name="config", node=MNISTNetConf) + +###### / Hydra Block ###### + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout2d(0.25) + self.dropout2 = nn.Dropout2d(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +def train(args, model, device, train_loader, optimizer, epoch): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) + if args.dry_run: + break + + +def test(model, device, test_loader): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + 100. 
* correct / len(test_loader.dataset))) + + +@hydra.main(config_name='config') +def main(cfg): + use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() + torch.manual_seed(cfg.args.seed) + device = torch.device("cuda" if use_cuda else "cpu") + + train_kwargs = {'batch_size': cfg.args.batch_size} + test_kwargs = {'batch_size': cfg.args.test_batch_size} + if use_cuda: + cuda_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(cuda_kwargs) + test_kwargs.update(cuda_kwargs) + + #transform=transforms.Compose([ + # transforms.ToTensor(), + # transforms.Normalize((0.1307,), (0.3081,)) + # ]) + #dataset1 = datasets.MNIST('../data', train=True, download=True, + # transform=transform) + #dataset2 = datasets.MNIST('../data', train=False, + # transform=transform) + #train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) + + model = Net().to(device) + optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) + scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) + + for epoch in range(1, cfg.args.epochs + 1): + train(cfg.args, model, device, train_loader, optimizer, epoch) + test(model, device, test_loader) + scheduler.step() + + if cfg.args.save_model: + torch.save(model.state_dict(), "mnist_cnn.pt") + + +if __name__ == '__main__': + main() From daad4ec2015eac3c3d8fbccb6927ac42cb51c81e Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Sat, 17 Oct 2020 13:00:19 -0700 Subject: [PATCH 02/10] thinking through mnist ex organization --- examples/mnist_template.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/mnist_template.py b/examples/mnist_template.py index 304ed1b..118362c 100644 --- a/examples/mnist_template.py +++ b/examples/mnist_template.py @@ -37,13 +37,24 @@ class MNISTStepLRConf(StepLRConf): step_size: int = 1 @dataclass -class MNISTNetConf: +class MNISTModelConf: + # @package _group_ #is this how packages are done? + drop_prob: float = 0.2 + in_features: 784 #configen auto gen these? + out_features: 10 + hidden_dim: 1000 + seed: 123 + +@dataclass +class MNISTConf: args: ExportedArgparseArgs + model: MNISTModelConf optim: AdadeltaConf scheduler: MNISTStepLRConf + cs = ConfigStore.instance() -cs.store(name="config", node=MNISTNetConf) +cs.store(name="config", node=MNISTConf) ###### / Hydra Block ###### From 3051645ca4113accf44df10d646705c0a9648fe3 Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Sun, 18 Oct 2020 16:18:41 -0700 Subject: [PATCH 03/10] Updates to compositional strategy. --- examples/mnist_template.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/examples/mnist_template.py b/examples/mnist_template.py index 118362c..924fa55 100644 --- a/examples/mnist_template.py +++ b/examples/mnist_template.py @@ -8,6 +8,7 @@ from torch.optim.lr_scheduler import StepLR ###### Hydra Block ###### +from typing import List, Any from omegaconf import MISSING import hydra from hydra.core.config_store import ConfigStore @@ -20,6 +21,7 @@ @dataclass class CommonArgparseArgs: stuff: int = 1 + checkpoint_name: str = 'unnamed.pt' @dataclass class ExportedArgparseArgs: @@ -32,25 +34,19 @@ class ExportedArgparseArgs: log_interval: int = 10 seed: int = 1 -@dataclass -class MNISTStepLRConf(StepLRConf): - step_size: int = 1 - @dataclass class MNISTModelConf: - # @package _group_ #is this how packages are done? 
drop_prob: float = 0.2 - in_features: 784 #configen auto gen these? - out_features: 10 - hidden_dim: 1000 - seed: 123 + in_features: int = 784 + out_features: int = 10 + hidden_dim: int = 1000 @dataclass class MNISTConf: - args: ExportedArgparseArgs - model: MNISTModelConf - optim: AdadeltaConf - scheduler: MNISTStepLRConf + args: ExportedArgparseArgs = ExportedArgparseArgs() + model: MNISTModelConf = MNISTModelConf() + optim: Any = AdadeltaConf() + scheduler: Any = StepLRConf(step_size=1) cs = ConfigStore.instance() @@ -122,6 +118,8 @@ def test(model, device, test_loader): @hydra.main(config_name='config') def main(cfg): + print(cfg.pretty()) + import ipdb; ipdb.set_trace() use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() torch.manual_seed(cfg.args.seed) device = torch.device("cuda" if use_cuda else "cpu") From ef7504e43b19f32ce9ba6ff6d879dcc4cbec135a Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Sun, 18 Oct 2020 23:52:35 -0700 Subject: [PATCH 04/10] separate example into files with config excluding model and including model --- examples/00_mnist_template.py | 149 ++++++++++++++++++++++++++++ examples/01_mnist_template.py | 182 ++++++++++++++++++++++++++++++++++ 2 files changed, 331 insertions(+) create mode 100644 examples/00_mnist_template.py create mode 100644 examples/01_mnist_template.py diff --git a/examples/00_mnist_template.py b/examples/00_mnist_template.py new file mode 100644 index 0000000..90715a9 --- /dev/null +++ b/examples/00_mnist_template.py @@ -0,0 +1,149 @@ +from __future__ import print_function +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +from torch.optim.lr_scheduler import StepLR + +###### Hydra Block ###### +from typing import List, Any +from omegaconf import MISSING +import hydra +from hydra.core.config_store import ConfigStore +from dataclasses import dataclass + +# config schema imports +from config.torch.optim import AdadeltaConf +from config.torch.optim.lr_scheduler import StepLRConf + +@dataclass +class ExportedArgparseArgs: + epochs: int = 14 + batch_size: int = 64 + test_batch_size: int = 1000 + no_cuda: bool = False + save_model: bool = False + dry_run: bool = False + log_interval: int = 10 + seed: int = 1 + checkpoint_name: str = 'unnamed.pt' + +@dataclass +class MNISTConf: + args: ExportedArgparseArgs = ExportedArgparseArgs() + optim: Any = AdadeltaConf() + scheduler: Any = StepLRConf(step_size=1) + + +cs = ConfigStore.instance() +cs.store(name="config", node=MNISTConf) + +###### / Hydra Block ###### + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout2d(0.25) + self.dropout2 = nn.Dropout2d(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +def train(args, model, device, train_loader, optimizer, epoch): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % 
args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) + if args.dry_run: + break + + +def test(model, device, test_loader): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + 100. * correct / len(test_loader.dataset))) + + +@hydra.main(config_name='config') +def main(cfg): + print(cfg.pretty()) + import ipdb; ipdb.set_trace() + use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() + torch.manual_seed(cfg.args.seed) + device = torch.device("cuda" if use_cuda else "cpu") + + train_kwargs = {'batch_size': cfg.args.batch_size} + test_kwargs = {'batch_size': cfg.args.test_batch_size} + if use_cuda: + cuda_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(cuda_kwargs) + test_kwargs.update(cuda_kwargs) + + #transform=transforms.Compose([ + # transforms.ToTensor(), + # transforms.Normalize((0.1307,), (0.3081,)) + # ]) + #dataset1 = datasets.MNIST('../data', train=True, download=True, + # transform=transform) + #dataset2 = datasets.MNIST('../data', train=False, + # transform=transform) + #train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) + + model = Net().to(device) + optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) + scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) + + for epoch in range(1, cfg.args.epochs + 1): + train(cfg.args, model, device, train_loader, optimizer, epoch) + test(model, device, test_loader) + scheduler.step() + + if cfg.args.save_model: + torch.save(model.state_dict(), cfg.args.checkpoint_name) + + +if __name__ == '__main__': + main() diff --git a/examples/01_mnist_template.py b/examples/01_mnist_template.py new file mode 100644 index 0000000..19b1ac9 --- /dev/null +++ b/examples/01_mnist_template.py @@ -0,0 +1,182 @@ +from __future__ import print_function +import argparse +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torchvision import datasets, transforms +from torch.optim.lr_scheduler import StepLR + +###### Hydra Block ###### +from typing import List, Any +from omegaconf import MISSING +import hydra +from hydra.core.config_store import ConfigStore +from dataclasses import dataclass + +# config schema imports +from config.torch.optim import AdadeltaConf +from config.torch.optim.lr_scheduler import StepLRConf + +@dataclass +class ExportedArgparseArgs: + epochs: int = 14 + batch_size: int = 64 + test_batch_size: int = 1000 + no_cuda: bool = False + save_model: bool = False + dry_run: bool = False + log_interval: int = 10 + seed: int = 1 + checkpoint_name: str = 'unnamed.pt' + +@dataclass +class Conv2dConf: + in_channels: int = MISSING + out_channels: int = MISSING + kernel_size: int = 3 + stride: int = 1 + +@dataclass +class DropoutConf: + p: float = MISSING + 
+@dataclass +class Maxpool2dConf: + kernel_size: int = 2 + stride: Any = None + padding: int = 0 + dilation: int = 1 + +@dataclass +class LinearConf: + in_features: int = MISSING + out_features: int = MISSING + +@dataclass +class MNISTNetConf: + conv1: Conv2dConf = Conv2dConf(in_channels=1, out_channels=32) + conv2: Conv2dConf = Conv2dConf(in_channels=32, out_channels=64) + dropout1: DropoutConf = DropoutConf(p=0.25) + dropout2: DropoutConf = DropoutConf(p=0.5) + maxpool: Maxpool2dConf = Maxpool2dConf() + linear1: LinearConf = LinearConf(in_features=9216, out_features=128) + linear2: LinearConf = LinearConf(in_features=128, out_features=10) + +@dataclass +class MNISTConf: + args: ExportedArgparseArgs = ExportedArgparseArgs() + model: MNISTNetConf = MNISTNetConf() + optim: Any = AdadeltaConf() + scheduler: Any = StepLRConf(step_size=1) + + +cs = ConfigStore.instance() +cs.store(name="config", node=MNISTConf) + +###### / Hydra Block ###### + +class Net(nn.Module): + def __init__(self, cfg): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(**cfg.conv1) + self.conv2 = nn.Conv2d(**cfg.conv2) + self.dropout1 = nn.Dropout2d(**cfg.dropout1) + self.dropout2 = nn.Dropout2d(**cfg.dropout2) + self.fc1 = nn.Linear(**cfg.linear1) + self.fc2 = nn.Linear(**cfg.linear2) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, **cfg.maxpool2d) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +def train(args, model, device, train_loader, optimizer, epoch): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) + if args.dry_run: + break + + +def test(model, device, test_loader): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + 100. 
* correct / len(test_loader.dataset))) + + +@hydra.main(config_name='config') +def main(cfg): + print(cfg.pretty()) + use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() + torch.manual_seed(cfg.args.seed) + device = torch.device("cuda" if use_cuda else "cpu") + + train_kwargs = {'batch_size': cfg.args.batch_size} + test_kwargs = {'batch_size': cfg.args.test_batch_size} + if use_cuda: + cuda_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(cuda_kwargs) + test_kwargs.update(cuda_kwargs) + + #transform=transforms.Compose([ + # transforms.ToTensor(), + # transforms.Normalize((0.1307,), (0.3081,)) + # ]) + #dataset1 = datasets.MNIST('../data', train=True, download=True, + # transform=transform) + #dataset2 = datasets.MNIST('../data', train=False, + # transform=transform) + #train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) + + model = Net(cfg.model).to(device) + optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) + scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) + + for epoch in range(1, cfg.args.epochs + 1): + train(cfg.args, model, device, train_loader, optimizer, epoch) + test(model, device, test_loader) + scheduler.step() + + if cfg.args.save_model: + torch.save(model.state_dict(), cfg.args.checkpoint_name) + + +if __name__ == '__main__': + main() From 37f123eeb5f5207ab7546076648474f9a9b95886 Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Sun, 18 Oct 2020 23:54:36 -0700 Subject: [PATCH 05/10] remove old mnist_template --- examples/mnist_template.py | 161 ------------------------------------- 1 file changed, 161 deletions(-) delete mode 100644 examples/mnist_template.py diff --git a/examples/mnist_template.py b/examples/mnist_template.py deleted file mode 100644 index 924fa55..0000000 --- a/examples/mnist_template.py +++ /dev/null @@ -1,161 +0,0 @@ -from __future__ import print_function -import argparse -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from torchvision import datasets, transforms -from torch.optim.lr_scheduler import StepLR - -###### Hydra Block ###### -from typing import List, Any -from omegaconf import MISSING -import hydra -from hydra.core.config_store import ConfigStore -from dataclasses import dataclass - -# config schema imports -from config.torch.optim import AdadeltaConf -from config.torch.optim.lr_scheduler import StepLRConf - -@dataclass -class CommonArgparseArgs: - stuff: int = 1 - checkpoint_name: str = 'unnamed.pt' - -@dataclass -class ExportedArgparseArgs: - epochs: int = 14 - batch_size: int = 64 - test_batch_size: int = 1000 - no_cuda: bool = False - save_model: bool = False - dry_run: bool = False - log_interval: int = 10 - seed: int = 1 - -@dataclass -class MNISTModelConf: - drop_prob: float = 0.2 - in_features: int = 784 - out_features: int = 10 - hidden_dim: int = 1000 - -@dataclass -class MNISTConf: - args: ExportedArgparseArgs = ExportedArgparseArgs() - model: MNISTModelConf = MNISTModelConf() - optim: Any = AdadeltaConf() - scheduler: Any = StepLRConf(step_size=1) - - -cs = ConfigStore.instance() -cs.store(name="config", node=MNISTConf) - -###### / Hydra Block ###### - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 32, 3, 1) - self.conv2 = nn.Conv2d(32, 64, 3, 1) - self.dropout1 = nn.Dropout2d(0.25) - self.dropout2 = nn.Dropout2d(0.5) - self.fc1 = 
nn.Linear(9216, 128) - self.fc2 = nn.Linear(128, 10) - - def forward(self, x): - x = self.conv1(x) - x = F.relu(x) - x = self.conv2(x) - x = F.relu(x) - x = F.max_pool2d(x, 2) - x = self.dropout1(x) - x = torch.flatten(x, 1) - x = self.fc1(x) - x = F.relu(x) - x = self.dropout2(x) - x = self.fc2(x) - output = F.log_softmax(x, dim=1) - return output - - -def train(args, model, device, train_loader, optimizer, epoch): - model.train() - for batch_idx, (data, target) in enumerate(train_loader): - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_interval == 0: - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.dataset), - 100. * batch_idx / len(train_loader), loss.item())) - if args.dry_run: - break - - -def test(model, device, test_loader): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss - pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability - correct += pred.eq(target.view_as(pred)).sum().item() - - test_loss /= len(test_loader.dataset) - - print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. * correct / len(test_loader.dataset))) - - -@hydra.main(config_name='config') -def main(cfg): - print(cfg.pretty()) - import ipdb; ipdb.set_trace() - use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() - torch.manual_seed(cfg.args.seed) - device = torch.device("cuda" if use_cuda else "cpu") - - train_kwargs = {'batch_size': cfg.args.batch_size} - test_kwargs = {'batch_size': cfg.args.test_batch_size} - if use_cuda: - cuda_kwargs = {'num_workers': 1, - 'pin_memory': True, - 'shuffle': True} - train_kwargs.update(cuda_kwargs) - test_kwargs.update(cuda_kwargs) - - #transform=transforms.Compose([ - # transforms.ToTensor(), - # transforms.Normalize((0.1307,), (0.3081,)) - # ]) - #dataset1 = datasets.MNIST('../data', train=True, download=True, - # transform=transform) - #dataset2 = datasets.MNIST('../data', train=False, - # transform=transform) - #train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) - #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) - - model = Net().to(device) - optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) - scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) - - for epoch in range(1, cfg.args.epochs + 1): - train(cfg.args, model, device, train_loader, optimizer, epoch) - test(model, device, test_loader) - scheduler.step() - - if cfg.args.save_model: - torch.save(model.state_dict(), "mnist_cnn.pt") - - -if __name__ == '__main__': - main() From 549e87c45f42065dfdc2a36b2c07b9f81d21e7b1 Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Mon, 19 Oct 2020 15:48:34 -0700 Subject: [PATCH 06/10] . 
--- examples/00_mnist_template.py | 2 +- examples/01_mnist_template.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/00_mnist_template.py b/examples/00_mnist_template.py index 90715a9..58f6a16 100644 --- a/examples/00_mnist_template.py +++ b/examples/00_mnist_template.py @@ -107,7 +107,6 @@ def test(model, device, test_loader): @hydra.main(config_name='config') def main(cfg): print(cfg.pretty()) - import ipdb; ipdb.set_trace() use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() torch.manual_seed(cfg.args.seed) device = torch.device("cuda" if use_cuda else "cpu") @@ -133,6 +132,7 @@ def main(cfg): #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) model = Net().to(device) + # use hydra.utils.instantiate to instantiate the optimizer and the scheduler: optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) diff --git a/examples/01_mnist_template.py b/examples/01_mnist_template.py index 19b1ac9..aeb1e4d 100644 --- a/examples/01_mnist_template.py +++ b/examples/01_mnist_template.py @@ -166,6 +166,7 @@ def main(cfg): #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) model = Net(cfg.model).to(device) + # use hydra.utils.instantiate to instantiate the optimizer and the scheduler: optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) From 4cd9a420cb36ce2552a91df92d0e300027c26d15 Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Sat, 24 Oct 2020 19:54:54 -0700 Subject: [PATCH 07/10] Draft MNIST basic tutorial file + markdown --- config/torch/optim/lr_scheduler.py | 123 --------- examples/01_mnist_template.py | 183 ------------- examples/mnist_00.md | 242 ++++++++++++++++++ .../{00_mnist_template.py => mnist_00.py} | 80 +++--- 4 files changed, 283 insertions(+), 345 deletions(-) delete mode 100644 config/torch/optim/lr_scheduler.py delete mode 100644 examples/01_mnist_template.py create mode 100644 examples/mnist_00.md rename examples/{00_mnist_template.py => mnist_00.py} (66%) diff --git a/config/torch/optim/lr_scheduler.py b/config/torch/optim/lr_scheduler.py deleted file mode 100644 index 80450a8..0000000 --- a/config/torch/optim/lr_scheduler.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# -# Generated by configen, do not edit. 
-# See https://github.com/facebookresearch/hydra/tree/master/tools/configen -# fmt: off -# isort:skip_file -# flake8: noqa - -from dataclasses import dataclass, field -from omegaconf import MISSING -from typing import Any - - -@dataclass -class LambdaLRConf: - _target_: str = "torch.optim.lr_scheduler.LambdaLR" - optimizer: Any = MISSING - lr_lambda: Any = MISSING - last_epoch: Any = -1 - - -@dataclass -class MultiplicativeLRConf: - _target_: str = "torch.optim.lr_scheduler.MultiplicativeLR" - optimizer: Any = MISSING - lr_lambda: Any = MISSING - last_epoch: Any = -1 - - -@dataclass -class StepLRConf: - _target_: str = "torch.optim.lr_scheduler.StepLR" - optimizer: Any = MISSING - step_size: Any = MISSING - gamma: Any = 0.1 - last_epoch: Any = -1 - - -@dataclass -class MultiStepLRConf: - _target_: str = "torch.optim.lr_scheduler.MultiStepLR" - optimizer: Any = MISSING - milestones: Any = MISSING - gamma: Any = 0.1 - last_epoch: Any = -1 - - -@dataclass -class ExponentialLRConf: - _target_: str = "torch.optim.lr_scheduler.ExponentialLR" - optimizer: Any = MISSING - gamma: Any = MISSING - last_epoch: Any = -1 - - -@dataclass -class CosineAnnealingLRConf: - _target_: str = "torch.optim.lr_scheduler.CosineAnnealingLR" - optimizer: Any = MISSING - T_max: Any = MISSING - eta_min: Any = 0 - last_epoch: Any = -1 - - -@dataclass -class ReduceLROnPlateauConf: - _target_: str = "torch.optim.lr_scheduler.ReduceLROnPlateau" - optimizer: Any = MISSING - mode: str = 'min' - factor: Any = 0.1 - patience: Any = 10 - verbose: Any = False - threshold: Any = 0.0001 - threshold_mode: str = 'rel' - cooldown: Any = 0 - min_lr: Any = 0 - eps: Any = 1e-08 - - -@dataclass -class CyclicLRConf: - _target_: str = "torch.optim.lr_scheduler.CyclicLR" - optimizer: Any = MISSING - base_lr: Any = MISSING - max_lr: Any = MISSING - step_size_up: Any = 2000 - step_size_down: Any = None - mode: str = 'triangular' - gamma: Any = 1.0 - scale_fn: Any = None - scale_mode: str = 'cycle' - cycle_momentum: Any = True - base_momentum: Any = 0.8 - max_momentum: Any = 0.9 - last_epoch: Any = -1 - - -@dataclass -class CosineAnnealingWarmRestartsConf: - _target_: str = "torch.optim.lr_scheduler.CosineAnnealingWarmRestarts" - optimizer: Any = MISSING - T_0: Any = MISSING - T_mult: Any = 1 - eta_min: Any = 0 - last_epoch: Any = -1 - - -@dataclass -class OneCycleLRConf: - _target_: str = "torch.optim.lr_scheduler.OneCycleLR" - optimizer: Any = MISSING - max_lr: Any = MISSING - total_steps: Any = None - epochs: Any = None - steps_per_epoch: Any = None - pct_start: Any = 0.3 - anneal_strategy: str = 'cos' - cycle_momentum: Any = True - base_momentum: Any = 0.85 - max_momentum: Any = 0.95 - div_factor: Any = 25.0 - final_div_factor: Any = 10000.0 - last_epoch: Any = -1 diff --git a/examples/01_mnist_template.py b/examples/01_mnist_template.py deleted file mode 100644 index aeb1e4d..0000000 --- a/examples/01_mnist_template.py +++ /dev/null @@ -1,183 +0,0 @@ -from __future__ import print_function -import argparse -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from torchvision import datasets, transforms -from torch.optim.lr_scheduler import StepLR - -###### Hydra Block ###### -from typing import List, Any -from omegaconf import MISSING -import hydra -from hydra.core.config_store import ConfigStore -from dataclasses import dataclass - -# config schema imports -from config.torch.optim import AdadeltaConf -from config.torch.optim.lr_scheduler import StepLRConf - -@dataclass -class 
ExportedArgparseArgs: - epochs: int = 14 - batch_size: int = 64 - test_batch_size: int = 1000 - no_cuda: bool = False - save_model: bool = False - dry_run: bool = False - log_interval: int = 10 - seed: int = 1 - checkpoint_name: str = 'unnamed.pt' - -@dataclass -class Conv2dConf: - in_channels: int = MISSING - out_channels: int = MISSING - kernel_size: int = 3 - stride: int = 1 - -@dataclass -class DropoutConf: - p: float = MISSING - -@dataclass -class Maxpool2dConf: - kernel_size: int = 2 - stride: Any = None - padding: int = 0 - dilation: int = 1 - -@dataclass -class LinearConf: - in_features: int = MISSING - out_features: int = MISSING - -@dataclass -class MNISTNetConf: - conv1: Conv2dConf = Conv2dConf(in_channels=1, out_channels=32) - conv2: Conv2dConf = Conv2dConf(in_channels=32, out_channels=64) - dropout1: DropoutConf = DropoutConf(p=0.25) - dropout2: DropoutConf = DropoutConf(p=0.5) - maxpool: Maxpool2dConf = Maxpool2dConf() - linear1: LinearConf = LinearConf(in_features=9216, out_features=128) - linear2: LinearConf = LinearConf(in_features=128, out_features=10) - -@dataclass -class MNISTConf: - args: ExportedArgparseArgs = ExportedArgparseArgs() - model: MNISTNetConf = MNISTNetConf() - optim: Any = AdadeltaConf() - scheduler: Any = StepLRConf(step_size=1) - - -cs = ConfigStore.instance() -cs.store(name="config", node=MNISTConf) - -###### / Hydra Block ###### - -class Net(nn.Module): - def __init__(self, cfg): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(**cfg.conv1) - self.conv2 = nn.Conv2d(**cfg.conv2) - self.dropout1 = nn.Dropout2d(**cfg.dropout1) - self.dropout2 = nn.Dropout2d(**cfg.dropout2) - self.fc1 = nn.Linear(**cfg.linear1) - self.fc2 = nn.Linear(**cfg.linear2) - - def forward(self, x): - x = self.conv1(x) - x = F.relu(x) - x = self.conv2(x) - x = F.relu(x) - x = F.max_pool2d(x, **cfg.maxpool2d) - x = self.dropout1(x) - x = torch.flatten(x, 1) - x = self.fc1(x) - x = F.relu(x) - x = self.dropout2(x) - x = self.fc2(x) - output = F.log_softmax(x, dim=1) - return output - - -def train(args, model, device, train_loader, optimizer, epoch): - model.train() - for batch_idx, (data, target) in enumerate(train_loader): - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % args.log_interval == 0: - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.dataset), - 100. * batch_idx / len(train_loader), loss.item())) - if args.dry_run: - break - - -def test(model, device, test_loader): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss - pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability - correct += pred.eq(target.view_as(pred)).sum().item() - - test_loss /= len(test_loader.dataset) - - print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. 
* correct / len(test_loader.dataset))) - - -@hydra.main(config_name='config') -def main(cfg): - print(cfg.pretty()) - use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() - torch.manual_seed(cfg.args.seed) - device = torch.device("cuda" if use_cuda else "cpu") - - train_kwargs = {'batch_size': cfg.args.batch_size} - test_kwargs = {'batch_size': cfg.args.test_batch_size} - if use_cuda: - cuda_kwargs = {'num_workers': 1, - 'pin_memory': True, - 'shuffle': True} - train_kwargs.update(cuda_kwargs) - test_kwargs.update(cuda_kwargs) - - #transform=transforms.Compose([ - # transforms.ToTensor(), - # transforms.Normalize((0.1307,), (0.3081,)) - # ]) - #dataset1 = datasets.MNIST('../data', train=True, download=True, - # transform=transform) - #dataset2 = datasets.MNIST('../data', train=False, - # transform=transform) - #train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) - #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) - - model = Net(cfg.model).to(device) - # use hydra.utils.instantiate to instantiate the optimizer and the scheduler: - optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) - scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) - - for epoch in range(1, cfg.args.epochs + 1): - train(cfg.args, model, device, train_loader, optimizer, epoch) - test(model, device, test_loader) - scheduler.step() - - if cfg.args.save_model: - torch.save(model.state_dict(), cfg.args.checkpoint_name) - - -if __name__ == '__main__': - main() diff --git a/examples/mnist_00.md b/examples/mnist_00.md new file mode 100644 index 0000000..95eab72 --- /dev/null +++ b/examples/mnist_00.md @@ -0,0 +1,242 @@ +# MNIST Basic Tutorial + +This tutorial series is built around the [PyTorch MNIST example] and is meant to demonstrate how to modify your PyTorch code to be configured by Hydra. We will start with the simplest case which introduces one central concept while minimizing altered code. In the following tutorials ([Intermediate][Intermediate Tutorial] and [Advanced][Advanced Tutorial]), we will show how a few additional changes can yield a very powerful end product. + +The source file can be found in [mnist_00.py] + +### The 'HYDRA BLOCK' +*** +For clarity in this tutorial, as we modify the [PyTorch MNIST example], will make the diffs explicit. Most of the changes we introduce will be at the top of the file within the commented `##### HYDRA BLOCK #####`, though in practice much of this block could reside in its own concise imported file. + +### Imports +```python +import hydra +from hydra.core.config_store import ConfigStore +from typing import List, Any +from omegaconf import +from dataclasses import dataclass + +# config schema imports +from config.torch.optim import AdadeltaConf +from config.torch.optim.lr_scheduler import StepLRConf +``` + +There are two areas in our Hydra-specific imports. First, since we define configs in this file, we need access to the following: +- the `ConfigStore` +- typing from both `typing` and `omegaconf` +- the `dataclass` decorator + +##### Config Store +*where we store our configs* + +Briefly, the concept behind the `ConfigStore` is to create a singleton object of this class and register all config objects to it. This tutorial demonstrates the simplest approach to using the `ConfigStore`. + +##### Config Schema +*our config templates - providing type checking and good defaults* + +Second, we import two [config schema] from `hydra-torch`. 
Think of config schema as recommended templates for commonly used configurations. `hydra-torch` provides config schema for a large subset of common PyTorch classes. In the basic tutorial, we only consider the schema for the PyTorch classes: +- `Adadelta` which resides in `torch.optim` +- `StepLR` which resides in `torch.optim.lr_scheduler` + +Note that the naming convention for the import heirarchy mimics that of `torch`. We correspondingly import the following config schema: +- `AdadeltaConf` from `config.torch.optim` +- `StepLRConf` from `config.torch.optim.lr_scheduler` + +We try to preserve the naming convention of using the suffix `-Conf` at all times to distinguish the config schema class from the class of the object that is to be configured. + +### Top Level Config +After importing two pre-defined config schema for components in our training pipeline, the optimizer and scheduler, we still need a "top level" config to merge everything. We can call this config class `MNISTConf`. You will notice that this class is nothing more than a python `dataclass` and corresponds to, you guessed it, a *config schema*. We are responsible for writing this since it is not a standard class from pytorch that `hydra-torch` has a schema for. + +We can start this out with the configs we know we will need for the optimizer (`Adadelta`) and scheduler (`StepLR`): +```python +# our top level config: +@dataclass +class MNISTConf: + adadelta: Any = AdadeltaConf() + steplr: Any = StepLRConf(step_size=1) +``` +Note that for `StepLRConf()` we need to pass `step_size=1` when we initialize it because it's default value is `MISSING`: +```python +# the class imported from: config.torch.optim.lr_scheduler: +@dataclass +class StepLRConf: + _target_: str = "torch.optim.lr_scheduler.StepLR" + optimizer: Any = MISSING + step_size: Any = MISSING + gamma: Any = 0.1 + last_epoch: Any = -1 +``` +Later, we will pass the optimizer (also default `MISSING`) as a passed through argument when the actual `StepLR` object is instantiated. + +### Add the Top Level Conf to the ConfigStore +Very simply, we add the top-level config class `MNISTConf` to the `ConfigStore` in two lines: +```python +cs = ConfigStore.instance() +cs.store(name="config", node=MNISTConf) +``` +The name `config` will be passed to the `@hydra` decordator when we get to `main()`. + +*** +### Parting with Argparse + +Now we're starting to realize our relationship with `argparse` isn't as serious as we thought it was. Although `argparse` is powerful, we can take it a step further. In the process we hope to introduce greater organization and free our primary file from as much boilerplate as possible. + +One feature Hydra provides us is aggregating our configuration files alongside any 'specifications' we pass via command line arguments. What this means is as long as we have the configuration file which defines possible arguments like `save_model` or `dry_run`, there is no need to also litter our code with `argparse` definitions. 
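To make the contrast concrete, here is a rough sketch of the same run launched both ways (a hypothetical invocation, assuming the flat config fields we define below):

```bash
# argparse version: every flag must first be declared in code
$ python mnist.py --batch-size 128 --epochs 5 --no-cuda

# Hydra version: overrides come straight from the config, no parser code needed
$ python mnist_00.py batch_size=128 epochs=5 no_cuda=True
```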
+ +This whole block in `main()`: +```python +def main(): +# Training settings + parser = argparse.ArgumentParser(description='PyTorch MNIST Example') + parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') + parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', + help='input batch size for testing (default: 1000)') + parser.add_argument('--epochs', type=int, default=14, metavar='N', + help='number of epochs to train (default: 14)') + parser.add_argument('--lr', type=float, default=1.0, metavar='LR', + help='learning rate (default: 1.0)') + parser.add_argument('--gamma', type=float, default=0.7, metavar='M', + help='Learning rate step gamma (default: 0.7)') + parser.add_argument('--no-cuda', action='store_true', default=False, + help='disables CUDA training') + parser.add_argument('--dry-run', action='store_true', default=False, + help='quickly check a single pass') + parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') + parser.add_argument('--log-interval', type=int, default=10, metavar='N', + help='how many batches to wait before logging training status') + parser.add_argument('--save-model', action='store_true', default=False, + help='For Saving the current Model') + args = parser.parse_args() +``` +becomes: +```python +def main(cfg): +# All argparse args now reside in cfg +``` +Our initial strategy is to dump these arguments directly in our top-level configuration. +```python +@dataclass +class MNISTConf: + batch_size: int = 64 + test_batch_size: int = 1000 + epochs: int = 14 + lr: float = 1.0 # REMOVE THIS SINCE IT IS NOW WITHIN `AdadeltaConf` BELOW + gamma: float = 0.7 # REMOVE THIS SINCE IT IS NOW WITHIN `AdadeltaConf` BELOW + no_cuda: bool = False + dry_run: bool = False + seed: int = 1 + log_interval: int + save_model: bool = False + adadelta: Any = AdadeltaConf() + steplr: Any = StepLRConf(step_size=1) +``` +This works, but can feel a bit flat and disorganized (much like `argparse` args can be). Note, we also sacrifice `help` strings. Don't worry, we will remedy both of these concerns down the road. + +Now our `argparse` args are at the same level as our optimizer and scheduler configs. We will remove `lr` and `gamma` since they are already present within the optimizer config `AdadeltaConf`. +*** +### Dropping into `main()` +Now that we've defined all of our configs, we just need to let Hydra create our `cfg` object at runtime and make sure the `cfg` is plumbed to any object we want it to configure. +```python +@hydra.main(config_name='config') +def main(cfg): + print(cfg.pretty()) + ... +``` +The single idea here is that `@hydra.main` looks for a config in the `ConfigStore` instance, `cs` named "`config`". It finds `MNISTConf` (our top level conf) and populates `cfg` inside `main()` with the entire structured config including our optimizer config, `cfg.adadelta` and our scheduler config, `cfg.steplr`. + +Instrumenting `main()` is simple. Anywhere we find `args`, replace this with `cfg` since we put all of the `argparse` arguments at the top level. For example, `args.batch_size` becomes `cfg.batch_size`: +```python +# the first few lines of main + ... 
+ use_cuda = not cfg.no_cuda and torch.cuda.is_available() # DIFF args.no_cuda + torch.manual_seed(cfg.seed) # DIFF args.seed + device = torch.device("cuda" if use_cuda else "cpu") + + train_kwargs = {'batch_size': cfg.batch_size} # DIFF args.batch_size + test_kwargs = {'batch_size': cfg.test_batch_size} # DIFF args.test_batch_size + ... +``` + + +### Instantiating the optimizer and scheduler +Still inside `main()`, we want to draw attention to two slightly special cases before moving on. Both the `optimizer` and `scheduler` are instantiated manually by specifying each argument with its `cfg` equivalent. Note that since these are nested fields, each of these parameters is two levels down e.g. `lr=args.learning_rate` becomes `lr=cfg.adadelta.lr`. + +```python + optimizer = Adadelta(lr=cfg.adadelta.lr, #DIFF lr=args.learning_rate + rho=cfg.adadelta.rho, + eps=cfg.adadelta.eps, + weight_decay=cfg.adadelta.weight_decay, + params=model.parameters() + ``` +In the case of the `optimizer`, one argument is not a part of our config -- `params`. If it wasn't obvious, this needs to be passed from the initialized `Net()` model. In the config schema that initialized `cfg.adadelta`, `params` is default to `MISSING`. The same is true of the `optimizer` field in `StepLRConf`. + +```python +scheduler = StepLR(step_size=cfg.steplr.step_size, + gamma=cfg.steplr.gamma, + last_epoch=cfg.steplr.last_epoch, + optimizer=optimizer + ``` + This method for instantiation is the least invasive to the original code, but it is also the least flexible and highly verbose. Check out the [Intermediate Tutorial] for a better approach that will allow us to hotswap optimizers and schedulers, all while writing less code. + +### Running with Hydra + +```bash +$ python 00_minst.py +``` +That's it. Since the `@hydra.main` decorator is above `def main(cfg)`, Hydra will manage the command line, logging, and saving outputs to a date/time stamped directory automatically. These are all configurable, but the default behavior ensures expected functionality. For example, if a model checkpoint is saved, it will appear in a new directory `./outputs/DATE/TIME/`. + +#### New Superpowers + +##### Command Line Overrides + +Much like passing argparse args through the CLI, we can use our default values specified in `MNISTConf` and override only the arguments/parameters we want to tweak: + +```bash +$ python mnist_00.py epochs=1 save_model=True checkpoint_name='experiment0.pt' +``` + +For more on command line overrides, see: [Hydra CLI] and [Hydra override syntax]. + +##### Multirun +We often end up wanting to sweep our optimizer's learning rate. Here's how Hydra can help facilitate: +```bash +$ python mnist_00.py -m adadelta.lr="0.001, 0.01, 0.1" +``` +Notice the `-m` which indicates we want to schedule 3 jobs where the learning rate changes by an order of magnitude across each training session. + +It can be useful to test multirun outputs by passing `dry_run=True` and setting `epochs=1`: +```bash +$ python mnist_00.py -m epochs=1 dry_run=True adadelta.lr="0.001,0.01, 0.1" +``` + +`Note:` these jobs can be dispatched to different resources and run in parallel or scheduled to run serially (by default). More info on multirun: [Hydra Multirun]. Hydra can use different hyperparameter search tools as well. See: [Hydra Ax plugin] and [Hydra Nevergrad plugin]. + +### Summary +In this tutorial, we demonstrated the path of least resistance to configuring your existing PyTorch code with Hydra. 
The main benefits we get from the 'Basic' level are: +- No more boilerplate `argparse` taking up precious linecount +- All training related arguments (`epochs`, `save_model`, etc.) are now configurable via Hydra. +- **All** optimizer/scheduler (`Adadelta`/`StepLR`) arguments are exposed for configuration (beyond only the ones the user write argparse lines for) +- We have offloaded the book-keeping of compatible `argparse` code to Hydra via `hydra-torch` which runs tests ensuring all arguments track the API for the correct version of `pytorch`. + +However, there are some limitations in our current strategy that the [Intermediate Tutorial] will address. Namely: +- Configuring the model (*think architecture search*) +- Configuring the dataset (*think transfer learning*) +- Swapping in and out different Optimizers/Schedulers + +Once comfortable with the basics, continue on to the [Intermediate Tutorial]. + +[//]: # (These are reference links used in the body of this note and get stripped out when the markdown processor does its job. There is no need to format nicely because it shouldn't be seen. Thanks SO - http://stackoverflow.com/questions/4823468/store-comments-in-markdown-syntax) + [pytorch mnist example]: + [mnist_00.py]: mnist_00.py + [config schema]: + [hydra structured configs example]: + [hydra terminology]: + [hydra cli]: + [hydra override syntax]: + [hydra multirun]: + [hydra ax plugin]: + [hydra nevergrad plugin]: + [Intermediate Tutorial]: + [Advanced Tutorial]: diff --git a/examples/00_mnist_template.py b/examples/mnist_00.py similarity index 66% rename from examples/00_mnist_template.py rename to examples/mnist_00.py index 58f6a16..3bf8633 100644 --- a/examples/00_mnist_template.py +++ b/examples/mnist_00.py @@ -1,46 +1,41 @@ from __future__ import print_function -import argparse import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms +from torch.optim import Adadelta from torch.optim.lr_scheduler import StepLR -###### Hydra Block ###### -from typing import List, Any -from omegaconf import MISSING +###### HYDRA BLOCK ###### import hydra from hydra.core.config_store import ConfigStore +from typing import List, Any +from omegaconf import MISSING from dataclasses import dataclass -# config schema imports +# structured config imports from config.torch.optim import AdadeltaConf from config.torch.optim.lr_scheduler import StepLRConf @dataclass -class ExportedArgparseArgs: - epochs: int = 14 +class MNISTConf: batch_size: int = 64 test_batch_size: int = 1000 + epochs: int = 14 no_cuda: bool = False - save_model: bool = False dry_run: bool = False - log_interval: int = 10 seed: int = 1 + log_interval: int = 10 + save_model: bool = False checkpoint_name: str = 'unnamed.pt' - -@dataclass -class MNISTConf: - args: ExportedArgparseArgs = ExportedArgparseArgs() - optim: Any = AdadeltaConf() - scheduler: Any = StepLRConf(step_size=1) - + adadelta: Any = AdadeltaConf() + steplr: Any = StepLRConf(step_size=1) # we pass a default for step_size since it is required, but missing a default in PyTorch (and consequently in hydra-torch) cs = ConfigStore.instance() cs.store(name="config", node=MNISTConf) -###### / Hydra Block ###### +###### / HYDRA BLOCK ###### class Net(nn.Module): def __init__(self): @@ -105,14 +100,14 @@ def test(model, device, test_loader): @hydra.main(config_name='config') -def main(cfg): +def main(cfg): # DIFF print(cfg.pretty()) - use_cuda = not cfg.args.no_cuda and torch.cuda.is_available() - 
torch.manual_seed(cfg.args.seed) + use_cuda = not cfg.no_cuda and torch.cuda.is_available() # DIFF + torch.manual_seed(cfg.seed) # DIFF device = torch.device("cuda" if use_cuda else "cpu") - train_kwargs = {'batch_size': cfg.args.batch_size} - test_kwargs = {'batch_size': cfg.args.test_batch_size} + train_kwargs = {'batch_size': cfg.batch_size} # DIFF + test_kwargs = {'batch_size': cfg.test_batch_size} # DIFF if use_cuda: cuda_kwargs = {'num_workers': 1, 'pin_memory': True, @@ -120,29 +115,36 @@ def main(cfg): train_kwargs.update(cuda_kwargs) test_kwargs.update(cuda_kwargs) - #transform=transforms.Compose([ - # transforms.ToTensor(), - # transforms.Normalize((0.1307,), (0.3081,)) - # ]) - #dataset1 = datasets.MNIST('../data', train=True, download=True, - # transform=transform) - #dataset2 = datasets.MNIST('../data', train=False, - # transform=transform) - #train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) - #test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)) + ]) + dataset1 = datasets.MNIST('../data', train=True, download=True, + transform=transform) + dataset2 = datasets.MNIST('../data', train=False, + transform=transform) + train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) model = Net().to(device) - # use hydra.utils.instantiate to instantiate the optimizer and the scheduler: - optimizer = hydra.utils.instantiate(cfg.optim, params=model.parameters()) - scheduler = hydra.utils.instantiate(cfg.scheduler, optimizer=optimizer) - for epoch in range(1, cfg.args.epochs + 1): - train(cfg.args, model, device, train_loader, optimizer, epoch) + optimizer = Adadelta(lr=cfg.adadelta.lr, + rho=cfg.adadelta.rho, + eps=cfg.adadelta.eps, + weight_decay=cfg.adadelta.weight_decay, + params=model.parameters()) # DIFF + scheduler = StepLR(step_size=cfg.steplr.step_size, + gamma=cfg.steplr.gamma, + last_epoch=cfg.steplr.last_epoch, + optimizer=optimizer) # DIFF + + for epoch in range(1, cfg.epochs + 1): # DIFF + train(cfg, model, device, train_loader, optimizer, epoch) # DIFF test(model, device, test_loader) scheduler.step() - if cfg.args.save_model: - torch.save(model.state_dict(), cfg.args.checkpoint_name) + if cfg.save_model: # DIFF + torch.save(model.state_dict(), cfg.checkpoint_name) # DIFF if __name__ == '__main__': From 7d88d5f81aaf9537cdd853d6fe2cea0e3b1635e9 Mon Sep 17 00:00:00 2001 From: Rosario Scalise Date: Sat, 24 Oct 2020 21:52:54 -0700 Subject: [PATCH 08/10] Formatting on mnist_00.md. Will do another read through tomorrow. --- examples/mnist_00.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/mnist_00.md b/examples/mnist_00.md index 95eab72..1e86ea1 100644 --- a/examples/mnist_00.md +++ b/examples/mnist_00.md @@ -4,8 +4,9 @@ This tutorial series is built around the [PyTorch MNIST example] and is meant to The source file can be found in [mnist_00.py] -### The 'HYDRA BLOCK' *** +## The 'HYDRA BLOCK' + For clarity in this tutorial, as we modify the [PyTorch MNIST example], will make the diffs explicit. Most of the changes we introduce will be at the top of the file within the commented `##### HYDRA BLOCK #####`, though in practice much of this block could reside in its own concise imported file. 
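For instance, one possible layout (a sketch only, with a hypothetical module name) moves the whole block into its own file and registers the config as an import side effect:

```python
# mnist_conf.py : hypothetical home for the 'HYDRA BLOCK' sketched in this tutorial
from dataclasses import dataclass
from typing import Any

from hydra.core.config_store import ConfigStore

# config schema imports from hydra-torch
from config.torch.optim import AdadeltaConf
from config.torch.optim.lr_scheduler import StepLRConf


@dataclass
class MNISTConf:
    epochs: int = 14
    batch_size: int = 64
    adadelta: Any = AdadeltaConf()
    steplr: Any = StepLRConf(step_size=1)  # step_size has no default upstream (MISSING)


# registering at import time keeps the training script free of config plumbing
cs = ConfigStore.instance()
cs.store(name="config", node=MNISTConf)
```

The training script would then only need `import mnist_conf` (so the registration above runs) together with the usual `@hydra.main(config_name="config")` decorator.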
### Imports @@ -74,7 +75,7 @@ Very simply, we add the top-level config class `MNISTConf` to the `ConfigStore` cs = ConfigStore.instance() cs.store(name="config", node=MNISTConf) ``` -The name `config` will be passed to the `@hydra` decordator when we get to `main()`. +The name `config` will be passed to the `@hydra` decorator when we get to `main()`. *** ### Parting with Argparse @@ -144,7 +145,7 @@ def main(cfg): print(cfg.pretty()) ... ``` -The single idea here is that `@hydra.main` looks for a config in the `ConfigStore` instance, `cs` named "`config`". It finds `MNISTConf` (our top level conf) and populates `cfg` inside `main()` with the entire structured config including our optimizer config, `cfg.adadelta` and our scheduler config, `cfg.steplr`. +The single idea here is that `@hydra.main` looks for a config in the `ConfigStore` instance, `cs` named "`config`". It finds `MNISTConf` (our top level conf) and populates `cfg` inside `main()` with the entire structured config including our optimizer and scheduler configs, `cfg.adadelta` and `cfg.steplr` respectively. Instrumenting `main()` is simple. Anywhere we find `args`, replace this with `cfg` since we put all of the `argparse` arguments at the top level. For example, `args.batch_size` becomes `cfg.batch_size`: ```python @@ -179,7 +180,8 @@ scheduler = StepLR(step_size=cfg.steplr.step_size, optimizer=optimizer ``` This method for instantiation is the least invasive to the original code, but it is also the least flexible and highly verbose. Check out the [Intermediate Tutorial] for a better approach that will allow us to hotswap optimizers and schedulers, all while writing less code. - + +*** ### Running with Hydra ```bash @@ -213,6 +215,7 @@ $ python mnist_00.py -m epochs=1 dry_run=True adadelta.lr="0.001,0.01, 0.1" `Note:` these jobs can be dispatched to different resources and run in parallel or scheduled to run serially (by default). More info on multirun: [Hydra Multirun]. Hydra can use different hyperparameter search tools as well. See: [Hydra Ax plugin] and [Hydra Nevergrad plugin]. +*** ### Summary In this tutorial, we demonstrated the path of least resistance to configuring your existing PyTorch code with Hydra. The main benefits we get from the 'Basic' level are: - No more boilerplate `argparse` taking up precious linecount From cb22511be7cea8a542b1b229f4c55839936511ff Mon Sep 17 00:00:00 2001 From: rosario Date: Sun, 25 Oct 2020 19:59:21 -0700 Subject: [PATCH 09/10] Incorporate feedback; formatting --- examples/mnist_00.md | 123 +++++++++++++++++++++++++++---------------- examples/mnist_00.py | 115 +++++++++++++++++++++++----------------- 2 files changed, 144 insertions(+), 94 deletions(-) diff --git a/examples/mnist_00.md b/examples/mnist_00.md index 1e86ea1..d247341 100644 --- a/examples/mnist_00.md +++ b/examples/mnist_00.md @@ -1,8 +1,30 @@ # MNIST Basic Tutorial -This tutorial series is built around the [PyTorch MNIST example] and is meant to demonstrate how to modify your PyTorch code to be configured by Hydra. We will start with the simplest case which introduces one central concept while minimizing altered code. In the following tutorials ([Intermediate][Intermediate Tutorial] and [Advanced][Advanced Tutorial]), we will show how a few additional changes can yield a very powerful end product. +This tutorial series is built around the [PyTorch MNIST example] and is meant to demonstrate how to modify your PyTorch code to be configured by Hydra. 
We will start with the simplest case which introduces one central concept while minimizing altered code. In the following tutorials ([Intermediate][Intermediate Tutorial] and [Advanced][Advanced Tutorial]), we will show how a few additional changes can yield an even more powerful end product. + +The source file can be found at [mnist_00.py]. + +### Pre-reading +Although this tutorial is aimed at being self-contained, taking a look through Hydra's terminology as well as the basic and advanced tutorials couldn't hurt. + +1. [Hydra Terminology] +2. [Hydra Basic Tutorial] +3. [Hydra Structured Configs Tutorial] + +### Contents + +1. [The Hydra Block](#the-hydra-block) + 1. [Imports](#imports) + 2. [Parting with Argparse](#parting-with-argparse) + 3. [Top Level Config](#top-level-config) + 4. [Adding the Top Level Config to the ConfigStore](#adding-the-top-level-config-to-the-configstore) +2. [Dropping into `main()`](#dropping-into-main) + 1. [Instantiating the Optimizer and Scheduler](#instantiating-the-optimizer-and-scheduler) +3. [Running with Hydra](#running-with-hydra) + 1. [Commandline Overrides](#command-line-overrides) + 2. [Multirun](#multirun) +4. [Summary](#summary) -The source file can be found in [mnist_00.py] *** ## The 'HYDRA BLOCK' @@ -12,10 +34,10 @@ For clarity in this tutorial, as we modify the [PyTorch MNIST example], will mak ### Imports ```python import hydra -from hydra.core.config_store import ConfigStore from typing import List, Any -from omegaconf import +from omegaconf import MISSING from dataclasses import dataclass +from hydra.core.config_store import ConfigStore # config schema imports from config.torch.optim import AdadeltaConf @@ -23,16 +45,17 @@ from config.torch.optim.lr_scheduler import StepLRConf ``` There are two areas in our Hydra-specific imports. First, since we define configs in this file, we need access to the following: -- the `ConfigStore` - typing from both `typing` and `omegaconf` -- the `dataclass` decorator +- the `dataclass` decorator (for structured configs) +- the `ConfigStore` + +**[OmegaConf]** is an external library that Hydra is built around. Every config object is a datastructure defined by OmegaConf. For our purposes, we use it to specify typing and special constants such as [`MISSING`] when there is no value specified. -##### Config Store -*where we store our configs* +**[Structured Configs][hydra structured configs tutorial]** are dataclasses that Hydra can use to compose complex config objects. We can think of them as templates or 'starting points' for our configs. Each `*Conf` file provided by `hydra-torch` is a structured config. -Briefly, the concept behind the `ConfigStore` is to create a singleton object of this class and register all config objects to it. This tutorial demonstrates the simplest approach to using the `ConfigStore`. +**The [ConfigStore]** is a singleton object which all config objects are registered to. This gives Hydra access to our structured config definitions. -##### Config Schema +#### Config Schema *our config templates - providing type checking and good defaults* Second, we import two [config schema] from `hydra-torch`. Think of config schema as recommended templates for commonly used configurations. `hydra-torch` provides config schema for a large subset of common PyTorch classes. In the basic tutorial, we only consider the schema for the PyTorch classes: @@ -45,18 +68,21 @@ Note that the naming convention for the import heirarchy mimics that of `torch`. 
We try to preserve the naming convention of using the suffix `-Conf` at all times to distinguish the config schema class from the class of the object that is to be configured. +*** ### Top Level Config -After importing two pre-defined config schema for components in our training pipeline, the optimizer and scheduler, we still need a "top level" config to merge everything. We can call this config class `MNISTConf`. You will notice that this class is nothing more than a python `dataclass` and corresponds to, you guessed it, a *config schema*. We are responsible for writing this since it is not a standard class from pytorch that `hydra-torch` has a schema for. +After importing two pre-defined config schema for components in our training pipeline, the optimizer and scheduler, we still need a "top level" config to merge everything. We can call this config class `MNISTConf`. You will notice that this class is nothing more than a python `dataclass` and corresponds to, you guessed it, a *config schema*. + +The top level config is application specific and thus is not provided by `hydra-torch`. -We can start this out with the configs we know we will need for the optimizer (`Adadelta`) and scheduler (`StepLR`): +We can start this out by including the configs we know we will need for the optimizer (`Adadelta`) and scheduler (`StepLR`): ```python # our top level config: @dataclass class MNISTConf: - adadelta: Any = AdadeltaConf() - steplr: Any = StepLRConf(step_size=1) + adadelta: AdadeltaConf = AdadeltaConf() + steplr: StepLRConf = StepLRConf(step_size=1) ``` -Note that for `StepLRConf()` we need to pass `step_size=1` when we initialize it because it's default value is `MISSING`: +Notice that for `StepLRConf()` we need to pass `step_size=1` when we initialize because it's default value is `MISSING`. ```python # the class imported from: config.torch.optim.lr_scheduler: @dataclass @@ -64,21 +90,22 @@ class StepLRConf: _target_: str = "torch.optim.lr_scheduler.StepLR" optimizer: Any = MISSING step_size: Any = MISSING - gamma: Any = 0.1 - last_epoch: Any = -1 + gamma: Any = 0.1 last_epoch: Any = -1 ``` -Later, we will pass the optimizer (also default `MISSING`) as a passed through argument when the actual `StepLR` object is instantiated. +> **NOTE:** The `hydra-torch` configs are generated from the PyTorch source and rely on whether the module uses type annotation. Once additional type annotation is added, these configs will become more strict providing greater type safety. -### Add the Top Level Conf to the ConfigStore +Later, we will specify the optimizer (also default `MISSING`) as a passed through argument when the actual `StepLR` object is instantiated. + +### Adding the Top Level Config to the ConfigStore Very simply, we add the top-level config class `MNISTConf` to the `ConfigStore` in two lines: ```python cs = ConfigStore.instance() -cs.store(name="config", node=MNISTConf) +cs.store(name="mnistconf", node=MNISTConf) ``` -The name `config` will be passed to the `@hydra` decorator when we get to `main()`. +The name `mnistconf` will be passed to the `@hydra` decorator when we get to `main()`. *** -### Parting with Argparse +### 👋 Parting with Argparse Now we're starting to realize our relationship with `argparse` isn't as serious as we thought it was. Although `argparse` is powerful, we can take it a step further. In the process we hope to introduce greater organization and free our primary file from as much boilerplate as possible. 
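Before we start stripping out `argparse`, it can help to see what Hydra will actually compose from a top-level config like the one above. Below is a minimal, self-contained sketch of that composition -- `OptimSketch`, `SchedulerSketch`, and `TopLevelSketch` are hypothetical, trimmed-down stand-ins for `AdadeltaConf`, `StepLRConf`, and `MNISTConf` (not the real generated configs), and only `omegaconf` is needed to run it:
```python
from dataclasses import dataclass, field
from typing import Any

from omegaconf import MISSING, OmegaConf


@dataclass
class OptimSketch:  # hypothetical stand-in for AdadeltaConf
    _target_: str = "torch.optim.adadelta.Adadelta"
    params: Any = MISSING  # must be supplied later, at instantiation time
    lr: Any = 1.0


@dataclass
class SchedulerSketch:  # hypothetical stand-in for StepLRConf
    _target_: str = "torch.optim.lr_scheduler.StepLR"
    optimizer: Any = MISSING
    step_size: Any = MISSING
    gamma: Any = 0.1


@dataclass
class TopLevelSketch:  # hypothetical stand-in for MNISTConf
    epochs: int = 14
    # default_factory keeps the sketch valid on newer Python versions that
    # reject mutable dataclass defaults
    adadelta: OptimSketch = field(default_factory=OptimSketch)
    steplr: SchedulerSketch = field(default_factory=lambda: SchedulerSketch(step_size=1))


print(OmegaConf.to_yaml(OmegaConf.structured(TopLevelSketch)))
# epochs: 14
# adadelta:
#   _target_: torch.optim.adadelta.Adadelta
#   params: ???          (MISSING renders as ???)
#   lr: 1.0
# steplr:
#   _target_: torch.optim.lr_scheduler.StepLR
#   optimizer: ???
#   step_size: 1         (the default we passed in)
#   gamma: 0.1
```
Registering the real `MNISTConf` with `cs.store(...)` is what lets Hydra hand this same composed structure to `main()` as `cfg`.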
@@ -123,29 +150,29 @@ class MNISTConf: batch_size: int = 64 test_batch_size: int = 1000 epochs: int = 14 - lr: float = 1.0 # REMOVE THIS SINCE IT IS NOW WITHIN `AdadeltaConf` BELOW - gamma: float = 0.7 # REMOVE THIS SINCE IT IS NOW WITHIN `AdadeltaConf` BELOW no_cuda: bool = False dry_run: bool = False seed: int = 1 - log_interval: int + log_interval: int save_model: bool = False - adadelta: Any = AdadeltaConf() - steplr: Any = StepLRConf(step_size=1) + adadelta: AdadeltaConf = AdadeltaConf() + steplr: StepLRConf = StepLRConf(step_size=1) ``` -This works, but can feel a bit flat and disorganized (much like `argparse` args can be). Note, we also sacrifice `help` strings. Don't worry, we will remedy both of these concerns down the road. +> **NOTE:** `learning_rate` and `gamma` are included in `AdadeltaConf()` and so they were omitted from the top-level args. + +This works, but can feel a bit flat and disorganized (much like `argparse` args can be). Don't worry, we will remedy this later in the tutorials. Note, we also sacrifice `help` strings. This is a planned feature, but not supported in Hydra just yet. Now our `argparse` args are at the same level as our optimizer and scheduler configs. We will remove `lr` and `gamma` since they are already present within the optimizer config `AdadeltaConf`. *** -### Dropping into `main()` +## Dropping into `main()` Now that we've defined all of our configs, we just need to let Hydra create our `cfg` object at runtime and make sure the `cfg` is plumbed to any object we want it to configure. ```python -@hydra.main(config_name='config') +@hydra.main(config_name='mnistconf') def main(cfg): print(cfg.pretty()) ... ``` -The single idea here is that `@hydra.main` looks for a config in the `ConfigStore` instance, `cs` named "`config`". It finds `MNISTConf` (our top level conf) and populates `cfg` inside `main()` with the entire structured config including our optimizer and scheduler configs, `cfg.adadelta` and `cfg.steplr` respectively. +The single idea here is that `@hydra.main` looks for a config in the `ConfigStore` instance, `cs` named "`mnistconf`". It finds the `MNISTConf` (our top level conf) we registered to that name and populates `cfg` inside `main()` with the fully expanded structured config. This includes our optimizer and scheduler configs, `cfg.adadelta` and `cfg.steplr`, respectively. Instrumenting `main()` is simple. Anywhere we find `args`, replace this with `cfg` since we put all of the `argparse` arguments at the top level. For example, `args.batch_size` becomes `cfg.batch_size`: ```python @@ -167,31 +194,31 @@ Still inside `main()`, we want to draw attention to two slightly special cases b ```python optimizer = Adadelta(lr=cfg.adadelta.lr, #DIFF lr=args.learning_rate rho=cfg.adadelta.rho, - eps=cfg.adadelta.eps, + eps=cfg.adadelta.eps, weight_decay=cfg.adadelta.weight_decay, params=model.parameters() - ``` -In the case of the `optimizer`, one argument is not a part of our config -- `params`. If it wasn't obvious, this needs to be passed from the initialized `Net()` model. In the config schema that initialized `cfg.adadelta`, `params` is default to `MISSING`. The same is true of the `optimizer` field in `StepLRConf`. + ``` +In this case, the `optimizer` has one argument that is not a part of our config -- `params`. If it wasn't obvious, this needs to be passed from the initialized `Net()` called model. In the config schema that initialized `cfg.adadelta`, `params` is default to `MISSING`. 
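As a quick aside, here is a tiny, self-contained sketch of how OmegaConf treats a `MISSING` field (the `AdadeltaSketch` dataclass is a hypothetical, trimmed-down stand-in for `AdadeltaConf`): concrete defaults are readable immediately, while reading a `MISSING` value before supplying it is an error.
```python
from dataclasses import dataclass
from typing import Any

from omegaconf import MISSING, OmegaConf


@dataclass
class AdadeltaSketch:  # hypothetical stand-in, not the real hydra-torch AdadeltaConf
    _target_: str = "torch.optim.adadelta.Adadelta"
    params: Any = MISSING
    lr: Any = 1.0


cfg = OmegaConf.structured(AdadeltaSketch)
print(cfg.lr)                               # 1.0 -- usable right away
print(OmegaConf.is_missing(cfg, "params"))  # True -- there is no sensible default
try:
    cfg.params                              # reading a MISSING field raises
except Exception as err:
    print(type(err).__name__)               # MissingMandatoryValue
```
This is why `params` shows up above as an explicit keyword argument at instantiation time rather than coming out of the config.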
The same is true of the `optimizer` field in `StepLRConf`. ```python scheduler = StepLR(step_size=cfg.steplr.step_size, gamma=cfg.steplr.gamma, last_epoch=cfg.steplr.last_epoch, optimizer=optimizer - ``` + ``` This method for instantiation is the least invasive to the original code, but it is also the least flexible and highly verbose. Check out the [Intermediate Tutorial] for a better approach that will allow us to hotswap optimizers and schedulers, all while writing less code. - + *** -### Running with Hydra +## 🏃 Running with Hydra ```bash $ python 00_minst.py ``` That's it. Since the `@hydra.main` decorator is above `def main(cfg)`, Hydra will manage the command line, logging, and saving outputs to a date/time stamped directory automatically. These are all configurable, but the default behavior ensures expected functionality. For example, if a model checkpoint is saved, it will appear in a new directory `./outputs/DATE/TIME/`. -#### New Superpowers +### New Super Powers 🦸 -##### Command Line Overrides +#### Command Line Overrides Much like passing argparse args through the CLI, we can use our default values specified in `MNISTConf` and override only the arguments/parameters we want to tweak: @@ -201,7 +228,7 @@ $ python mnist_00.py epochs=1 save_model=True checkpoint_name='experiment0.pt' For more on command line overrides, see: [Hydra CLI] and [Hydra override syntax]. -##### Multirun +#### Multirun We often end up wanting to sweep our optimizer's learning rate. Here's how Hydra can help facilitate: ```bash $ python mnist_00.py -m adadelta.lr="0.001, 0.01, 0.1" @@ -216,11 +243,12 @@ $ python mnist_00.py -m epochs=1 dry_run=True adadelta.lr="0.001,0.01, 0.1" `Note:` these jobs can be dispatched to different resources and run in parallel or scheduled to run serially (by default). More info on multirun: [Hydra Multirun]. Hydra can use different hyperparameter search tools as well. See: [Hydra Ax plugin] and [Hydra Nevergrad plugin]. *** -### Summary +## Summary In this tutorial, we demonstrated the path of least resistance to configuring your existing PyTorch code with Hydra. The main benefits we get from the 'Basic' level are: - No more boilerplate `argparse` taking up precious linecount - All training related arguments (`epochs`, `save_model`, etc.) are now configurable via Hydra. -- **All** optimizer/scheduler (`Adadelta`/`StepLR`) arguments are exposed for configuration (beyond only the ones the user write argparse lines for) +- **All** optimizer/scheduler (`Adadelta`/`StepLR`) arguments are exposed for configuration + -- extending beyond only the ones the user wrote argparse code for - We have offloaded the book-keeping of compatible `argparse` code to Hydra via `hydra-torch` which runs tests ensuring all arguments track the API for the correct version of `pytorch`. However, there are some limitations in our current strategy that the [Intermediate Tutorial] will address. Namely: @@ -228,18 +256,23 @@ However, there are some limitations in our current strategy that the [Intermedia - Configuring the dataset (*think transfer learning*) - Swapping in and out different Optimizers/Schedulers -Once comfortable with the basics, continue on to the [Intermediate Tutorial]. +Once comfortable with the basics, continue on to the [Intermediate Tutorial]. [//]: # (These are reference links used in the body of this note and get stripped out when the markdown processor does its job. There is no need to format nicely because it shouldn't be seen. 
Thanks SO - http://stackoverflow.com/questions/4823468/store-comments-in-markdown-syntax) [pytorch mnist example]: [mnist_00.py]: mnist_00.py [config schema]: + [configstore]: + [hydra basic tutorial]: + [hydra structured configs tutorial]: [hydra structured configs example]: [hydra terminology]: + [omegaconf]: + [`missing`]: [hydra cli]: [hydra override syntax]: [hydra multirun]: [hydra ax plugin]: [hydra nevergrad plugin]: - [Intermediate Tutorial]: - [Advanced Tutorial]: + [Intermediate Tutorial]: mnist_01.md + [Advanced Tutorial]: mnist_02.md diff --git a/examples/mnist_00.py b/examples/mnist_00.py index 3bf8633..185663a 100644 --- a/examples/mnist_00.py +++ b/examples/mnist_00.py @@ -1,23 +1,22 @@ +# flake8: noqa from __future__ import print_function import torch import torch.nn as nn import torch.nn.functional as F -import torch.optim as optim from torchvision import datasets, transforms from torch.optim import Adadelta from torch.optim.lr_scheduler import StepLR ###### HYDRA BLOCK ###### import hydra -from hydra.core.config_store import ConfigStore -from typing import List, Any -from omegaconf import MISSING from dataclasses import dataclass +from hydra.core.config_store import ConfigStore # structured config imports from config.torch.optim import AdadeltaConf from config.torch.optim.lr_scheduler import StepLRConf + @dataclass class MNISTConf: batch_size: int = 64 @@ -28,15 +27,19 @@ class MNISTConf: seed: int = 1 log_interval: int = 10 save_model: bool = False - checkpoint_name: str = 'unnamed.pt' - adadelta: Any = AdadeltaConf() - steplr: Any = StepLRConf(step_size=1) # we pass a default for step_size since it is required, but missing a default in PyTorch (and consequently in hydra-torch) + checkpoint_name: str = "unnamed.pt" + adadelta: AdadeltaConf = AdadeltaConf() + steplr: StepLRConf = StepLRConf( + step_size=1 + ) # we pass a default for step_size since it is required, but missing a default in PyTorch (and consequently in hydra-torch) + cs = ConfigStore.instance() -cs.store(name="config", node=MNISTConf) +cs.store(name="mnistconf", node=MNISTConf) ###### / HYDRA BLOCK ###### + class Net(nn.Module): def __init__(self): super(Net, self).__init__() @@ -73,9 +76,15 @@ def train(args, model, device, train_loader, optimizer, epoch): loss.backward() optimizer.step() if batch_idx % args.log_interval == 0: - print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.dataset), - 100. * batch_idx / len(train_loader), loss.item())) + print( + "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( + epoch, + batch_idx * len(data), + len(train_loader.dataset), + 100.0 * batch_idx / len(train_loader), + loss.item(), + ) + ) if args.dry_run: break @@ -88,64 +97,72 @@ def test(model, device, test_loader): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) - test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss - pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + test_loss += F.nll_loss( + output, target, reduction="sum" + ).item() # sum up batch loss + pred = output.argmax( + dim=1, keepdim=True + ) # get the index of the max log-probability correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) - print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. 
* correct / len(test_loader.dataset))) + print( + "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format( + test_loss, + correct, + len(test_loader.dataset), + 100.0 * correct / len(test_loader.dataset), + ) + ) -@hydra.main(config_name='config') -def main(cfg): # DIFF +@hydra.main(config_name="mnistconf") +def main(cfg): # DIFF print(cfg.pretty()) - use_cuda = not cfg.no_cuda and torch.cuda.is_available() # DIFF - torch.manual_seed(cfg.seed) # DIFF + use_cuda = not cfg.no_cuda and torch.cuda.is_available() # DIFF + torch.manual_seed(cfg.seed) # DIFF device = torch.device("cuda" if use_cuda else "cpu") - train_kwargs = {'batch_size': cfg.batch_size} # DIFF - test_kwargs = {'batch_size': cfg.test_batch_size} # DIFF + train_kwargs = {"batch_size": cfg.batch_size} # DIFF + test_kwargs = {"batch_size": cfg.test_batch_size} # DIFF if use_cuda: - cuda_kwargs = {'num_workers': 1, - 'pin_memory': True, - 'shuffle': True} + cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True} train_kwargs.update(cuda_kwargs) test_kwargs.update(cuda_kwargs) - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ]) - dataset1 = datasets.MNIST('../data', train=True, download=True, - transform=transform) - dataset2 = datasets.MNIST('../data', train=False, - transform=transform) - train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) + transform = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform) + dataset2 = datasets.MNIST("../data", train=False, transform=transform) + train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs) test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) model = Net().to(device) - optimizer = Adadelta(lr=cfg.adadelta.lr, - rho=cfg.adadelta.rho, - eps=cfg.adadelta.eps, - weight_decay=cfg.adadelta.weight_decay, - params=model.parameters()) # DIFF - scheduler = StepLR(step_size=cfg.steplr.step_size, - gamma=cfg.steplr.gamma, - last_epoch=cfg.steplr.last_epoch, - optimizer=optimizer) # DIFF - - for epoch in range(1, cfg.epochs + 1): # DIFF - train(cfg, model, device, train_loader, optimizer, epoch) # DIFF + optimizer = Adadelta( + lr=cfg.adadelta.lr, + rho=cfg.adadelta.rho, + eps=cfg.adadelta.eps, + weight_decay=cfg.adadelta.weight_decay, + params=model.parameters(), + ) # DIFF + scheduler = StepLR( + step_size=cfg.steplr.step_size, + gamma=cfg.steplr.gamma, + last_epoch=cfg.steplr.last_epoch, + optimizer=optimizer, + ) # DIFF + + for epoch in range(1, cfg.epochs + 1): # DIFF + train(cfg, model, device, train_loader, optimizer, epoch) # DIFF test(model, device, test_loader) scheduler.step() - if cfg.save_model: # DIFF - torch.save(model.state_dict(), cfg.checkpoint_name) # DIFF + if cfg.save_model: # DIFF + torch.save(model.state_dict(), cfg.checkpoint_name) # DIFF -if __name__ == '__main__': +if __name__ == "__main__": main() From 585fd50d9cfaba925aa9a424d235d43a8e1391e9 Mon Sep 17 00:00:00 2001 From: rosario Date: Mon, 26 Oct 2020 13:07:08 -0700 Subject: [PATCH 10/10] final draft --- examples/mnist_00.md | 59 +++++++++++++++++++++++++------------------- examples/mnist_00.py | 4 +-- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/examples/mnist_00.md b/examples/mnist_00.md index d247341..9d0c671 100644 --- a/examples/mnist_00.md +++ b/examples/mnist_00.md @@ -29,50 +29,57 @@ Although this tutorial is aimed at being self-contained, 
taking a look through H *** ## The 'HYDRA BLOCK' -For clarity in this tutorial, as we modify the [PyTorch MNIST example], will make the diffs explicit. Most of the changes we introduce will be at the top of the file within the commented `##### HYDRA BLOCK #####`, though in practice much of this block could reside in its own concise imported file. +For clarity, as we modify the [PyTorch MNIST example], we will make the diffs explicit. Most of the changes we introduce will be at the top of the file within the commented `##### HYDRA BLOCK #####`, though in practice much of this block could reside in its own concise imported file. ### Imports ```python import hydra -from typing import List, Any -from omegaconf import MISSING -from dataclasses import dataclass from hydra.core.config_store import ConfigStore +from dataclasses import dataclass -# config schema imports +# hydra-torch structured config imports from config.torch.optim import AdadeltaConf from config.torch.optim.lr_scheduler import StepLRConf ``` There are two areas in our Hydra-specific imports. First, since we define configs in this file, we need access to the following: -- typing from both `typing` and `omegaconf` -- the `dataclass` decorator (for structured configs) - the `ConfigStore` +- the `dataclass` decorator (for structured configs) -**[OmegaConf]** is an external library that Hydra is built around. Every config object is a datastructure defined by OmegaConf. For our purposes, we use it to specify typing and special constants such as [`MISSING`] when there is no value specified. +**The [ConfigStore]** is a singleton object which all config objects are registered to. This gives Hydra access to our structured config definitions *once they're registered*. -**[Structured Configs][hydra structured configs tutorial]** are dataclasses that Hydra can use to compose complex config objects. We can think of them as templates or 'starting points' for our configs. Each `*Conf` file provided by `hydra-torch` is a structured config. +**[Structured Configs][hydra structured configs tutorial]** are dataclasses that Hydra can use to compose complex config objects. We can think of them as templates or 'starting points' for our configs. Each `*Conf` file provided by `hydra-torch` is a structured config. See an example of one below: + +```python +# the structured config for Adadelta imported from config.torch.optim: +@dataclass +class AdadeltaConf: + _target_: str = "torch.optim.adadelta.Adadelta" + params: Any = MISSING + lr: Any = 1.0 + rho: Any = 0.9 + eps: Any = 1e-06 + weight_decay: Any = 0 +``` -**The [ConfigStore]** is a singleton object which all config objects are registered to. This gives Hydra access to our structured config definitions. +> **NOTE:** [`MISSING`] is a special constant used to indicate there is no default value specified. -#### Config Schema -*our config templates - providing type checking and good defaults* +The second set of imports correspond to two components in the training pipeline of the [PyTorch MNIST example]: -Second, we import two [config schema] from `hydra-torch`. Think of config schema as recommended templates for commonly used configurations. `hydra-torch` provides config schema for a large subset of common PyTorch classes. In the basic tutorial, we only consider the schema for the PyTorch classes: - `Adadelta` which resides in `torch.optim` - `StepLR` which resides in `torch.optim.lr_scheduler` -Note that the naming convention for the import heirarchy mimics that of `torch`. 
We correspondingly import the following config schema: +Note that the naming convention for the import hierarchy mimics that of `torch`. We correspondingly import the following structured configs: - `AdadeltaConf` from `config.torch.optim` - `StepLRConf` from `config.torch.optim.lr_scheduler` -We try to preserve the naming convention of using the suffix `-Conf` at all times to distinguish the config schema class from the class of the object that is to be configured. +Generally, we follow the naming convention of applying the suffix `-Conf` to distinguish the structured config class from the class of the object to be configured. *** ### Top Level Config -After importing two pre-defined config schema for components in our training pipeline, the optimizer and scheduler, we still need a "top level" config to merge everything. We can call this config class `MNISTConf`. You will notice that this class is nothing more than a python `dataclass` and corresponds to, you guessed it, a *config schema*. +After importing two pre-defined structured configs for components in our training pipeline, the optimizer and scheduler, we still need a "top level" config to merge everything. We can call this config class `MNISTConf`. You will notice that this class is nothing more than a python `dataclass` and corresponds to, you guessed it, a *structured config*. -The top level config is application specific and thus is not provided by `hydra-torch`. +> **NOTE:** The top level config is application specific and thus is not provided by `hydra-torch`. We can start this out by including the configs we know we will need for the optimizer (`Adadelta`) and scheduler (`StepLR`): ```python @@ -84,7 +91,7 @@ class MNISTConf: ``` Notice that for `StepLRConf()` we need to pass `step_size=1` when we initialize because it's default value is `MISSING`. ```python -# the class imported from: config.torch.optim.lr_scheduler: +# the structured config imported from hydra-torch in config.torch.optim.lr_scheduler @dataclass class StepLRConf: _target_: str = "torch.optim.lr_scheduler.StepLR" @@ -97,7 +104,7 @@ class StepLRConf: Later, we will specify the optimizer (also default `MISSING`) as a passed through argument when the actual `StepLR` object is instantiated. ### Adding the Top Level Config to the ConfigStore -Very simply, we add the top-level config class `MNISTConf` to the `ConfigStore` in two lines: +Very simply, but crucially, we add the top-level config class `MNISTConf` to the `ConfigStore` in two lines: ```python cs = ConfigStore.instance() cs.store(name="mnistconf", node=MNISTConf) @@ -105,7 +112,7 @@ cs.store(name="mnistconf", node=MNISTConf) The name `mnistconf` will be passed to the `@hydra` decorator when we get to `main()`. *** -### 👋 Parting with Argparse +### Parting with Argparse Now we're starting to realize our relationship with `argparse` isn't as serious as we thought it was. Although `argparse` is powerful, we can take it a step further. In the process we hope to introduce greater organization and free our primary file from as much boilerplate as possible. @@ -160,7 +167,7 @@ class MNISTConf: ``` > **NOTE:** `learning_rate` and `gamma` are included in `AdadeltaConf()` and so they were omitted from the top-level args. -This works, but can feel a bit flat and disorganized (much like `argparse` args can be). Don't worry, we will remedy this later in the tutorials. Note, we also sacrifice `help` strings. This is a planned feature, but not supported in Hydra just yet. 
+This works, but can feel a bit flat and disorganized (much like `argparse` args can be). Don't worry, we will remedy this later in the tutorial series. Note, we also sacrifice `help` strings. This is a planned feature, but not supported in Hydra just yet. Now our `argparse` args are at the same level as our optimizer and scheduler configs. We will remove `lr` and `gamma` since they are already present within the optimizer config `AdadeltaConf`. *** @@ -198,7 +205,7 @@ Still inside `main()`, we want to draw attention to two slightly special cases b weight_decay=cfg.adadelta.weight_decay, params=model.parameters() ``` -In this case, the `optimizer` has one argument that is not a part of our config -- `params`. If it wasn't obvious, this needs to be passed from the initialized `Net()` called model. In the config schema that initialized `cfg.adadelta`, `params` is default to `MISSING`. The same is true of the `optimizer` field in `StepLRConf`. +In this case, the `optimizer` has one argument that is not a part of our config -- `params`. If it wasn't obvious, this needs to be passed from the initialized `Net()` called model. In the structured config that initialized `cfg.adadelta`, `params` is default to `MISSING`. The same is true of the `optimizer` field in `StepLRConf`. ```python scheduler = StepLR(step_size=cfg.steplr.step_size, @@ -209,7 +216,7 @@ scheduler = StepLR(step_size=cfg.steplr.step_size, This method for instantiation is the least invasive to the original code, but it is also the least flexible and highly verbose. Check out the [Intermediate Tutorial] for a better approach that will allow us to hotswap optimizers and schedulers, all while writing less code. *** -## 🏃 Running with Hydra +## Running with Hydra ```bash $ python 00_minst.py @@ -240,15 +247,15 @@ It can be useful to test multirun outputs by passing `dry_run=True` and setting $ python mnist_00.py -m epochs=1 dry_run=True adadelta.lr="0.001,0.01, 0.1" ``` -`Note:` these jobs can be dispatched to different resources and run in parallel or scheduled to run serially (by default). More info on multirun: [Hydra Multirun]. Hydra can use different hyperparameter search tools as well. See: [Hydra Ax plugin] and [Hydra Nevergrad plugin]. +> **NOTE:** these jobs can be dispatched to different resources and run in parallel or scheduled to run serially (by default). More info on multirun: [Hydra Multirun]. Hydra can use different hyperparameter search tools as well. See: [Hydra Ax plugin] and [Hydra Nevergrad plugin]. *** ## Summary In this tutorial, we demonstrated the path of least resistance to configuring your existing PyTorch code with Hydra. The main benefits we get from the 'Basic' level are: -- No more boilerplate `argparse` taking up precious linecount +- No more boilerplate `argparse` taking up precious linecount. - All training related arguments (`epochs`, `save_model`, etc.) are now configurable via Hydra. - **All** optimizer/scheduler (`Adadelta`/`StepLR`) arguments are exposed for configuration - -- extending beyond only the ones the user wrote argparse code for + -- extending beyond only the ones the user wrote argparse code for. - We have offloaded the book-keeping of compatible `argparse` code to Hydra via `hydra-torch` which runs tests ensuring all arguments track the API for the correct version of `pytorch`. However, there are some limitations in our current strategy that the [Intermediate Tutorial] will address. 
Namely: diff --git a/examples/mnist_00.py b/examples/mnist_00.py index 185663a..12253c1 100644 --- a/examples/mnist_00.py +++ b/examples/mnist_00.py @@ -9,10 +9,10 @@ ###### HYDRA BLOCK ###### import hydra -from dataclasses import dataclass from hydra.core.config_store import ConfigStore +from dataclasses import dataclass -# structured config imports +# hydra-torch structured config imports from config.torch.optim import AdadeltaConf from config.torch.optim.lr_scheduler import StepLRConf
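As a closing aside (not part of the patch series above): once `mnist_00.py` is importable, one quick way to sanity-check the composed config and a command-line-style override without launching a training run is to merge a dotlist directly onto the structured config. This sketch assumes `mnist_00.py` and the `config.torch.*` packages it imports are on your `PYTHONPATH`:
```python
from omegaconf import OmegaConf

from mnist_00 import MNISTConf  # adjust the import to match your layout

base = OmegaConf.structured(MNISTConf)
# simulate `python mnist_00.py adadelta.lr=0.1 epochs=1` without running Hydra
overrides = OmegaConf.from_dotlist(["adadelta.lr=0.1", "epochs=1"])
cfg = OmegaConf.merge(base, overrides)

assert cfg.adadelta.lr == 0.1
assert cfg.epochs == 1
print(OmegaConf.to_yaml(cfg))
```
For a check that also exercises the `ConfigStore` registration, Hydra's compose API is another option; its import location has moved between Hydra releases, so consult the docs for the version you have installed.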