Commit: Doubled feature map size
dfan committed Dec 27, 2018
1 parent 06a3589 commit 0bda3e7
Showing 7 changed files with 30 additions and 30 deletions.
2 changes: 1 addition & 1 deletion README.md
```diff
@@ -3,7 +3,7 @@ PyTorch implementation of the hourglass architecture proposed in ["Single-Image

 ## Data Format
 Training data: single 128x128 RGB image of the object, its binary mask (a pixel is white only if the object is present in that pixel), and an RGB image of the surface normal (where the color channels represent the 3D vectors).
-Validation: given a single 128x128 RGB image of the object and its binary mask, predict the surface normal orientation and output it as an RGB image. Validation accuracy is calculated on the object's masked region, not on the background. There are 20,000 testing images and 2,000 testing images to output predictions for. Data can be downloaded [here](http://cos429-f18.cs.princeton.edu/surface-normal-prediction-website-class-project/cos429.tgz).
+Validation: given a single 128x128 RGB image of the object and its binary mask, predict the surface normal orientation and output it as an RGB image. Validation accuracy is calculated on the object's masked region, not on the background. There are 20,000 training images and 2,000 testing images to output predictions for. Data can be downloaded [here](http://cos429-f18.cs.princeton.edu/surface-normal-prediction-website-class-project/cos429.tgz).

 **Sample input**:
```
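Since both the ground truth and the predictions are surface normals stored as RGB images, each unit-vector component has to be packed into a byte channel. The usual convention is a linear map from [-1, 1] to [0, 255]; whether this dataset uses exactly that encoding is an assumption, and the helper names below are hypothetical:

```python
import numpy as np

def normals_to_rgb(normals):
    """Pack unit normals in [-1, 1] into uint8 RGB in [0, 255].

    normals: (H, W, 3) float array of per-pixel surface normals.
    NOTE: the linear [-1, 1] -> [0, 255] mapping is an assumed
    convention, not something this commit specifies.
    """
    return ((normals + 1.0) * 0.5 * 255.0).round().astype(np.uint8)

def rgb_to_normals(rgb):
    """Invert the packing and re-normalize, since byte quantization
    leaves the decoded vectors slightly off unit length."""
    n = rgb.astype(np.float32) / 255.0 * 2.0 - 1.0
    norm = np.linalg.norm(n, axis=-1, keepdims=True)
    return n / np.clip(norm, 1e-8, None)
```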
6 changes: 3 additions & 3 deletions model/model.py
```diff
@@ -16,11 +16,11 @@ def __init__(self):
         module_2 = Module2(module_3)
         module_1 = Module1(module_2)
         self.hourglass = nn.Sequential(
-            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=1, padding=3),
-            nn.BatchNorm2d(64),
+            nn.Conv2d(in_channels=3, out_channels=128, kernel_size=7, stride=1, padding=3),
+            nn.BatchNorm2d(128),
             nn.ReLU(),
             module_1,
-            nn.Conv2d(in_channels=32, out_channels=3, kernel_size=3, stride=1, padding=1)
+            nn.Conv2d(in_channels=64, out_channels=3, kernel_size=3, stride=1, padding=1)
         )

     def forward(self, x):
```
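The modules below upsample with an `Interpolate` layer whose definition is outside this diff. Judging by how it is called, it is presumably a thin module wrapper around `torch.nn.functional.interpolate` so that upsampling can sit inside an `nn.Sequential`; a minimal sketch under that assumption:

```python
import torch.nn as nn
import torch.nn.functional as F

class Interpolate(nn.Module):
    """Module wrapper around F.interpolate for use in nn.Sequential.

    NOTE: sketch of an assumed implementation; the repo's actual
    Interpolate class is not shown in this commit.
    """
    def __init__(self, scale_factor, mode='nearest'):
        super(Interpolate, self).__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
```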
12 changes: 6 additions & 6 deletions model/module1.py
```diff
@@ -8,15 +8,15 @@ def __init__(self, module_2):
         super(Module1, self).__init__()
         self.layer1 = nn.Sequential(
             nn.MaxPool2d(kernel_size=2, stride=2), # 2x
-            Inception(64, 16, [(3,16,16), (5,16,16), (7,16,16)]),
-            Inception(64, 16, [(3,16,16), (5,16,16), (7,16,16)]),
+            Inception(128, 32, [(3,32,32), (5,32,32), (7,32,32)]),
+            Inception(128, 32, [(3,32,32), (5,32,32), (7,32,32)]),
             module_2,
-            Inception(64, 16, [(3,32,16), (5,32,16), (7,32,16)]),
-            Inception(64, 8, [(3,16,8), (7,16,8), (11,16,8)]),
-            Interpolate(scale_factor=2, mode='nearest') # up to original, 64 channel
+            Inception(128, 32, [(3,64,32), (5,64,32), (7,64,32)]),
+            Inception(128, 16, [(3,32,16), (7,32,16), (11,32,16)]),
+            Interpolate(scale_factor=2, mode='nearest')
         )
         self.layer2 = nn.Sequential(
-            Inception(64, 8, [(3,32,8), (7,32,8), (11,32,8)])
+            Inception(128, 16, [(3,64,16), (7,64,16), (11,64,16)])
         )

     def forward(self,x):
```
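Every changed line in these modules is a call to a custom `Inception` block whose definition is also outside this diff. Judging from the call sites, it takes the input channel count, the width of a 1x1 branch, and a list of `(kernel_size, mid_channels, out_channels)` triples, concatenating all branch outputs, so e.g. `Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)])` would produce 64 + 3x64 = 256 channels. A sketch under those assumptions, not the repo's actual code:

```python
import torch
import torch.nn as nn

class Inception(nn.Module):
    """Plausible reconstruction of the Inception block used above.

    Assumed signature: Inception(in_channels, out_1x1, branches), where
    branches is a list of (kernel_size, mid_channels, out_channels)
    triples, each realized as a 1x1 reduction followed by a kxk conv.
    Branch outputs are concatenated, so the block emits
    out_1x1 + sum(out_channels) feature maps.
    """
    def __init__(self, in_channels, out_1x1, branches):
        super(Inception, self).__init__()
        paths = [nn.Sequential(
            nn.Conv2d(in_channels, out_1x1, kernel_size=1),
            nn.BatchNorm2d(out_1x1),
            nn.ReLU(),
        )]
        for k, mid, out in branches:
            paths.append(nn.Sequential(
                nn.Conv2d(in_channels, mid, kernel_size=1),
                nn.BatchNorm2d(mid),
                nn.ReLU(),
                # 'same' padding for odd kernel sizes keeps H x W fixed
                nn.Conv2d(mid, out, kernel_size=k, padding=k // 2),
                nn.BatchNorm2d(out),
                nn.ReLU(),
            ))
        self.paths = nn.ModuleList(paths)

    def forward(self, x):
        return torch.cat([path(x) for path in self.paths], dim=1)
```

On this reading, each module's `forward` (not shown in the diff) plausibly returns `layer1(x) + layer2(x)` as the hourglass skip connection, which is consistent with `layer1` and `layer2` producing matching channel widths in every module before and after the doubling.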
12 changes: 6 additions & 6 deletions model/module2.py
```diff
@@ -8,16 +8,16 @@ def __init__(self, module_3):
         super(Module2, self).__init__()
         self.layer1 = nn.Sequential(
             nn.MaxPool2d(kernel_size=2, stride=2), # 4x
-            Inception(64, 16, [(3,16,16), (5,16,16), (7,16,16)]),
-            Inception(64, 32, [(3,16,32), (5,16,32), (7,16,32)]),
+            Inception(128, 32, [(3,32,32), (5,32,32), (7,32,32)]),
+            Inception(128, 64, [(3,32,64), (5,32,64), (7,32,64)]),
             module_3,
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 16, [(3,16,16), (5,16,16), (7,16,16)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 32, [(3,32,32), (5,32,32), (7,32,32)]),
             Interpolate(scale_factor=2, mode='nearest') # up to 2x, output is 128 channel
         )
         self.layer2 = nn.Sequential(
-            Inception(64, 16, [(3,16,16), (5,16,16), (7,16,16)]),
-            Inception(64, 16, [(3,32,16), (7,32,16), (11,32,16)])
+            Inception(128, 32, [(3,32,32), (5,32,32), (7,32,32)]),
+            Inception(128, 32, [(3,64,32), (7,64,32), (11,64,32)])
         )

     def forward(self,x):
```
12 changes: 6 additions & 6 deletions model/module3.py
```diff
@@ -7,16 +7,16 @@ class Module3(nn.Module):
     def __init__(self, module_4):
         super(Module3, self).__init__()
         self.layer1 = nn.Sequential(
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 32, [(3,32,32), (7,32,32), (11,32,32)])
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 64, [(3,64,64), (7,64,64), (11,64,64)])
         )
         self.layer2 = nn.Sequential(
             nn.AvgPool2d(kernel_size=2, stride=2), # 8x
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
             module_4, # down 16x then up to 8x
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 32, [(3,32,32), (7,32,32), (11,32,32)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 64, [(3,64,64), (7,64,64), (11,64,64)]),
             Interpolate(scale_factor=2, mode='nearest') # up to 4x. 256 channel
         )

```
10 changes: 5 additions & 5 deletions model/module4.py
```diff
@@ -7,14 +7,14 @@ class Module4(nn.Module):
     def __init__(self):
         super(Module4, self).__init__()
         self.layer1 = nn.Sequential(
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)])
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)])
         )
         self.layer2 = nn.Sequential(
             nn.AvgPool2d(kernel_size=2, stride=2),
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
-            Inception(128, 32, [(3,16,32), (5,16,32), (7,16,32)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
+            Inception(256, 64, [(3,32,64), (5,32,64), (7,32,64)]),
             Interpolate(scale_factor=2, mode='nearest') # Up to 8x, 256 channel
         )

```
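With the concatenation semantics assumed in the `Inception` sketch above, the doubled configurations can be checked by hand: every block's output width must equal the next block's (or the enclosing module's) expected input width. A hypothetical helper makes the arithmetic explicit, e.g. confirming that `Module1` now hands 64 channels to the final `in_channels=64` conv in model.py:

```python
def inception_out_channels(out_1x1, branches):
    """Channels produced by the assumed Inception block: the 1x1 branch
    concatenated with each (kernel, mid, out) branch."""
    return out_1x1 + sum(out for _, _, out in branches)

# Bottleneck width in Module4 after doubling: 64 + 3*64 = 256.
assert inception_out_channels(64, [(3, 32, 64), (5, 32, 64), (7, 32, 64)]) == 256
# Module1's last block: 16 + 3*16 = 64, matching the final conv's in_channels=64.
assert inception_out_channels(16, [(3, 32, 16), (7, 32, 16), (11, 32, 16)]) == 64
```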
6 changes: 3 additions & 3 deletions train.py
```diff
@@ -84,7 +84,7 @@ def train(finetune, finetune_epochs):
     learning_rate = 0.001

     train_params = {'batch_size': 25, 'shuffle': True, 'num_workers': 5}
-    test_params = {'batch_size': 50, 'shuffle': True, 'num_workers': 5}
+    test_params = {'batch_size': 25, 'shuffle': True, 'num_workers': 5}

     # Load Data
     train_set = NormalsDataset(is_train = True, transform=transforms.ToTensor())
@@ -98,7 +98,7 @@ def train(finetune, finetune_epochs):
     # Load existing model and do finetuning. Only on GPU
     if finetune:
         optimizer = torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9, nesterov=True)
-        train_params = {'batch_size': 50, 'shuffle': True, 'num_workers': 5}
+        train_params = {'batch_size': 25, 'shuffle': True, 'num_workers': 5}
         train_loader = data.DataLoader(train_set, **train_params)
         model.load_state_dict(torch.load(MODEL_DIR))
         model = model.to(device)
@@ -114,7 +114,7 @@ def train(finetune, finetune_epochs):
         if epoch == 17 and not finetune:
             optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)
             # Increase batch size during slow learning phase
-            train_params = {'batch_size': 50, 'shuffle': True, 'num_workers': 5}
+            train_params = {'batch_size': 25, 'shuffle': True, 'num_workers': 5}
             train_loader = data.DataLoader(train_set, **train_params)
         elif epoch == 27 and not finetune:
             optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, nesterov=True)
```
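A quick way to confirm the doubling left the network consistent end to end is a shape smoke test: the stem conv still takes 3 channels in, the final conv maps 64 back down to 3, and every pooling step is matched by an `Interpolate`, so a 128x128 RGB input should produce a 128x128 3-channel normal map. A hypothetical check (the `Model` class name and its import path are assumptions; they are not shown in this commit):

```python
import torch
from model.model import Model  # assumed class name, not shown in this diff

# Smoke test for the doubled network: a 128x128 RGB input should come
# back as a 3-channel (RGB-encoded normal) map at the same resolution.
model = Model()
x = torch.randn(1, 3, 128, 128)
with torch.no_grad():
    y = model(x)
print(y.shape)  # expected: torch.Size([1, 3, 128, 128])
```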
