Automated dataset setup, tf 1.x support, resnet support, keras support #25

Open · wants to merge 173 commits into base: master

Commits (173)
f209718
move explicit paths to a separate script
VittalP Jan 24, 2017
fffa24a
change checkpoint_dir to save_dir
VittalP Jan 24, 2017
ab79067
convert notebook to py script for running in the terminal
VittalP Jan 24, 2017
76989ea
enable MARCC support
VittalP Jan 24, 2017
3e87730
fix typo
VittalP Jan 24, 2017
e0afdc7
automatically download vgg_16.ckpt if not found in checkpoints_dir
VittalP Jan 24, 2017
a92458b
fix slim_path
VittalP Jan 24, 2017
aa1db6b
fix issue with same names
VittalP Jan 25, 2017
217915a
log_folder to log_dir
VittalP Jan 25, 2017
cc48f0a
only the root directory needs to be added
VittalP Jan 25, 2017
3f65fef
Display iteration number along with loss
VittalP Jan 25, 2017
fb84cf4
do not set CUDA env variable on MARCC
VittalP Jan 26, 2017
dae61fc
add fcn32s testing script
VittalP Jan 26, 2017
da50b5d
add script to test fcn8s
VittalP Jan 27, 2017
16221aa
merge .gitignore from 'outer_product'
VittalP Jan 27, 2017
303a91d
ignore log files, tfrecords, etc
VittalP Jan 31, 2017
9a33a06
merge move_paths
VittalP Jan 31, 2017
6253cce
Merge remote-tracking branch 'upstream/master'
VittalP Jan 31, 2017
f33525b
add path relative to cwd
VittalP Jan 31, 2017
2a81b13
don't import function directly
VittalP Jan 31, 2017
c946e8f
add script to finetune resnet
VittalP Jan 31, 2017
caf45b2
have path for data dir
VittalP Jan 31, 2017
87cc75c
read from data_dir
VittalP Jan 31, 2017
541fd65
save, log and data dirs are the root dir
VittalP Jan 31, 2017
8a62db9
support variable learning rate
VittalP Feb 2, 2017
e9517c1
concatenate paths using os join
VittalP Feb 2, 2017
d5c3b1f
make num_epochs a variable
VittalP Feb 2, 2017
fd8740e
Merge branch 'master' of https://github.com/VittalP/tf-image-segmenta…
VittalP Feb 2, 2017
73434cc
test 20 epochs
VittalP Feb 4, 2017
7e727bc
don't concatenate while path join
VittalP Feb 4, 2017
f854e57
pass feed_dict as parameter
VittalP Feb 4, 2017
cf76aaf
support adjusted learning rate
VittalP Feb 5, 2017
623ee90
Merge branch 'master' of https://github.com/warmspringwinds/tf-image-…
VittalP Feb 5, 2017
846b613
merge from test_branch
VittalP Feb 6, 2017
8920a95
Merge branch 'master' of https://github.com/warmspringwinds/tf-image-…
VittalP Feb 6, 2017
0e2f3b8
Merge branch 'master' of https://github.com/VittalP/tf-image-segmenta…
VittalP Feb 6, 2017
d256fd4
Merge pull request #1 from VittalP/move_paths
ahundt Feb 9, 2017
2bccd78
Squashed 'tf_image_segmentation/models/densenet-fcn/' content from co…
ahundt Feb 9, 2017
1ddcb99
Merge commit '2bccd78a8e02b6552b91b64cb52ee6a11414b74e' as 'tf_image_…
ahundt Feb 9, 2017
d3c0009
PASCAL VOC download instructions
ahundt Feb 9, 2017
788c19d
pascal voc converion simplified
ahundt Feb 9, 2017
f7a955c
densenet network created
ahundt Feb 9, 2017
7986169
add update ops to collect moving average and moving variance
VittalP Feb 9, 2017
4dba0d4
Merge branch 'master' of https://github.com/warmspringwinds/tf-image-…
VittalP Feb 9, 2017
f21fed2
keras style dataset training
ahundt Feb 10, 2017
d95cace
densenet_fcn.py tf mode almost working fails initializing optimizer
ahundt Feb 11, 2017
dcf25a1
add missing _init_.py
ahundt Feb 11, 2017
8ea2672
improve loading of resnet v1 101 8s ipynb
ahundt Feb 11, 2017
65a7e63
unet.py simplified fcn added
ahundt Feb 11, 2017
c6ff8af
resnet_v1_101_16s_train is more general
ahundt Feb 11, 2017
cd7df53
densenet.fc now correctly creates ops, small tweak needed for sesion.run
ahundt Feb 11, 2017
64ccdd4
add mscoco support
VittalP Feb 14, 2017
77f1558
update test notebook: valset=65%
VittalP Feb 14, 2017
b1eefb9
fix merge conflict on test notebook
VittalP Feb 14, 2017
8d7f9b9
Merge branch 'mscoco' of https://github.com/VittalP/tf-image-segmenta…
VittalP Feb 14, 2017
818cc4e
add script to convert mscoco dataset to tfrecords
VittalP Feb 14, 2017
49fbfc4
merge tf_records from master
VittalP Feb 14, 2017
23ec24f
add script to convert mscoco to tfrecords
VittalP Feb 14, 2017
0cad606
delete mscoco script from utils
VittalP Feb 14, 2017
e6dbf43
densenet_fcn.py no longer crashes before training starts, but trainin…
ahundt Feb 14, 2017
abb43e6
move tf cast to float back out of tfrecord loading
ahundt Feb 14, 2017
32f2b6d
densnet_fc.py and densenet_fcn.py can now train on pascal_voc
ahundt Feb 18, 2017
20dfce3
.gitignore ignores tfrecords
ahundt Feb 18, 2017
e581465
merge gitignore from master
VittalP Feb 20, 2017
ece17a9
add script to convert from json to seg_mask
VittalP Feb 20, 2017
fc9beb7
hack to fix scope issue
VittalP Feb 21, 2017
fede25e
hack to fix scope issue
VittalP Feb 21, 2017
694564a
clean up some variables
VittalP Feb 21, 2017
e00b54d
Merge branch 'mscoco' of https://github.com/VittalP/tf-image-segmenta…
VittalP Feb 21, 2017
3e15959
remove manual path settings
ahundt Mar 12, 2017
9bae8ee
remove densenet_fcn model folder because it is now in keras-contrib
ahundt Mar 12, 2017
5711949
densenet_fcn.py fixed FLAGS
ahundt Mar 12, 2017
d9c72ae
Merge commit '08412ad5a4e9896eec4dfe2e8a803c187418636e' into ahundt-k…
ahundt Mar 12, 2017
2a22ac5
Merge commit 'e00b54dda39664f7464d48fdb08cec8afa32ce7e' from mscoco i…
ahundt Mar 12, 2017
974d4ae
initial sacred.readthedocs.io coco implementation
ahundt Mar 17, 2017
74dd4a6
data_pascal_voc.py initial implementation
ahundt Mar 17, 2017
f742639
data_pascal_voc fix errors
ahundt Mar 17, 2017
22a1db1
dataset command line support
ahundt Mar 18, 2017
01c0e87
recipes try to fix script running
ahundt Mar 18, 2017
128675e
recipes create runnable experiments for datasets
ahundt Mar 18, 2017
0cb1bbd
data_pascal_voc.py full setup command
ahundt Mar 18, 2017
94b8f6a
data_pascal_voc.py attempted loading fix
ahundt Mar 18, 2017
4ca260d
data_pascal_voc.py typo fix
ahundt Mar 18, 2017
b7559a4
data_pascal_voc.py as experiment
ahundt Mar 18, 2017
5e2cb2a
data_pascal_voc.py config scope fix
ahundt Mar 18, 2017
da44a03
data_pascal_voc.py try member access
ahundt Mar 18, 2017
92f1005
dta_pascal_voc.py try dataset
ahundt Mar 18, 2017
88b42e3
data_pascal_voc.py include ingredients directly
ahundt Mar 18, 2017
4132ae2
data_pascal_voc.py import path fix
ahundt Mar 18, 2017
140b284
data_pascal_voc fix function calls
ahundt Mar 18, 2017
a206bab
revertme
ahundt Mar 18, 2017
b4a9e79
test
ahundt Mar 18, 2017
71fa80e
test
ahundt Mar 18, 2017
8237e05
test
ahundt Mar 18, 2017
9669ec6
test
ahundt Mar 18, 2017
561c492
try automain
ahundt Mar 18, 2017
39289e1
cut out all modularity
ahundt Mar 18, 2017
a665165
remove all modularity
ahundt Mar 18, 2017
b494e73
remove all modularity
ahundt Mar 18, 2017
7b7760a
remove settings
ahundt Mar 18, 2017
6756cb5
data_pascal_voc extraction
ahundt Mar 19, 2017
a92133f
pascal_voc_download and extract works
ahundt Mar 19, 2017
af978e2
switch to command line based
ahundt Mar 19, 2017
23c654a
run full setup
ahundt Mar 19, 2017
1b378b3
fix infinite loop
ahundt Mar 19, 2017
d13fea6
remove accidentally committed files
ahundt Mar 19, 2017
8cb2606
stop tracking accidentally committed files
ahundt Mar 19, 2017
258e004
data_pascal_voc.py minor cleanup
ahundt Mar 19, 2017
5570a52
data_coco.py major cleanup
ahundt Mar 19, 2017
a0970e9
data_coco.py path fixes
ahundt Mar 19, 2017
2b158c7
coco and pascal md5 support
ahundt Mar 20, 2017
b87946c
data_coco.py md5 fix
ahundt Mar 20, 2017
03773a9
get_file() extract param in fchollet/keras/#5861
ahundt Mar 20, 2017
9dcfd1e
coco_json_to_segmentation() typo fix
ahundt Mar 20, 2017
6706df2
data_coco.py md5 fix
ahundt Mar 20, 2017
6a4d4c6
print annotation_paths
ahundt Mar 20, 2017
4494519
add print_coco_files
ahundt Mar 20, 2017
6262ee4
data_coco.py seg mask list fix
ahundt Mar 20, 2017
4a0f293
data_coco.py path typo
ahundt Mar 20, 2017
df6a37a
data_coco.py better errors
ahundt Mar 21, 2017
80762e1
data_coco.py param fix
ahundt Mar 21, 2017
d5b878b
data_coco.py better printouts
ahundt Mar 21, 2017
6ffa788
data_coco.py debug info
ahundt Mar 21, 2017
13265a0
data_coco.py better error output
ahundt Mar 21, 2017
135accf
data_coco.py annotations is now instances
ahundt Mar 21, 2017
8f00099
data_coco.py update for api change
ahundt Mar 21, 2017
14b50e3
data_coco.py create output dir if needed
ahundt Mar 21, 2017
af93f9b
data_coco.py initial one hot encoding integration
ahundt Apr 6, 2017
c0484fc
data_coco.py fix for function and object with same name
ahundt Apr 6, 2017
31218ce
data_coco.py verbose conversion printouts
ahundt Apr 6, 2017
aa55f32
data_coco.py json to segmentation verbose param
ahundt Apr 6, 2017
f955d56
data_coco.py switch to progbar
ahundt Apr 6, 2017
b5c2244
data_coco.py progbar fix
ahundt Apr 6, 2017
a66cf7b
data_coco.py progbar verbose
ahundt Apr 6, 2017
a7ab914
data_coco.py added stat calculation capabilities
ahundt Apr 6, 2017
ed83126
data_coco.py image and category stats
ahundt Apr 6, 2017
9bacb54
data_coco.py typo fix
ahundt Apr 6, 2017
bae251c
data_coco.py missing variable fix
ahundt Apr 6, 2017
3747c7b
data_coco.py bincount fix
ahundt Apr 6, 2017
58fad68
data_coco.py typo fix
ahundt Apr 6, 2017
eb53398
data_coco.py image stats main loop runs
ahundt Apr 6, 2017
24de844
data_coco.py stats aren't run in overall setup
ahundt Apr 6, 2017
a107940
Make forward/backward compatible with tf-1.0.x.
barryridge Apr 6, 2017
f0bfe07
data_coco.py float64 type bugfix
ahundt Apr 6, 2017
45030b4
data_coco.py fix divide
ahundt Apr 9, 2017
0b91e3a
data_coco.py var name error fixed
ahundt Apr 9, 2017
5bd7f0d
print annotation file info
ahundt Apr 9, 2017
0934e0d
data_coco.py print extra info
ahundt Apr 9, 2017
bc5c32d
data_coco.py init bug
ahundt Apr 9, 2017
e72e6f8
data_coco.py improve notes
ahundt Apr 9, 2017
f5b2972
data_coco.py print category names and supercategories
ahundt Apr 9, 2017
8eac0fa
data_coco.py more useful stats
ahundt Apr 10, 2017
fa93a27
data_coco.py typo fix
ahundt Apr 10, 2017
37ab452
data_coco.py support lading class weighting from a file
ahundt Apr 10, 2017
505807d
data_coco.py rename class_weighting to class_weight
ahundt Apr 10, 2017
d42ff41
data_coco.py add coco_to_pascal_voc_imageset_txt
ahundt Apr 10, 2017
54eacba
data_coco.py typo fix
ahundt Apr 10, 2017
7b15c81
data_coco.py put pascal voc in annotations folder
ahundt Apr 10, 2017
1075e2b
data_coco.py file fix
ahundt Apr 10, 2017
4e70b69
data_pascal_voc.py add pascal_voc_berkeley_combined()
ahundt Apr 13, 2017
8e370a5
small path fix
ahundt Apr 13, 2017
d22922f
pascal_voc.py add missing import
ahundt Apr 19, 2017
af3b095
add pascal-context
ahundt Apr 19, 2017
f1afc2c
add pascal context to download list
ahundt Apr 19, 2017
2833e95
pascal context link
ahundt Apr 19, 2017
75f9470
Remove version checks for >=tf-1.0.x compatibility
barryridge Apr 29, 2017
fbbb104
fixed paths for resnet_101, added reasonable default paths
ahundt May 5, 2017
121882e
Merge branch 'Keras-FCN' of github.com:ahundt/tf-image-segmentation i…
ahundt May 5, 2017
6fbb2fb
Merge pull request #16 from barryridge/tf-1.0.x-compatibility
ahundt May 5, 2017
cac3f52
resnet_v1 101 8s training runs on tensorflow 1.1
ahundt May 6, 2017
6157d7e
Merge commit 'dd02343d8e0df9b4aa9a9d217999bef8b32903bb' into ahundt-k…
ahundt Jun 19, 2017
f5e8570
cleanup
ahundt Jun 19, 2017
b990a13
whitespace
ahundt Jun 21, 2017
11 changes: 11 additions & 0 deletions .gitignore
@@ -1,2 +1,13 @@
# file types to ignore
.ipynb_checkpoints
*.pyc
*.tfrecords
*.log
*.ckpt*

# Files
Untitled*

# directories to ignore
/log_dir
/save_dir
11 changes: 11 additions & 0 deletions README.md
@@ -65,6 +65,17 @@ the PASCAL VOC 2012 Training data and additional Berkeley segmentation data for
It was important to test models on the restricted validation dataset to make sure no images in the
validation dataset were seen by the model during training.

Here is how to download the datasets:
```bash

# original PASCAL VOC 2012
curl -O http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar # 2 GB

# berkeley augmented PASCAL VOC
curl -O http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz # 1.3 GB
```


The code for training and validating the model is also provided in the framework.

### Fully Convolutional Networks for Semantic Segmentation (FCNs)
11 changes: 7 additions & 4 deletions tf_image_segmentation/models/fcn_16s.py
@@ -3,6 +3,9 @@
from preprocessing import vgg_preprocessing
from ..utils.upsampling import bilinear_upsample_weights

# For comparing tf versions for backwards compatibility
from packaging import version

slim = tf.contrib.slim

# Mean values for VGG-16
@@ -83,7 +86,7 @@ def FCN_16s(image_batch_tensor,


# Calculate the output size of the upsampled tensor
-last_layer_upsampled_by_factor_2_logits_shape = tf.pack([
+last_layer_upsampled_by_factor_2_logits_shape = tf.stack([
last_layer_logits_shape[0],
last_layer_logits_shape[1] * 2,
last_layer_logits_shape[2] * 2,
@@ -108,7 +111,7 @@ def FCN_16s(image_batch_tensor,
[1, 1],
activation_fn=None,
normalizer_fn=None,
-weights_initializer=tf.zeros_initializer,
+weights_initializer=tf.zeros_initializer(),
scope='pool4_fc')

fused_last_layer_and_pool4_logits = pool4_logits + last_layer_upsampled_by_factor_2_logits
@@ -117,7 +120,7 @@ def FCN_16s(image_batch_tensor,


# Calculate the output size of the upsampled tensor
-fused_last_layer_and_pool4_upsampled_by_factor_16_logits_shape = tf.pack([
+fused_last_layer_and_pool4_upsampled_by_factor_16_logits_shape = tf.stack([
fused_last_layer_and_pool4_logits_shape[0],
fused_last_layer_and_pool4_logits_shape[1] * 16,
fused_last_layer_and_pool4_logits_shape[2] * 16,
@@ -146,4 +149,4 @@ def FCN_16s(image_batch_tensor,
original_fcn_32s_checkpoint_string = 'fcn_32s/' + variable.name[len(fcn_16s_scope.original_name_scope):-2]
fcn_32s_variables_mapping[original_fcn_32s_checkpoint_string] = variable

-return fused_last_layer_and_pool4_upsampled_by_factor_16_logits, fcn_32s_variables_mapping
\ No newline at end of file
+return fused_last_layer_and_pool4_upsampled_by_factor_16_logits, fcn_32s_variables_mapping
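The model files in this diff each gain a `from packaging import version` import to support running on both pre-1.0 and 1.x TensorFlow. A minimal sketch of how such a version gate might look; the helper names are hypothetical, and real code would use `packaging.version.parse` rather than this crude tuple parse:

```python
def tf_version_tuple(version_string):
    # Crude stand-in for packaging.version.parse; good enough for plain
    # 'major.minor.patch' strings such as '0.12.1' or '1.1.0'.
    return tuple(int(part) for part in version_string.split('.')[:3])

def stack_op_name(tf_version):
    # tf.pack was renamed to tf.stack in TensorFlow 1.0, which is the
    # rename this diff applies throughout the model files.
    return 'tf.stack' if tf_version_tuple(tf_version) >= (1, 0, 0) else 'tf.pack'
```

In practice the PR eventually dropped such runtime checks entirely (see "Remove version checks for >=tf-1.0.x compatibility") and simply required tf >= 1.0.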
7 changes: 5 additions & 2 deletions tf_image_segmentation/models/fcn_32s.py
@@ -3,6 +3,9 @@
from preprocessing import vgg_preprocessing
from ..utils.upsampling import bilinear_upsample_weights

# For comparing tf versions for backwards compatibility
from packaging import version

slim = tf.contrib.slim

# Mean values for VGG-16
@@ -114,7 +117,7 @@ def FCN_32s(image_batch_tensor,
downsampled_logits_shape = tf.shape(logits)

# Calculate the output size of the upsampled tensor
-upsampled_logits_shape = tf.pack([
+upsampled_logits_shape = tf.stack([
downsampled_logits_shape[0],
downsampled_logits_shape[1] * upsample_factor,
downsampled_logits_shape[2] * upsample_factor,
@@ -147,4 +150,4 @@ def FCN_32s(image_batch_tensor,
original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.name)+1:-2]
vgg_16_variables_mapping[original_vgg_16_checkpoint_string] = variable

-return upsampled_logits, vgg_16_variables_mapping
\ No newline at end of file
+return upsampled_logits, vgg_16_variables_mapping
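Every model touched by this diff imports `bilinear_upsample_weights` from `..utils.upsampling` to initialize its transposed-convolution layers. A common sketch of the bilinear kernel such a helper is built around, shown in pure Python; this is an assumption about the repo's actual implementation, which also tiles the kernel per class:

```python
def bilinear_kernel(factor):
    # Classic bilinear interpolation kernel used to initialize the
    # transposed-convolution ("upsampling by factor") weights in FCN models.
    size = 2 * factor - factor % 2          # e.g. factor 2 -> 4x4 kernel
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    return [[(1 - abs(i - center) / factor) * (1 - abs(j - center) / factor)
             for j in range(size)]
            for i in range(size)]
```

The kernel is separable and its entries sum to `factor ** 2`, so upsampling a constant image leaves it constant.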
15 changes: 9 additions & 6 deletions tf_image_segmentation/models/fcn_8s.py
@@ -3,6 +3,9 @@
from preprocessing import vgg_preprocessing
from ..utils.upsampling import bilinear_upsample_weights

# For comparing tf versions for backwards compatibility
from packaging import version

slim = tf.contrib.slim

# Mean values for VGG-16
@@ -83,7 +86,7 @@ def FCN_8s(image_batch_tensor,


# Calculate the output size of the upsampled tensor
-last_layer_upsampled_by_factor_2_logits_shape = tf.pack([
+last_layer_upsampled_by_factor_2_logits_shape = tf.stack([
last_layer_logits_shape[0],
last_layer_logits_shape[1] * 2,
last_layer_logits_shape[2] * 2,
@@ -110,7 +113,7 @@ def FCN_8s(image_batch_tensor,
[1, 1],
activation_fn=None,
normalizer_fn=None,
-weights_initializer=tf.zeros_initializer,
+weights_initializer=tf.zeros_initializer(),
scope='pool4_fc')

fused_last_layer_and_pool4_logits = pool4_logits + last_layer_upsampled_by_factor_2_logits
@@ -121,7 +124,7 @@ def FCN_8s(image_batch_tensor,


# Calculate the output size of the upsampled tensor
-fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape = tf.pack([
+fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape = tf.stack([
fused_last_layer_and_pool4_logits_shape[0],
fused_last_layer_and_pool4_logits_shape[1] * 2,
fused_last_layer_and_pool4_logits_shape[2] * 2,
@@ -147,7 +150,7 @@ def FCN_8s(image_batch_tensor,
[1, 1],
activation_fn=None,
normalizer_fn=None,
-weights_initializer=tf.zeros_initializer,
+weights_initializer=tf.zeros_initializer(),
scope='pool3_fc')


@@ -159,7 +162,7 @@ def FCN_8s(image_batch_tensor,


# Calculate the output size of the upsampled tensor
-fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape = tf.pack([
+fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape = tf.stack([
fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[0],
fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[1] * 8,
fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[2] * 8,
@@ -191,4 +194,4 @@ def FCN_8s(image_batch_tensor,
original_fcn_16s_checkpoint_string = 'fcn_16s/' + variable.name[len(fcn_8s_scope.original_name_scope):-2]
fcn_16s_variables_mapping[original_fcn_16s_checkpoint_string] = variable

-return fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits, fcn_16s_variables_mapping
\ No newline at end of file
+return fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits, fcn_16s_variables_mapping
7 changes: 5 additions & 2 deletions tf_image_segmentation/models/resnet_v1_101_16s.py
@@ -3,6 +3,9 @@
from preprocessing import vgg_preprocessing
from ..utils.upsampling import bilinear_upsample_weights

# For comparing tf versions for backwards compatibility
from packaging import version

slim = tf.contrib.slim

# Mean values for VGG-16
@@ -116,7 +119,7 @@ def resnet_v1_101_16s(image_batch_tensor,
downsampled_logits_shape = tf.shape(logits)

# Calculate the output size of the upsampled tensor
-upsampled_logits_shape = tf.pack([
+upsampled_logits_shape = tf.stack([
downsampled_logits_shape[0],
downsampled_logits_shape[1] * upsample_factor,
downsampled_logits_shape[2] * upsample_factor,
@@ -145,4 +148,4 @@ def resnet_v1_101_16s(image_batch_tensor,
original_resnet_v1_101_checkpoint_string = variable.name[len(resnet_v1_101_16s.original_name_scope):-2]
resnet_v1_101_16s_variables_mapping[original_resnet_v1_101_checkpoint_string] = variable

-return upsampled_logits, resnet_v1_101_16s_variables_mapping
\ No newline at end of file
+return upsampled_logits, resnet_v1_101_16s_variables_mapping
24 changes: 16 additions & 8 deletions tf_image_segmentation/models/resnet_v1_101_8s.py
@@ -9,7 +9,7 @@
from preprocessing.vgg_preprocessing import _R_MEAN, _G_MEAN, _B_MEAN


-def extract_resnet_v1_101_mapping_without_logits(resnet_v1_101_variables_mapping):
+def extract_resnet_v1_101_mapping_without_logits(resnet_v1_101_variables_mapping, mapping_prefix=''):
"""Removes the logits variable mapping from resnet_v1_101_8s to resnet_v1_101 model mapping dict.
Given the resnet_v1_101_8s to resnet_v1_101 model mapping dict which is returned by
resnet_v1_101_8s() function, remove the mapping for the fc8 variable. This is done because this
@@ -24,6 +24,13 @@ def extract_resnet_v1_101_mapping_without_logits(resnet_v1_101_variables_mapping
Dict which maps the resnet_v1_101_8s model's variables to resnet_v1_101 checkpoint variables
names. Look at resnet_v1_101_8s() function for more details.

mapping_prefix : string
mapping_prefix is the prefix added to convert the names of the tensors you've created to match
the names of the tensors in the saved model checkpoint. When loading from imagenet pretrained
weights such as http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz,
use the default empty mapping prefix, then when reloading from a checkpoint use
'resnet_v1_101_8s/'.

Returns
-------
updated_mapping : dict {string: variable}
@@ -37,11 +44,12 @@ def extract_resnet_v1_101_mapping_without_logits(resnet_v1_101_variables_mapping
resnet_v1_101_without_logits_keys = []

for key in resnet_v1_101_keys:

print key
if 'logits' not in key:
resnet_v1_101_without_logits_keys.append(key)

-updated_mapping = {key: resnet_v1_101_variables_mapping[key] for key in resnet_v1_101_without_logits_keys}
+# TODO: fix the following hack to get the scope right
+# updated_mapping = {'resnet_v1_101_8s/' + key: resnet_v1_101_variables_mapping[key] for key in resnet_v1_101_without_logits_keys}
+updated_mapping = {mapping_prefix + key: resnet_v1_101_variables_mapping[key] for key in resnet_v1_101_without_logits_keys}

return updated_mapping
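The `mapping_prefix` behavior documented in the docstring above boils down to a filtered, re-keyed dict. A toy illustration with hypothetical names and string stand-ins for the real `tf.Variable` values:

```python
# Hypothetical variable mapping, standing in for the dict returned by
# resnet_v1_101_8s(); the values would really be tf.Variable objects.
variables_mapping = {
    'resnet_v1_101/logits/weights': 'logits_var',
    'resnet_v1_101/conv1/weights': 'conv1_var',
}

def extract_mapping_without_logits(variables_mapping, mapping_prefix=''):
    # Drop the logits (fc8) entry, then optionally re-prefix the keys so
    # they match the names stored in a resnet_v1_101_8s checkpoint.
    return {mapping_prefix + key: value
            for key, value in variables_mapping.items()
            if 'logits' not in key}
```

With the default empty prefix the keys match the ImageNet checkpoint; with `'resnet_v1_101_8s/'` they match a checkpoint saved from this model's scope.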

@@ -110,13 +118,14 @@ def resnet_v1_101_8s(image_batch_tensor,
number_of_classes,
is_training=is_training,
global_pool=False,
-output_stride=8)
+output_stride=8,
+spatial_squeeze=False)


downsampled_logits_shape = tf.shape(logits)

# Calculate the output size of the upsampled tensor
-upsampled_logits_shape = tf.pack([
+upsampled_logits_shape = tf.stack([
downsampled_logits_shape[0],
downsampled_logits_shape[1] * upsample_factor,
downsampled_logits_shape[2] * upsample_factor,
@@ -139,10 +148,9 @@ def resnet_v1_101_8s(image_batch_tensor,
resnet_v1_101_8s_variables = slim.get_variables(resnet_v1_101_8s)

for variable in resnet_v1_101_8s_variables:

# Here we remove the part of a name of the variable
# that is responsible for the current variable scope
original_resnet_v1_101_checkpoint_string = variable.name[len(resnet_v1_101_8s.original_name_scope):-2]
resnet_v1_101_8s_variables_mapping[original_resnet_v1_101_checkpoint_string] = variable

-return upsampled_logits, resnet_v1_101_8s_variables_mapping
\ No newline at end of file
+return upsampled_logits, resnet_v1_101_8s_variables_mapping
7 changes: 5 additions & 2 deletions tf_image_segmentation/models/resnet_v1_50_16s.py
@@ -3,6 +3,9 @@
from preprocessing import vgg_preprocessing
from ..utils.upsampling import bilinear_upsample_weights

# For comparing tf versions for backwards compatibility
from packaging import version

slim = tf.contrib.slim

# Mean values for VGG-16
@@ -116,7 +119,7 @@ def resnet_v1_50_16s(image_batch_tensor,
downsampled_logits_shape = tf.shape(logits)

# Calculate the output size of the upsampled tensor
-upsampled_logits_shape = tf.pack([
+upsampled_logits_shape = tf.stack([
downsampled_logits_shape[0],
downsampled_logits_shape[1] * upsample_factor,
downsampled_logits_shape[2] * upsample_factor,
@@ -145,4 +148,4 @@ def resnet_v1_50_16s(image_batch_tensor,
original_resnet_v1_50_checkpoint_string = variable.name[len(resnet_v1_50_16s.original_name_scope):-2]
resnet_v1_50_16s_variables_mapping[original_resnet_v1_50_checkpoint_string] = variable

-return upsampled_logits, resnet_v1_50_16s_variables_mapping
\ No newline at end of file
+return upsampled_logits, resnet_v1_50_16s_variables_mapping
7 changes: 5 additions & 2 deletions tf_image_segmentation/models/resnet_v1_50_8s.py
@@ -3,6 +3,9 @@
from preprocessing import vgg_preprocessing
from ..utils.upsampling import bilinear_upsample_weights

# For comparing tf versions for backwards compatibility
from packaging import version

slim = tf.contrib.slim

# Mean values for VGG-16
@@ -116,7 +119,7 @@ def resnet_v1_50_8s(image_batch_tensor,
downsampled_logits_shape = tf.shape(logits)

# Calculate the output size of the upsampled tensor
-upsampled_logits_shape = tf.pack([
+upsampled_logits_shape = tf.stack([
downsampled_logits_shape[0],
downsampled_logits_shape[1] * upsample_factor,
downsampled_logits_shape[2] * upsample_factor,
@@ -145,4 +148,4 @@ def resnet_v1_50_8s(image_batch_tensor,
original_resnet_v1_50_checkpoint_string = variable.name[len(resnet_v1_50_8s.original_name_scope):-2]
resnet_v1_50_8s_variables_mapping[original_resnet_v1_50_checkpoint_string] = variable

-return upsampled_logits, resnet_v1_50_8s_variables_mapping
\ No newline at end of file
+return upsampled_logits, resnet_v1_50_8s_variables_mapping
60 changes: 60 additions & 0 deletions tf_image_segmentation/models/unet.py
@@ -0,0 +1,60 @@

import numpy as np
from keras.layers.core import Reshape, Activation
from keras.models import Model
from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import backend as K

# https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py
def get_unet(image_size, num_classes, tensor=None):

concat_axis = 1 if K.image_dim_ordering() == "th" else -1
inputs = Input(image_size, tensor=tensor)
conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(inputs)
conv1 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

conv2 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(pool1)
conv2 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

conv3 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(pool2)
conv3 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

conv4 = Convolution2D(256, 3, 3, activation='relu', border_mode='same')(pool3)
conv4 = Convolution2D(256, 3, 3, activation='relu', border_mode='same')(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

conv5 = Convolution2D(512, 3, 3, activation='relu', border_mode='same')(pool4)
conv5 = Convolution2D(512, 3, 3, activation='relu', border_mode='same')(conv5)

up6 = merge([UpSampling2D(size=(2, 2))(conv5), conv4], mode='concat', concat_axis=concat_axis)
conv6 = Convolution2D(256, 3, 3, activation='relu', border_mode='same')(up6)
conv6 = Convolution2D(256, 3, 3, activation='relu', border_mode='same')(conv6)

up7 = merge([UpSampling2D(size=(2, 2))(conv6), conv3], mode='concat', concat_axis=concat_axis)
conv7 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(up7)
conv7 = Convolution2D(128, 3, 3, activation='relu', border_mode='same')(conv7)

up8 = merge([UpSampling2D(size=(2, 2))(conv7), conv2], mode='concat', concat_axis=concat_axis)
conv8 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(up8)
conv8 = Convolution2D(64, 3, 3, activation='relu', border_mode='same')(conv8)

up9 = merge([UpSampling2D(size=(2, 2))(conv8), conv1], mode='concat', concat_axis=concat_axis)
conv9 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(up9)
conv9 = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(conv9)

conv10 = Convolution2D(num_classes, 1, 1, activation='relu')(conv9)

x = Reshape((image_size[0]*image_size[1], num_classes))(conv10)

x = Activation('softmax')(x)

x = Reshape((image_size[0], image_size[1], num_classes))(x)

model = Model(input=inputs, output=x)

return model
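One practical constraint of `get_unet` above: with four pooling/upsampling stages, the input height and width must be divisible by 16, or the skip-connection `merge` calls and the final `Reshape` back to `image_size` will fail on mismatched shapes. A small sketch of the size arithmetic (hypothetical helper, not part of the PR):

```python
def unet_output_size(height, width, depth=4):
    # The encoder applies `depth` 2x2 max-pools (integer halving); the
    # decoder applies `depth` 2x2 upsamplings (doubling). If height/width
    # are not divisible by 2**depth, the decoder cannot recover the
    # original input size.
    for _ in range(depth):
        height, width = height // 2, width // 2
    for _ in range(depth):
        height, width = height * 2, width * 2
    return height, width
```

For example, 256x256 round-trips exactly, while 250x250 comes back as 240x240 and would crash the model at the first decoder merge.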