Update docs
titu1994 committed Jan 7, 2018
1 parent 39b5eaa commit 1c8d655
Showing 3 changed files with 74 additions and 9 deletions.
75 changes: 66 additions & 9 deletions data_loader.py
@@ -3,13 +3,12 @@
import glob

import tensorflow as tf
from tensorflow import data as tfdata

# path to the images and the text file which holds the scores and ids
base_images_path = r'D:\Yue\Documents\Datasets\AVA_dataset\images\images\\'
ava_dataset_path = r'D:\Yue\Documents\Datasets\AVA_dataset\AVA.txt'

IMAGE_SIZE = 224
BASE_LEN = len(base_images_path) - 1

files = glob.glob(base_images_path + "*.jpg")
files = sorted(files)
@@ -46,8 +45,19 @@

print('Train set size : ', train_image_paths.shape, train_scores.shape)
print('Val set size : ', val_image_paths.shape, val_scores.shape)
print('Train and validation datasets ready !')

def parse_data(filename, scores):
'''
Loads the image file and randomly applies crops and flips as augmentation.
Args:
filename: the filename from the record
scores: the scores from the record
Returns:
an image referred to by the filename and its scores
'''
image = tf.read_file(filename)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize_images(image, (256, 256))
@@ -57,17 +67,37 @@ def parse_data(filename, scores):
return image, scores

def parse_data_without_augmentation(filename, scores):
'''
Loads the image file without any augmentation. Used for validation set.
Args:
filename: the filename from the record
scores: the scores from the record
Returns:
an image referred to by the filename and its scores
'''
image = tf.read_file(filename)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize_images(image, (IMAGE_SIZE, IMAGE_SIZE))
image = (tf.cast(image, tf.float32) - 127.5) / 127.5
return image, scores
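
As a quick sanity check, the validation-time pipeline can be run on a single image. The sketch below is not part of the commit; the file path and dummy scores are hypothetical.

# Sketch: push one image through parse_data_without_augmentation and
# confirm the output shape and the [-1, 1] pixel range.
img_op, score_op = parse_data_without_augmentation(
    tf.constant('some_image.jpg'),               # hypothetical file path
    tf.constant([0.1] * 10, dtype=tf.float32))   # dummy 10-bin score histogram

with tf.Session() as sess:
    img, _ = sess.run([img_op, score_op])

print(img.shape)             # (224, 224, 3)
print(img.min(), img.max())  # both within [-1.0, 1.0]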

print('Train and validation datasets ready !')

def train_generator(batchsize, shuffle=True):
'''
Creates a python generator that loads the AVA dataset images with random data
augmentation and generates numpy arrays to feed into the Keras model for training.
Args:
batchsize: batchsize for training
shuffle: whether to shuffle the dataset
Returns:
a batch of samples (X_images, y_scores)
'''
with tf.Session() as sess:
train_dataset = tfdata.Dataset().from_tensor_slices((train_image_paths, train_scores))
# create a dataset
train_dataset = tf.data.Dataset.from_tensor_slices((train_image_paths, train_scores))
train_dataset = train_dataset.map(parse_data, num_parallel_calls=2)

train_dataset = train_dataset.batch(batchsize)
@@ -93,8 +123,18 @@ def train_generator(batchsize, shuffle=True):
yield (X_batch, y_batch)

def val_generator(batchsize):
'''
Creates a python generator that loads the AVA dataset images without random data
augmentation and generates numpy arrays to feed into the Keras model for validation.
Args:
batchsize: batchsize for validation set
Returns:
a batch of samples (X_images, y_scores)
'''
with tf.Session() as sess:
val_dataset = tfdata.Dataset().from_tensor_slices((val_image_paths, val_scores))
val_dataset = tf.data.Dataset.from_tensor_slices((val_image_paths, val_scores))
val_dataset = val_dataset.map(parse_data_without_augmentation)

val_dataset = val_dataset.batch(batchsize)
@@ -117,22 +157,38 @@ def val_generator(batchsize):
X_batch, y_batch = sess.run(val_batch)
yield (X_batch, y_batch)
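
For context, here is how these generators could be wired into Keras. This is a sketch, not part of the commit; the model, epoch count, and batch size are assumptions.

# Sketch: feeding the two generators to a hypothetical Keras `model`.
batchsize = 32
model.fit_generator(train_generator(batchsize, shuffle=True),
                    steps_per_epoch=len(train_image_paths) // batchsize,
                    epochs=10,
                    validation_data=val_generator(batchsize),
                    validation_steps=len(val_image_paths) // batchsize)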

def features_generator(record_path, batchsize, shuffle=True):
def features_generator(record_path, feature_size, batchsize, shuffle=True):
'''
Creates a python generator that loads pre-extracted features from a model
and serves them to Keras for pre-training.
Args:
record_path: path to the TF Record file
feature_size: the number of features in each record. Depends on the base model.
batchsize: batchsize for training
shuffle: whether to shuffle the records
Returns:
a batch of samples (X_features, y_scores)
'''
with tf.Session() as sess:
# maps record examples to numpy arrays

def parse_single_record(serialized_example):
# parse a single record
example = tf.parse_single_example(
serialized_example,
features={
'features': tf.FixedLenFeature([1056], tf.float32),
'features': tf.FixedLenFeature([feature_size], tf.float32),
'scores': tf.FixedLenFeature([10], tf.float32),
})

features = example['features']
scores = example['scores']
return features, scores

train_dataset = tfdata.TFRecordDataset([record_path])
# load the TFRecord dataset
train_dataset = tf.data.TFRecordDataset([record_path])
train_dataset = train_dataset.map(parse_single_record, num_parallel_calls=4)

train_dataset = train_dataset.batch(batchsize)
@@ -145,6 +201,7 @@ def parse_single_record(serialized_example):

sess.run(train_iterator.initializer)

# extract batches indefinitely
while True:
try:
X_batch, y_batch = sess.run(train_batch)
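For reference, a record file consumed by features_generator could be produced as below. This is a sketch under assumptions: the writer path is hypothetical, and features_np (feature_size floats) and scores_np (10 floats) are numpy arrays from some upstream extraction step.

# Sketch: write one example in the format parse_single_record expects,
# i.e. `feature_size` floats under 'features' and 10 floats under 'scores'.
writer = tf.python_io.TFRecordWriter('features.tfrecord')  # hypothetical path
example = tf.train.Example(features=tf.train.Features(feature={
    'features': tf.train.Feature(float_list=tf.train.FloatList(value=features_np)),
    'scores': tf.train.Feature(float_list=tf.train.FloatList(value=scores_np)),
}))
writer.write(example.SerializeToString())
writer.close()
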
6 changes: 6 additions & 0 deletions utils/check_dataset.py
@@ -4,6 +4,12 @@

import tensorflow as tf

'''
Checks all images from the AVA dataset for corrupted JPEGs and lists them for removal.
Removal must be done manually !
'''

base_images_path = r'D:\Yue\Documents\Datasets\AVA_dataset\images\images\\'
ava_dataset_path = r'D:\Yue\Documents\Datasets\AVA_dataset\AVA.txt'

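The check itself is elided in this diff; a minimal sketch of such a corruption check (my own illustration, not the committed code) could look like:

# Sketch: try to decode every jpeg and collect the paths TensorFlow rejects.
import glob

files = sorted(glob.glob(base_images_path + "*.jpg"))

path_ph = tf.placeholder(tf.string)
decode_op = tf.image.decode_jpeg(tf.read_file(path_ph), channels=3)

corrupted = []
with tf.Session() as sess:
    for path in files:
        try:
            sess.run(decode_op, feed_dict={path_ph: path})
        except tf.errors.InvalidArgumentError:
            corrupted.append(path)

print('Corrupted files (remove manually):')
for path in corrupted:
    print(path)
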
2 changes: 2 additions & 0 deletions utils/score_utils.py
@@ -1,10 +1,12 @@
import numpy as np

# calculate mean score for AVA dataset
def mean_score(scores):
si = np.arange(1, 11, 1)
mean = np.sum(scores * si)
return mean

# calculate standard deviation of scores for AVA dataset
def std_score(scores):
si = np.arange(1, 11, 1)
mean = mean_score(scores)
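A quick worked example of mean_score (the std_score body is elided above): for a normalized 10-bin histogram over ratings 1 through 10, the returned value is the expected rating.

import numpy as np

scores = np.zeros(10)
scores[4] = scores[5] = 0.5   # all probability mass on ratings 5 and 6
print(mean_score(scores))     # 0.5 * 5 + 0.5 * 6 = 5.5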
