Update docs
titu1994 committed Jan 7, 2018
1 parent 39b5eaa commit 1c8d655
Showing 3 changed files with 74 additions and 9 deletions.
75 changes: 66 additions & 9 deletions data_loader.py
@@ -3,13 +3,12 @@
import glob

import tensorflow as tf
from tensorflow import data as tfdata

# path to the images and the text file which holds the scores and ids
base_images_path = r'D:\Yue\Documents\Datasets\AVA_dataset\images\images\\'
ava_dataset_path = r'D:\Yue\Documents\Datasets\AVA_dataset\AVA.txt'

IMAGE_SIZE = 224
BASE_LEN = len(base_images_path) - 1

files = glob.glob(base_images_path + "*.jpg")
files = sorted(files)
@@ -46,8 +45,19 @@

print('Train set size : ', train_image_paths.shape, train_scores.shape)
print('Val set size : ', val_image_paths.shape, val_scores.shape)
print('Train and validation datasets ready !')

def parse_data(filename, scores):
'''
Loads the image file and randomly applies crops and flips as augmentation.
Args:
filename: the filename from the record
scores: the scores from the record
Returns:
an image referred to by the filename and its scores
'''
image = tf.read_file(filename)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize_images(image, (256, 256))
@@ -57,17 +67,37 @@ def parse_data(filename, scores):
return image, scores

def parse_data_without_augmentation(filename, scores):
'''
Loads the image file without any augmentation. Used for validation set.
Args:
filename: the filename from the record
scores: the scores from the record
Returns:
an image referred to by the filename and its scores
'''
image = tf.read_file(filename)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize_images(image, (IMAGE_SIZE, IMAGE_SIZE))
image = (tf.cast(image, tf.float32) - 127.5) / 127.5
return image, scores
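
As a quick sanity check, the validation-time pipeline can be run on a single image. The sketch below is not part of the commit; the file path and dummy scores are hypothetical.

# Sketch: push one image through parse_data_without_augmentation and
# confirm the output shape and the [-1, 1] pixel range.
img_op, score_op = parse_data_without_augmentation(
    tf.constant('some_image.jpg'),               # hypothetical file path
    tf.constant([0.1] * 10, dtype=tf.float32))   # dummy 10-bin score histogram

with tf.Session() as sess:
    img, _ = sess.run([img_op, score_op])

print(img.shape)             # (224, 224, 3)
print(img.min(), img.max())  # both within [-1.0, 1.0]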

print('Train and validation datasets ready !')

def train_generator(batchsize, shuffle=True):
'''
Creates a python generator that loads the AVA dataset images with random data
augmentation and generates numpy arrays to feed into the Keras model for training.
Args:
batchsize: batchsize for training
shuffle: whether to shuffle the dataset
Returns:
a batch of samples (X_images, y_scores)
'''
with tf.Session() as sess:
train_dataset = tfdata.Dataset().from_tensor_slices((train_image_paths, train_scores))
# create a dataset
train_dataset = tf.data.Dataset.from_tensor_slices((train_image_paths, train_scores))
train_dataset = train_dataset.map(parse_data, num_parallel_calls=2)

train_dataset = train_dataset.batch(batchsize)
@@ -93,8 +123,18 @@ def train_generator(batchsize, shuffle=True):
yield (X_batch, y_batch)

def val_generator(batchsize):
'''
Creates a python generator that loads the AVA dataset images without random data
augmentation and generates numpy arrays to feed into the Keras model for validation.
Args:
batchsize: batchsize for validation set
Returns:
a batch of samples (X_images, y_scores)
'''
with tf.Session() as sess:
val_dataset = tfdata.Dataset().from_tensor_slices((val_image_paths, val_scores))
val_dataset = tf.data.Dataset.from_tensor_slices((val_image_paths, val_scores))
val_dataset = val_dataset.map(parse_data_without_augmentation)

val_dataset = val_dataset.batch(batchsize)
@@ -117,22 +157,38 @@ def val_generator(batchsize):
X_batch, y_batch = sess.run(val_batch)
yield (X_batch, y_batch)
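
For context, here is how these generators could be wired into Keras. This is a sketch, not part of the commit; the model, epoch count, and batch size are assumptions.

# Sketch: feeding the two generators to a hypothetical Keras `model`.
batchsize = 32
model.fit_generator(train_generator(batchsize, shuffle=True),
                    steps_per_epoch=len(train_image_paths) // batchsize,
                    epochs=10,
                    validation_data=val_generator(batchsize),
                    validation_steps=len(val_image_paths) // batchsize)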

def features_generator(record_path, batchsize, shuffle=True):
def features_generator(record_path, feature_size, batchsize, shuffle=True):
'''
Creates a python generator that loads pre-extracted features from a model
and serves them to Keras for pre-training.
Args:
record_path: path to the TF Record file
feature_size: the number of features in each record. Depends on the base model.
batchsize: batchsize for training
shuffle: whether to shuffle the records
Returns:
a batch of samples (X_features, y_scores)
'''
with tf.Session() as sess:
# maps record examples to numpy arrays

def parse_single_record(serialized_example):
# parse a single record
example = tf.parse_single_example(
serialized_example,
features={
'features': tf.FixedLenFeature([1056], tf.float32),
'features': tf.FixedLenFeature([feature_size], tf.float32),
'scores': tf.FixedLenFeature([10], tf.float32),
})

features = example['features']
scores = example['scores']
return features, scores

train_dataset = tfdata.TFRecordDataset([record_path])
# load the TFRecord dataset
train_dataset = tf.data.TFRecordDataset([record_path])
train_dataset = train_dataset.map(parse_single_record, num_parallel_calls=4)

train_dataset = train_dataset.batch(batchsize)
@@ -145,6 +201,7 @@ def parse_single_record(serialized_example):

sess.run(train_iterator.initializer)

# extract batches indefinitely
while True:
try:
X_batch, y_batch = sess.run(train_batch)
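For reference, a record file consumed by features_generator could be produced as below. This is a sketch under assumptions: the writer path is hypothetical, and features_np (feature_size floats) and scores_np (10 floats) are numpy arrays from some upstream extraction step.

# Sketch: write one example in the format parse_single_record expects,
# i.e. `feature_size` floats under 'features' and 10 floats under 'scores'.
writer = tf.python_io.TFRecordWriter('features.tfrecord')  # hypothetical path
example = tf.train.Example(features=tf.train.Features(feature={
    'features': tf.train.Feature(float_list=tf.train.FloatList(value=features_np)),
    'scores': tf.train.Feature(float_list=tf.train.FloatList(value=scores_np)),
}))
writer.write(example.SerializeToString())
writer.close()
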
6 changes: 6 additions & 0 deletions utils/check_dataset.py
@@ -4,6 +4,12 @@

import tensorflow as tf

'''
Checks all images from the AVA dataset for corrupted JPEGs and lists them for removal.
Removal must be done manually !
'''

base_images_path = r'D:\Yue\Documents\Datasets\AVA_dataset\images\images\\'
ava_dataset_path = r'D:\Yue\Documents\Datasets\AVA_dataset\AVA.txt'

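The check itself is elided in this diff; a minimal sketch of such a corruption check (my own illustration, not the committed code) could look like:

# Sketch: try to decode every jpeg and collect the paths TensorFlow rejects.
import glob

files = sorted(glob.glob(base_images_path + "*.jpg"))

path_ph = tf.placeholder(tf.string)
decode_op = tf.image.decode_jpeg(tf.read_file(path_ph), channels=3)

corrupted = []
with tf.Session() as sess:
    for path in files:
        try:
            sess.run(decode_op, feed_dict={path_ph: path})
        except tf.errors.InvalidArgumentError:
            corrupted.append(path)

print('Corrupted files (remove manually):')
for path in corrupted:
    print(path)
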
2 changes: 2 additions & 0 deletions utils/score_utils.py
@@ -1,10 +1,12 @@
import numpy as np

# calculate mean score for AVA dataset
def mean_score(scores):
si = np.arange(1, 11, 1)
mean = np.sum(scores * si)
return mean

# calculate standard deviation of scores for AVA dataset
def std_score(scores):
si = np.arange(1, 11, 1)
mean = mean_score(scores)
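A quick worked example of mean_score (the std_score body is elided above): for a normalized 10-bin histogram over ratings 1 through 10, the returned value is the expected rating.

import numpy as np

scores = np.zeros(10)
scores[4] = scores[5] = 0.5   # all probability mass on ratings 5 and 6
print(mean_score(scores))     # 0.5 * 5 + 0.5 * 6 = 5.5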
