extract TV-L1 from HMDB-51 #5

Open
wants to merge 1 commit into base: master

51 changes: 51 additions & 0 deletions preprocess/class_names.txt
@@ -0,0 +1,51 @@
punch
pushup
sword_exercise
shoot_ball
dive
sit
turn
throw
swing_baseball
climb_stairs
cartwheel
kick_ball
draw_sword
laugh
sword
hug
eat
handstand
kick
flic_flac
pour
brush_hair
hit
pick
clap
catch
shoot_gun
jump
fall_floor
fencing
run
wave
situp
golf
smile
push
pullup
talk
somersault
climb
shoot_bow
stand
ride_horse
chew
ride_bike
dribble
walk
shake_hands
kiss
smoke
drink
74 changes: 74 additions & 0 deletions preprocess/download_hmdb.sh
@@ -0,0 +1,74 @@
#!/bin/bash

# Script to download the HMDB-51 data set.
#
# usage:
# bash download_hmdb.sh [data dir]
set -e

if [ -z "$1" ]; then
echo "usage download_and_preproces_hmdb.sh [data dir]"
exit
fi

# Useful commands
UNRAR="unrar e"

# Create the output directories.
OUTPUT_DIR="${1%/}"
CURRENT_DIR=$(pwd)
mkdir -p "${OUTPUT_DIR}"
REMOTE_URL="http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar"
HMDB_FILE="hmdb51_org.rar"


# Helper function to download and unpack a .rar file.
function download_and_unrar() {
  local DOWNLOAD_URL=${1}
  local OUTPUT_DIR=${2}
  local FILENAME=${3}

  local WORKING_DIR=$(pwd)
  cd ${OUTPUT_DIR}

  if [ ! -f ${FILENAME} ]; then
    echo "Downloading ${FILENAME} to $(pwd)"
    wget -nd -c "${DOWNLOAD_URL}"
  else
    echo "Skipping download of ${FILENAME}"
  fi
  echo "Unrar ${FILENAME}"
  ${UNRAR} ${FILENAME}
  cd ${WORKING_DIR}
}


function extract_videos() {
  local OUTPUT_DIR=${1}

  local WORKING_DIR=$(pwd)
  cd ${OUTPUT_DIR}

  for FOLD in *.rar
  do
    local CLASS_FOLD="${FOLD%.*}"
    mkdir ${CLASS_FOLD}
    mv ${FOLD} ${CLASS_FOLD}
    cd ${CLASS_FOLD}
    ${UNRAR} ${FOLD}
    rm ${FOLD}
    cd ..
  done
  cd ${WORKING_DIR}
}


# Download the videos
download_and_unrar ${REMOTE_URL} ${OUTPUT_DIR} ${HMDB_FILE}

# Extract the videos
extract_videos ${OUTPUT_DIR}
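
For example, to place the videos where hmdb_extract_flow.py expects them by default (the target directory below simply mirrors that script's DATA_DIR setting):

bash download_hmdb.sh ./tmp/HMDB/videos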
137 changes: 137 additions & 0 deletions preprocess/hmdb_extract_flow.py
@@ -0,0 +1,137 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
from cv2 import DualTVL1OpticalFlow_create as DualTVL1
from tensorflow.python.platform import app, flags
import os
import sys
import cv2
import threading



import tensorflow as tf
import numpy as np


DATA_DIR = './tmp/HMDB/videos'
SAVE_DIR = './tmp/HMDB/data/flow'

_EXT = ['.avi', '.mp4']
_IMAGE_SIZE = 224
_CLASS_NAMES = 'class_names.txt'

FLAGS = flags.FLAGS

flags.DEFINE_string('data_dir', DATA_DIR, 'directory containing data.')
flags.DEFINE_string('save_to', SAVE_DIR, 'where to save flow data.')
flags.DEFINE_string('name', 'HMDB', 'dataset name.')
flags.DEFINE_integer('num_threads', 32, 'number of threads.')


def _video_length(video_path):
    """Return the number of frames in a video.

    Args:
        video_path: String path to the video file location.

    Returns:
        Number of frames in the video.

    Raises:
        ValueError: The path is wrong or the file extension is not supported.
    """
    _, ext = os.path.splitext(video_path)
    if ext not in _EXT:
        raise ValueError('Extension "%s" not supported' % ext)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Could not open the file.\n{}".format(video_path))
    if cv2.__version__ >= '3.0.0':
        CAP_PROP_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT
    else:
        CAP_PROP_FRAME_COUNT = cv2.cv.CV_CAP_PROP_FRAME_COUNT
    length = int(cap.get(CAP_PROP_FRAME_COUNT))
    cap.release()
    return length

def compute_TVL1(video_path):
    """Compute the TV-L1 optical flow."""
    TVL1 = DualTVL1()
    cap = cv2.VideoCapture(video_path)

    ret, frame1 = cap.read()
    prev = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    prev = cv2.resize(prev, (_IMAGE_SIZE, _IMAGE_SIZE))
    flow = []
    vid_len = _video_length(video_path)
    for _ in range(vid_len - 2):
        ret, frame2 = cap.read()
        curr = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
        curr = cv2.resize(curr, (_IMAGE_SIZE, _IMAGE_SIZE))

Review comment:

in the original author's implementation, image is first resized preserving aspect ratio (the smallest dimension is 256 pixels), then it's cropped to 224x224.

Review reply:

Are you sure about this? It says in the README of this repository that:

For RGB, the videos are resized preserving aspect ratio so that the smallest dimension is 256 pixels, with bilinear interpolation. Pixel values are then rescaled between -1 and 1. During training, we randomly select a 224x224 image crop, while during test, we select the center 224x224 image crop from the video. The provided .npy file thus has shape (1, num_frames, 224, 224, 3) for RGB, corresponding to a batch size of 1.

For the Flow stream, after sampling the videos at 25 frames per second, we convert the videos to grayscale. We apply a TV-L1 optical flow algorithm, similar to this code from OpenCV. Pixel values are truncated to the range [-20, 20], then rescaled between -1 and 1. We only use the first two output dimensions, and apply the same cropping as for RGB. The provided .npy file thus has shape (1, num_frames, 224, 224, 2) for Flow, corresponding to a batch size of 1.

I take that to mean that the flow images are not resized at all until they are used as input to the model, which suggests that their resizing happens in-graph.
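
For reference, a rough sketch of the resize-then-crop variant discussed above; the helper name and the center-crop choice are illustrative only and not part of this PR:

def _resize_min_side_then_center_crop(frame, min_side=256, crop_size=224):
    # Illustrative helper: scale so the smaller dimension equals `min_side`,
    # then take the central `crop_size` x `crop_size` window.
    h, w = frame.shape[:2]
    scale = float(min_side) / min(h, w)
    resized = cv2.resize(frame, (int(round(w * scale)), int(round(h * scale))))
    new_h, new_w = resized.shape[:2]
    top = (new_h - crop_size) // 2
    left = (new_w - crop_size) // 2
    return resized[top:top + crop_size, left:left + crop_size]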

        curr_flow = TVL1.calc(prev, curr, None)
        assert curr_flow.dtype == np.float32
        # Truncate values to the range [-20, 20].
        curr_flow[curr_flow >= 20] = 20
        curr_flow[curr_flow <= -20] = -20
        # Scale to [-1, 1] using the largest absolute value in this frame.
        max_val = lambda x: max(max(x.flatten()), abs(min(x.flatten())))
        curr_flow = curr_flow / max_val(curr_flow)
        flow.append(curr_flow)
        prev = curr
    cap.release()
    flow = np.array(flow)
    return flow

def _process_video_files(thread_index, filenames, save_to):
    for filename in filenames:
        flow = compute_TVL1(filename)
        fullname, _ = os.path.splitext(filename)
        split_name = fullname.split('/')
        save_name = os.path.join(save_to, split_name[-2], split_name[-1] + '.npy')
        np.save(save_name, flow)
        print("%s [thread %d]: %s done." % (datetime.now(), thread_index, filename))
        sys.stdout.flush()

def _process_dataset():
    filenames = [filename
                 for class_fold in tf.gfile.Glob(os.path.join(FLAGS.data_dir, '*'))
                 for filename in tf.gfile.Glob(os.path.join(class_fold, '*'))]
    filename_chunk = np.array_split(filenames, FLAGS.num_threads)
    threads = []

    # Create a mechanism for monitoring when all threads are finished.
    coord = tf.train.Coordinator()

    # Launch a thread for each batch.
    print("Launching %s threads." % FLAGS.num_threads)
    for thread_index in range(FLAGS.num_threads):
        args = (thread_index, filename_chunk[thread_index], FLAGS.save_to)
        t = threading.Thread(target=_process_video_files, args=args)
        t.start()
        threads.append(t)

    # Wait for all the threads to terminate.
    coord.join(threads)
    print("%s: Finished processing all %d videos in data set '%s'." %
          (datetime.now(), len(filenames), FLAGS.name))


def main(unused_argv):
    if not tf.gfile.IsDirectory(FLAGS.save_to):
        tf.gfile.MakeDirs(FLAGS.save_to)
    with open(_CLASS_NAMES) as f:
        classes = [cls.strip() for cls in f.readlines()]
    for cls in classes:
        tf.gfile.MakeDirs(os.path.join(FLAGS.save_to, cls))

    _process_dataset()


if __name__ == '__main__':
    app.run()
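
A minimal sanity check of the saved arrays (the file path below is only an example under the default SAVE_DIR, not an actual filename from the data set):

import numpy as np

flow = np.load('./tmp/HMDB/data/flow/punch/example_clip.npy')  # example path
print(flow.shape)              # (num_frames - 2, 224, 224, 2), given the loop above
print(flow.dtype)              # float32
print(flow.min(), flow.max())  # the per-frame scaling keeps values within [-1, 1]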
10 changes: 10 additions & 0 deletions preprocess/hmdb_flow.sh
@@ -0,0 +1,10 @@
#!/bin/bash

# Script to extract the TV-L1 flow from the HMDB-51 data set.
#
# usage:
# bash hmdb_flow.sh [num threads]
NUM_THREADS=${1}

python hmdb_extract_flow.py \
--num_threads=${NUM_THREADS}
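
For example, assuming download_hmdb.sh has already populated the default data directory, the flow extraction could be launched with 16 worker threads:

bash hmdb_flow.sh 16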