-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathcharades_dataloader.py
133 lines (108 loc) · 4.52 KB
/
charades_dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import torch
import torch.utils.data as data_utl
from torch.utils.data.dataloader import default_collate
import numpy as np
import json
import os
import os.path
from tqdm import tqdm
import random
from utils import *
def make_dataset(split_file, split, root, num_classes=157, gamma=0.5, tau=4, ku=1):
    """Build the Charades sample list from a JSON split file and pre-extracted features.

    Parameters
    ----------
    split_file : path to the Charades JSON annotation file.
    split : subset name to keep (e.g. 'training' or 'testing').
    root : directory containing one '<vid>.npy' feature file per video.
    num_classes : number of action classes (Charades has 157).
    gamma : divisor relating an action's frame length to its Gaussian radius.
    tau, ku : shape parameters forwarded to generate_gaussian.

    Returns
    -------
    list of tuples ``(vid, label, duration, [hmap, num_action, center_loc, action_lengths])``
    where ``label`` is a (num_feat, num_classes) binary matrix and ``hmap`` is the
    ground-truth center heat-map of the same shape.
    """
    dataset = []
    with open(split_file, 'r') as f:
        data = json.load(f)
    print('split!!!!', split)
    for vid in tqdm(data.keys()):
        if data[vid]['subset'] != split:
            continue
        # Skip videos whose feature file is missing or that have no annotations.
        feat_path = os.path.join(root, vid + '.npy')
        if not os.path.exists(feat_path):
            continue
        if len(data[vid]['actions']) < 1:
            continue
        fts = np.load(feat_path)
        num_feat = fts.shape[0]
        label = np.zeros((num_feat, num_classes), np.float32)
        # G*: ground-truth center heat-map, one channel per class.
        hmap = np.zeros((num_feat, num_classes), np.float32)
        action_lengths = []
        center_loc = []
        num_action = 0
        # Frames per second of the feature sequence, derived from clip duration.
        fps = num_feat / data[vid]['duration']
        for ann in data[vid]['actions']:
            # Skip malformed annotations whose end time precedes the start time.
            if ann[2] < ann[1]:
                continue
            mid_point = (ann[2] + ann[1]) / 2
            for fr in range(num_feat):
                # Per-frame binary classification target for class ann[0].
                if fr / fps > ann[1] and fr / fps < ann[2]:
                    label[fr, ann[0]] = 1
                # The frame whose [fr, fr+1) time span contains the action
                # midpoint becomes the Gaussian center on the heat-map.
                if (fr + 1) / fps > mid_point and fr / fps < mid_point:
                    center = fr + 1
                    class_ = ann[0]
                    action_duration = int((ann[2] - ann[1]) * fps)
                    radius = int(action_duration / gamma)
                    generate_gaussian(hmap[:, class_], center, radius, tau, ku)
                    num_action += 1
                    center_loc.append([center, class_])
                    action_lengths.append([action_duration])
        dataset.append((vid, label, data[vid]['duration'],
                        [hmap, num_action, np.asarray(center_loc), np.asarray(action_lengths)]))
    return dataset
class Charades(data_utl.Dataset):
    """Dataset of pre-extracted Charades features with per-frame labels and heat-maps."""

    def __init__(self, split_file, split, root, batch_size, classes, num_clips, skip):
        self.data = make_dataset(split_file, split, root, classes)
        self.split = split
        self.split_file = split_file
        self.batch_size = batch_size
        self.root = root
        self.in_mem = {}
        self.num_clips = num_clips
        self.skip = skip

    def __getitem__(self, index):
        # Each entry: (vid, labels, duration, [hmap, num_action, center_loc, action_lengths]).
        vid, labels, duration, extras = self.data[index]
        hmap, num_action, center_loc, action_lengths = extras
        raw = np.load(os.path.join(self.root, vid + '.npy'))
        # Reshape flat per-frame features to (T, 1, 1, C) float32.
        features = raw.reshape((raw.shape[0], 1, 1, raw.shape[-1])).astype(np.float32)
        clips = self.num_clips
        # Crop a fixed-length temporal window: deterministic (start 0) for
        # testing, a random start for training; other splits are untouched.
        if self.split in ("training", "testing") and clips > 0 and len(features) > clips:
            start = 0 if self.split == "testing" else random.choice(range(0, len(features) - clips))
            features = features[start:start + clips:1]
            labels = labels[start:start + clips:1]
            hmap = hmap[start:start + clips:1]
        return features, labels, hmap, action_lengths, [vid, duration, num_action]

    def __len__(self):
        return len(self.data)
class collate_fn_unisize():
    """Collate helper that pads every sample in a batch to a fixed clip count.

    Each padded sample becomes: features, a frame-validity mask, labels,
    the metadata list, and the heat-map — then default_collate stacks them.
    """

    def __init__(self, num_clips):
        # Target (padded) temporal length for every sample in the batch.
        self.num_clips = num_clips

    def charades_collate_fn_unisize(self, batch):
        """Pad each (features, labels, hmap, action_lengths, meta) sample to
        ``num_clips`` frames and collate the batch into stacked tensors."""
        max_len = int(self.num_clips)
        new_batch = []
        for b in batch:
            # b comes from Charades.__getitem__; b[3] (action_lengths) is
            # intentionally not collated here.
            t = b[0].shape[0]  # actual (unpadded) number of clips
            f = np.zeros((max_len, b[0].shape[1], b[0].shape[2], b[0].shape[3]), np.float32)
            m = np.zeros(max_len, np.float32)
            l = np.zeros((max_len, b[1].shape[1]), np.float32)
            h = np.zeros((max_len, b[2].shape[1]), np.float32)
            f[:t] = b[0]
            m[:t] = 1  # 1 marks real frames, 0 marks zero-padding
            l[:t, :] = b[1]
            h[:t, :] = b[2]
            new_batch.append([video_to_tensor(f), torch.from_numpy(m),
                              torch.from_numpy(l), b[4], torch.from_numpy(h)])
        return default_collate(new_batch)