upload I3D Anet features

Alvin-Zeng · Jul 4, 2020 · 3a0e6ff · 3a0e6ff
1 parent 46428da
commit 3a0e6ff
Show file tree

Hide file tree

Showing 6 changed files with 154 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -16,6 +16,8 @@ Runhao Zeng*, Wenbing Huang*, Mingkui Tan, Yu Rong, Peilin Zhao, Junzhou Huang,
 20/12/2019 We have uploaded the RGB features, trained models and evaluation results! We found that increasing the number of proposals to 800 at test time
 further boosts the performance on THUMOS14. We have also updated the proposal list.
 
+04/07/2020 We have uploaded the I3D features for ActivityNet (Anet), along with the training configuration in data/dataset_cfg.yaml.
+
 
 # Contents
 ----
@@ -75,7 +77,10 @@ You can use the [official ActivityNet downloader][anet_down] to download videos
 
 ### Download Features
 
-Here, we provide the I3D features (RGB+Flow) for training and testing. You can download it from [Google Cloud][features_google] or [Baidu Cloud][features_baidu].
+Here, we provide the I3D features (RGB+Flow) for training and testing.
+THUMOS14: You can download them from [Google Cloud][features_google] or [Baidu Cloud][features_baidu].
+Anet: You can download the I3D Flow features from [Baidu Cloud][features_baidu_anet_flow] (password: jbsa)
+and the I3D RGB features from [Google Cloud][features_google_anet_rgb]. (Note: set `interval` to 16 in ops/I3D_Pooling_Anet.py when training with RGB features.)
 
 
 ## Training PGCN
@@ -184,3 +189,5 @@ Runhao Zeng: [email protected]
 [emv]:https://github.com/zbwglory/MV-release
 [features_google]: https://drive.google.com/open?id=1C6829qlU_vfuiPdJSqHz3qSqqc0SDCr_
 [features_baidu]: https://pan.baidu.com/s/1Dqbcm5PKbK-8n0ZT9KzxGA
+[features_baidu_anet_flow]: https://pan.baidu.com/s/1irWHfdF8RJCQcy1D10GlfA 
+[features_google_anet_rgb]: https://drive.google.com/drive/folders/1UHT3S--vo8MCT8AX3ajHE6TcAThDxFlF?usp=sharing 
diff --git a/__pycache__/pgcn_models.cpython-36.pyc b/__pycache__/pgcn_models.cpython-36.pyc
diff --git a/data/dataset_cfg.yaml b/data/dataset_cfg.yaml
@@ -1,8 +1,8 @@
 thumos14:
   dataset_configs:
 
-    train_ft_path: /home/datasets/THUMOS14/I3D_video_level/Flow_Val_All
-    test_ft_path: /home/datasets/THUMOS14/I3D_video_level/Flow_Test_All
+    train_ft_path: /home/datasets/THUMOS14/I3D_video_level/Rgb_Train_All
+    test_ft_path: /home/datasets/THUMOS14/I3D_video_level/Rgb_Test_All
 
     train_dict_path: data/thumos14_train_prop_dict.pkl
     val_dict_path: data/thumos14_val_prop_dict.pkl
@@ -47,3 +47,52 @@ thumos14:
     nms_threshold: 0.2
 
 
+
+activitynet1.3:
+  dataset_configs:
+
+    train_ft_path: /home/datasets/
+    test_ft_path: /home/datasets/
+
+    train_dict_path: data/
+    val_dict_path: data/
+    test_dict_path: data/
+    train_prop_file: data/
+    test_prop_file: data/
+
+    training_epoch_multiplier: 5
+    testing_epoch_multiplier: 1
+
+    fg_iou_thresh: 0.7
+    bg_iou_thresh: 0.1
+    incomplete_iou_thresh: 0.6
+    bg_coverage_thresh: 0
+    incomplete_overlap_thresh: 0
+    prop_per_video: 8
+    fg_ratio: 1
+    bg_ratio: 1
+    incomplete_ratio: 6
+    iou_threshold: 0.7
+    dis_threshold: 0
+    starting_ratio: 0.5
+    ending_ratio: 0.5
+
+
+  graph_configs:
+    adj_num: 21
+    child_num: 4
+    iou_num: 8
+    dis_num: 2
+
+  model_configs:
+    num_class: 200
+    act_feat_dim: 1024
+    comp_feat_dim: 3072
+    dropout: 0.8
+    gcn_dropout: 0.7
+
+
+  evaluation:
+    top_k: 100
+    nms_threshold: 0.4
+
diff --git a/ops/I3D_Pooling.py b/ops/I3D_Pooling.py
@@ -7,7 +7,7 @@
 
 def I3D_Pooling(prop_indices, vid, ft_path, n_frame, n_seg=1):
 
-    ft_tensor = torch.load(os.path.join(ft_path, vid))
+    ft_tensor = torch.load(os.path.join(ft_path, vid)).float()
     fts_all_act = []
     fts_all_comp = []
 

diff --git a/ops/I3D_Pooling_Anet.py b/ops/I3D_Pooling_Anet.py
@@ -0,0 +1,94 @@
+import torch
+import os
+import numpy as np
+from numpy.random import randint
+import pandas as pd
+import time
+
+def _pool_proposal(prop, vid, n_frame, n_seg, ft_tensor):
+    """Pool one proposal into its action feature and its completeness feature.
+
+    ``prop`` is indexed as ``[act_s, act_e, comp_s, comp_e]``. Three spans are
+    max-pooled with ``feature_pooling``: ``comp_s..act_s`` (start context),
+    ``act_e..comp_e`` (end context) and ``act_s..act_e`` (the action itself).
+
+    :return: ``(act_ft, comp_ft)`` where ``comp_ft`` is the start, action and
+        end features concatenated along dim 0, in that order.
+    """
+    act_s, act_e, comp_s, comp_e = prop[0], prop[1], prop[2], prop[3]
+
+    start_ft = feature_pooling(comp_s, act_s, vid,
+                               n_frame, n_seg, 'max', ft_tensor)
+    end_ft = feature_pooling(act_e, comp_e, vid,
+                             n_frame, n_seg, 'max', ft_tensor)
+    act_ft = feature_pooling(act_s, act_e, vid,
+                             n_frame, n_seg, 'max', ft_tensor)
+    comp_ft = torch.cat([start_ft, act_ft, end_ft], dim=0)
+
+    return act_ft, comp_ft
+
+
+def I3D_Pooling(prop_indices, vid, ft_path, n_frame, n_seg, vids=None):
+    """Pool action and completeness features for every proposal.
+
+    :param prop_indices: iterable of proposals, each indexable as
+        ``[act_s, act_e, comp_s, comp_e]``.
+    :param vid: feature file name (relative to ``ft_path``) used for all
+        proposals when ``vids`` is None; also forwarded to ``feature_pooling``.
+    :param ft_path: directory that holds the feature files.
+    :param n_frame: frame count forwarded to ``feature_pooling``.
+    :param n_seg: number of segments each span is split into.
+    :param vids: optional sequence giving one feature file name per proposal
+        (``vids[i]`` belongs to ``prop_indices[i]``).
+    :return: ``(fts_all_act, fts_all_comp)``, the per-proposal features
+        stacked with ``torch.stack``.
+
+    ``torch.stack`` raises on an empty list, so ``prop_indices`` must contain
+    at least one proposal.
+    """
+    fts_all_act = []
+    fts_all_comp = []
+
+    if vids is None:
+        # Every proposal reads the same file: load it once, up front.
+        loaded_name = vid
+        ft_tensor = torch.load(os.path.join(ft_path, vid))
+    else:
+        loaded_name = None
+        ft_tensor = None
+
+    for cnt, prop in enumerate(prop_indices):
+        if vids is not None and (ft_tensor is None or vids[cnt] != loaded_name):
+            # Only touch the disk when the file differs from the one already
+            # in memory; consecutive proposals naming the same file share it.
+            loaded_name = vids[cnt]
+            ft_tensor = torch.load(os.path.join(ft_path, loaded_name))
+
+        act_ft, comp_ft = _pool_proposal(prop, vid, n_frame, n_seg, ft_tensor)
+        fts_all_act.append(act_ft)
+        fts_all_comp.append(comp_ft)
+
+    fts_all_act = torch.stack(fts_all_act)
+    fts_all_comp = torch.stack(fts_all_comp)
+
+    return fts_all_act, fts_all_comp
+
+def feature_pooling(start_ind, end_ind, vid, n_frame, n_seg, type, ft_tensor):
+    """Max-pool the rows of ``ft_tensor`` that cover ``start_ind..end_ind``.
+
+    The span is divided into ``n_seg`` segments by ``sample_indices``. Each
+    segment's boundaries are scaled by ``100 / n_frame`` and clamped to the
+    available rows (presumably the features are resampled to a fixed 100 rows
+    per video -- TODO confirm). A segment becomes the element-wise max over
+    its rows, or just its first row when the clamped end row is not after the
+    start row. The per-segment vectors are concatenated.
+
+    :param start_ind: first index of the span.
+    :param end_ind: last index of the span.
+    :param vid: accepted for interface compatibility; not read here.
+    :param n_frame: divisor used to scale indices onto feature rows.
+    :param n_seg: number of segments to pool separately.
+    :param type: accepted for interface compatibility; not read here
+        (pooling is always a max).
+    :param ft_tensor: feature tensor whose first dimension is indexed by row.
+    :return: the concatenated segment features, squeezed.
+    """
+    # NOTE(review): `interval` and `clip_length` are assigned but never read
+    # in this function, so editing them does not change the pooled rows --
+    # confirm whether they are still needed.
+    interval = 8
+    clip_length = 64
+
+    seg_offsets, seg_len = sample_indices(start_ind, end_ind, n_seg)
+    n_rows = ft_tensor.size()[0]
+
+    pooled = []
+    for seg_off in seg_offsets:
+        seg_begin = start_ind + seg_off
+        first_row = int(min(n_rows - 1,
+                            np.floor(float(seg_begin) * 100 / n_frame)))
+        last_row = int(min(n_rows - 2,
+                           np.ceil(float(seg_begin + seg_len) * 100 / n_frame)))
+
+        if not first_row < last_row:
+            # Degenerate window: fall back to the single starting row.
+            pooled.append(ft_tensor[first_row])
+            continue
+
+        window = ft_tensor[first_row: last_row + 1, :]
+        pooled.append(torch.max(window, 0)[0])
+
+    return torch.cat(pooled).squeeze()
+
+def sample_indices(start, end, num_seg):
+    """Split the span from ``start`` to ``end`` into ``num_seg`` equal parts.
+
+    The span length is taken as ``end - start + 1``.
+
+    :param start: first index of the span
+    :param end: last index of the span
+    :param num_seg: number of segments to produce
+    :return: ``(offsets, average_duration)`` -- a numpy array with the start
+        of each segment relative to ``start``, and the length of one segment
+    """
+    seg_len = (end - start + 1) / num_seg
+    seg_offsets = np.multiply(list(range(num_seg)), seg_len)
+    return seg_offsets, seg_len
diff --git a/ops/__pycache__/I3D_Pooling.cpython-36.pyc b/ops/__pycache__/I3D_Pooling.cpython-36.pyc