-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
4,298 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2011/_2LMj-4PtdU.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2012/_6AGOfGHzeg.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2013/_8N1uJZUyaY.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2014/_epn5foR_Ts.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2015/_ItWcGtaJro.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2016/_emU23tTUAw.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2017/_6RI-8Ia4do.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2018/_HcDe70cSRU.mkv | ||
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2019/_f7p28YFgvc.mkv |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#helper scripts to check if the shot segment files overlap in two directories | ||
|
||
import os | ||
from tqdm import tqdm | ||
|
||
def generate_file_name(file_path):
    """Return the part of *file_path* that precedes its trailing "Scenes" marker.

    The character immediately before the marker (presumably a "-" separator,
    as in "<clip_id>-Scenes.csv" -- confirm against the scene-file naming) is
    dropped together with the marker and everything after it.

    Args:
        file_path: name (or path) of a scene file containing "Scenes".

    Returns:
        The leading portion of ``file_path``; an empty string when the marker
        sits at the very start of the name.

    Raises:
        ValueError: if "Scenes" does not occur in ``file_path``.
    """
    # Search from the right: the marker is a suffix, so an identifier that
    # happens to contain "Scenes" itself must not be cut short.
    scenes_index = file_path.rindex("Scenes")
    # Clamp at 0 so a marker at position 0 yields "" rather than the
    # accidental slice [0:-1].
    return file_path[:max(scenes_index - 1, 0)]
|
||
def generate_cmd_clip_list(folder_list, base_folder):
    """Gather the joined paths of every entry found in the given sub-folders.

    Args:
        folder_list: names of the sub-folders located under ``base_folder``.
        base_folder: directory that contains the sub-folders.

    Returns:
        A flat list with ``os.path.join(base_folder, folder, entry)`` for each
        entry reported by ``os.listdir``, visited in ``folder_list`` order.
    """
    collected_paths = []
    for sub_dir in tqdm(folder_list):
        sub_dir_path = os.path.join(base_folder, sub_dir)
        collected_paths.extend(
            os.path.join(sub_dir_path, entry) for entry in os.listdir(sub_dir_path)
        )
    return collected_paths
|
||
|
||
# Directories with the scene (shot segment) files of the two runs, and the
# directory that holds one sub-folder of extracted shots per clip.
shot_segment_file_v1 = "/data/digbose92/ambience_detection/codes/shot_segments/shot_segments_v1"
shot_segment_file_v2 = "/data/digbose92/ambience_detection/codes/shot_segments/shot_segments_v2"
folder = "/data/ambience/Condensed_Movies/video_clips_shots_complete"
shot_subfolder = os.listdir(folder)
# Set copy for O(1) membership tests further down.
shot_subfolder_set = set(shot_subfolder)
print(len(shot_subfolder_set))

# scene file list v1 and v2 (only v1 is filtered down to .csv files)
scene_file_list_v1 = [s for s in os.listdir(shot_segment_file_v1) if s.endswith(".csv")]
scene_file_list_v2 = os.listdir(shot_segment_file_v2)

# total scene file list; the label file is not a scene file and is dropped
# (list.remove raises ValueError if it is absent)
total_scene_file_list = scene_file_list_v1 + scene_file_list_v2
total_scene_file_list.remove("Nan_label_top_250.csv")
print(len(total_scene_file_list), len(set(total_scene_file_list)))

# number of total scene files
total_files = 32484

print('Total number of scene files in v1+v2: ', len(scene_file_list_v1) + len(scene_file_list_v2))
print('Remaining files: ', total_files - len(scene_file_list_v1) - len(scene_file_list_v2))

# check intersection between two lists
intersection_list = list(set(scene_file_list_v1).intersection(scene_file_list_v2))
print(len(intersection_list))  # currently zero

# read the list of mkv files in the Condensed movies directory
CMD_clip_file = "/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/clip_list.txt"
with open(CMD_clip_file, 'r') as f:
    CMD_clip_list = f.readlines()

# Base name of every listed clip, plus a base-name -> full-path lookup.
# setdefault keeps the FIRST path for a repeated base name, which is what
# list.index() would have returned.
CMD_clip_list_sample = [c.split("\n")[0].split("/")[-1] for c in CMD_clip_list]
clip_path_by_name = {}
for clip_line, clip_name in zip(CMD_clip_list, CMD_clip_list_sample):
    clip_path_by_name.setdefault(clip_name, clip_line.split("\n")[0])

cnt_present_folder = 0  # should be 28613
cnt_mkv_files = 0  # should be 28613

subfold_present_list = []
for scene_file in tqdm(total_scene_file_list):
    subfolder_name = generate_file_name(scene_file)
    subfold_present_list.append(subfolder_name)
    # Count only scene files whose shot folder exists. A set lookup replaces
    # the former list.index() call, which scanned the whole folder list per
    # file and aborted the script on the first missing folder.
    if subfolder_name in shot_subfolder_set:
        cnt_present_folder += 1

# Shot folders without any scene file; sorted so the rerun list comes out in
# the same order on every run (plain set order is arbitrary).
difference_folder = sorted(shot_subfolder_set - set(subfold_present_list))
filename_incomplete_list = []
for diff_fold in difference_folder:
    mkv_filename = diff_fold + ".mkv"
    if mkv_filename in clip_path_by_name:
        cnt_mkv_files += 1
        filename_incomplete_list.append(clip_path_by_name[mkv_filename])

print(cnt_mkv_files)
print(len(difference_folder) - cnt_mkv_files)
print(filename_incomplete_list)

# NOTE(review): every line is written with a trailing space before the
# newline; kept as-is because readers of this file may rely on it -- confirm.
with open('../data/shots_rerun_incomplete_list.txt', 'w') as f:
    f.writelines("%s \n" % item for item in filename_incomplete_list)
|
||
|
||
# base_folder="/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data" | ||
# folder_list=['2011','2012','2013','2014','2015','2016','2017','2018','2019'] | ||
# cmd_clip_list=generate_cmd_clip_list(folder_list,base_folder)#32333 | ||
|
||
#print(len(cmd_clip_list)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
]633;C]633;E;for fl in `find /data/ambience/Condensed_Movies/test_shots_data/_8LrZ4NhPmk/ -iname *mp4` Duration: 00:00:03.03 | ||
Duration: 00:00:01.36 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:00.61 | ||
Duration: 00:00:00.65 | ||
Duration: 00:00:05.82 | ||
Duration: 00:00:03.69 | ||
Duration: 00:00:01.15 | ||
Duration: 00:00:07.26 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:02.07 | ||
Duration: 00:00:01.40 | ||
Duration: 00:00:01.94 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:02.61 | ||
Duration: 00:00:04.15 | ||
Duration: 00:00:02.23 | ||
Duration: 00:00:05.19 | ||
Duration: 00:00:02.07 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:01.77 | ||
Duration: 00:00:01.07 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:02.23 | ||
Duration: 00:00:01.98 | ||
Duration: 00:00:01.69 | ||
Duration: 00:00:02.44 | ||
Duration: 00:00:03.07 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:03.32 | ||
Duration: 00:00:02.44 | ||
Duration: 00:00:03.19 | ||
Duration: 00:00:02.57 | ||
Duration: 00:00:01.82 | ||
Duration: 00:00:04.53 | ||
Duration: 00:00:01.40 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:04.57 | ||
Duration: 00:00:01.44 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:00.98 | ||
Duration: 00:00:04.40 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:01.77 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:04.36 | ||
Duration: 00:00:01.52 | ||
Duration: 00:00:01.11 | ||
Duration: 00:00:01.48 | ||
Duration: 00:00:01.19 | ||
Duration: 00:00:02.61 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
]633;C]633;E;for fl in `find /data/ambience/Condensed_Movies/video_clips_shots_complete/_8LrZ4NhPmk/ -iname *mp4` Duration: 00:00:03.03 | ||
Duration: 00:00:01.36 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:00.61 | ||
Duration: 00:00:00.65 | ||
Duration: 00:00:05.82 | ||
Duration: 00:00:03.69 | ||
Duration: 00:00:01.15 | ||
Duration: 00:00:07.26 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:02.07 | ||
Duration: 00:00:01.40 | ||
Duration: 00:00:01.94 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:02.61 | ||
Duration: 00:00:04.15 | ||
Duration: 00:00:02.23 | ||
Duration: 00:00:05.19 | ||
Duration: 00:00:02.07 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:01.77 | ||
Duration: 00:00:01.07 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:02.23 | ||
Duration: 00:00:01.98 | ||
Duration: 00:00:01.69 | ||
Duration: 00:00:02.44 | ||
Duration: 00:00:03.07 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:03.32 | ||
Duration: 00:00:02.44 | ||
Duration: 00:00:03.19 | ||
Duration: 00:00:02.57 | ||
Duration: 00:00:01.82 | ||
Duration: 00:00:04.53 | ||
Duration: 00:00:01.40 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:04.57 | ||
Duration: 00:00:01.44 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:00.98 | ||
Duration: 00:00:04.40 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:01.77 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:04.36 | ||
Duration: 00:00:01.52 | ||
Duration: 00:00:01.11 | ||
Duration: 00:00:01.48 | ||
Duration: 00:00:01.19 | ||
Duration: 00:00:02.61 |
75 changes: 75 additions & 0 deletions
75
preprocess_scripts/duration_txt_files/test_durs_original_ItWcGtaJro.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
]633;C]633;E;for fl in `find /data/ambience/Condensed_Movies/video_clips_shots_complete/_ItWcGtaJro -iname *mp4` Duration: 00:00:01.61 | ||
Duration: 00:00:01.15 | ||
Duration: 00:00:02.44 | ||
Duration: 00:00:02.23 | ||
Duration: 00:00:02.73 | ||
Duration: 00:00:01.36 | ||
Duration: 00:00:04.36 | ||
Duration: 00:00:03.19 | ||
Duration: 00:00:01.69 | ||
Duration: 00:00:01.27 | ||
Duration: 00:00:01.73 | ||
Duration: 00:00:01.94 | ||
Duration: 00:00:03.86 | ||
Duration: 00:00:03.65 | ||
Duration: 00:00:01.73 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:01.69 | ||
Duration: 00:00:02.11 | ||
Duration: 00:00:02.15 | ||
Duration: 00:00:00.67 | ||
Duration: 00:00:03.78 | ||
Duration: 00:00:03.19 | ||
Duration: 00:00:02.65 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:01.73 | ||
Duration: 00:00:01.02 | ||
Duration: 00:00:02.19 | ||
Duration: 00:00:01.73 | ||
Duration: 00:00:00.98 | ||
Duration: 00:00:03.44 | ||
Duration: 00:00:04.49 | ||
Duration: 00:00:02.48 | ||
Duration: 00:00:01.48 | ||
Duration: 00:00:01.23 | ||
Duration: 00:00:02.69 | ||
Duration: 00:00:03.94 | ||
Duration: 00:00:01.44 | ||
Duration: 00:00:01.86 | ||
Duration: 00:00:01.27 | ||
Duration: 00:00:01.48 | ||
Duration: 00:00:01.57 | ||
Duration: 00:00:01.73 | ||
Duration: 00:00:01.65 | ||
Duration: 00:00:01.94 | ||
Duration: 00:00:01.77 | ||
Duration: 00:00:03.07 | ||
Duration: 00:00:01.23 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:01.36 | ||
Duration: 00:00:02.32 | ||
Duration: 00:00:01.57 | ||
Duration: 00:00:02.36 | ||
Duration: 00:00:00.94 | ||
Duration: 00:00:01.19 | ||
Duration: 00:00:01.36 | ||
Duration: 00:00:01.36 | ||
Duration: 00:00:01.44 | ||
Duration: 00:00:02.02 | ||
Duration: 00:00:02.78 | ||
Duration: 00:00:02.11 | ||
Duration: 00:00:01.02 | ||
Duration: 00:00:01.44 | ||
Duration: 00:00:01.32 | ||
Duration: 00:00:03.03 | ||
Duration: 00:00:01.44 | ||
Duration: 00:00:01.23 | ||
Duration: 00:00:01.23 | ||
Duration: 00:00:03.23 | ||
Duration: 00:00:01.11 | ||
Duration: 00:00:01.11 | ||
Duration: 00:00:01.61 | ||
Duration: 00:00:01.57 | ||
Duration: 00:00:03.61 | ||
Duration: 00:00:01.52 |
65 changes: 65 additions & 0 deletions
65
preprocess_scripts/extract_scenes_condensed_movies_clips.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import os | ||
import pandas as pd | ||
import numpy as np | ||
from tqdm import tqdm | ||
import pickle | ||
import multiprocessing as mp | ||
import argparse | ||
|
||
# Output locations: one sub-folder of split clips per video, and one stats
# CSV per video.
destination_scenes_folder = '/data/ambience/Condensed_Movies/video_clips_shots_complete'
csv_scenes_folder = "/data/ambience/Condensed_Movies/video_clips_shots_stats_complete"

# Pickled collection of video file paths, indexed by position in
# extract_scene_clips.
file_list_pickle_file = "/data/digbose92/ambience_detection/pkl-files/Condensed_Movies_updated_list_large_set.pkl"

with open(file_list_pickle_file, "rb") as f:
    file_list = pickle.load(f)
|
||
# def extract_scene_clips(idx): | ||
# vid_file=file_list[idx] | ||
# file_key=vid_file.split("/")[-1][:-4] | ||
# subfolder=os.path.join(destination_scenes_folder,file_key) | ||
# csv_scenes_file=os.path.join(csv_scenes_folder,file_key+".csv") | ||
|
||
# if(os.path.exists(csv_scenes_file) is False): | ||
# os.mkdir(subfolder) | ||
# scene_detect_command="scenedetect --input "+vid_file+ " -s "+csv_scenes_file+" detect-content list-scenes split-video -o "+subfolder | ||
# os.system(scene_detect_command) | ||
def extract_scene_clips(idx):
    """Run ``scenedetect`` on the ``idx``-th video of ``file_list``.

    The video is split into ``destination_scenes_folder/<file_key>`` and its
    stats are written to ``csv_scenes_folder/<file_key>.csv``, where
    ``<file_key>`` is the video's base name without extension. Videos whose
    output folder already exists are skipped.

    Args:
        idx: position of the video path inside the module-level ``file_list``.

    Raises:
        FileNotFoundError: if ``destination_scenes_folder`` does not exist or
            the ``scenedetect`` executable cannot be found.
    """
    # Local import so this block does not depend on the file's import header.
    import subprocess

    vid_file = file_list[idx]
    # splitext handles extensions of any length (the old [:-4] slice assumed
    # exactly three characters after the dot).
    file_key = os.path.splitext(os.path.basename(vid_file))[0]
    subfolder = os.path.join(destination_scenes_folder, file_key)
    csv_scenes_file = os.path.join(csv_scenes_folder, file_key + ".csv")

    # Creating the folder doubles as the "not processed yet" check; doing it
    # in one step avoids the exists()/mkdir() race between pool workers.
    try:
        os.mkdir(subfolder)
    except FileExistsError:
        return

    # Argument list instead of a concatenated shell string, so paths with
    # spaces or shell metacharacters reach scenedetect unchanged.
    scene_detect_command = [
        "scenedetect",
        "--input", vid_file,
        "-s", csv_scenes_file,
        "detect-content",
        "list-scenes",
        "split-video",
        "-o", subfolder,
    ]
    # check=False: as before, a non-zero exit status is not treated as fatal.
    subprocess.run(scene_detect_command, check=False)
|
||
#condensed_movies_folder='/data/ambience/Condensed_Movies/video_clips_downsampled' | ||
|
||
|
||
# #print(len(condensed_movies_folder)) | ||
# for vid_file in tqdm(file_list): | ||
# file_key=vid_file.split("/")[-1][:-4] | ||
# subfolder=os.path.join(destination_scenes_folder,file_key) | ||
# csv_scenes_file=os.path.join(csv_scenes_folder,file_key+".csv") | ||
|
||
# if(os.path.exists(csv_scenes_file) is False): | ||
# os.mkdir(subfolder) | ||
# scene_detect_command="scenedetect --input "+vid_file+ " -s "+csv_scenes_file+" detect-content list-scenes split-video -o "+subfolder | ||
# os.system(scene_detect_command) | ||
#print(scene_detect_command) | ||
#scene_det | ||
#print(subfolder) | ||
#print(file_key) | ||
def main(args):
    """Process every entry of ``file_list`` with a pool of worker processes.

    Args:
        args: parsed command-line arguments; ``args.nj`` is the number of
            worker processes to start.
    """
    # The context manager releases the worker processes even when a task
    # raises; previously the pool was only closed on the success path.
    with mp.Pool(args.nj) as pool:
        pool.map(extract_scene_clips, range(len(file_list)))
|
||
if __name__ == '__main__':
    # Command-line entry point: only the worker count is configurable.
    cli = argparse.ArgumentParser(description='')
    cli.add_argument('--nj', default=16, type=int, help='number of parallel processes')
    main(cli.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import os | ||
import pandas as pd | ||
import numpy as np | ||
from tqdm import tqdm | ||
import pickle | ||
|
||
# One-off run of scenedetect on a single video.
destination_folder = "/data/ambience/Condensed_Movies/test_shots_data"
video_file = "/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2012/_8LrZ4NhPmk.mkv"
csv_file = "test.csv"

# The output folder is named after the video file without its extension.
video_stem, _ = os.path.splitext(os.path.basename(video_file))
subfolder = os.path.join(destination_folder, video_stem)

scene_detect_command = (
    f"scenedetect --input {video_file} -s {csv_file} "
    f"detect-content list-scenes split-video -o {subfolder}"
)

os.system(scene_detect_command)
Oops, something went wrong.