Skip to content

Commit

Permalink
Added movieCLIP taxonomy details
Browse files Browse the repository at this point in the history
  • Loading branch information
digbose92 committed Feb 27, 2023
1 parent 8b5eea2 commit 8e10450
Show file tree
Hide file tree
Showing 9 changed files with 4,298 additions and 0 deletions.
9 changes: 9 additions & 0 deletions data/shot_test_samples.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2011/_2LMj-4PtdU.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2012/_6AGOfGHzeg.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2013/_8N1uJZUyaY.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2014/_epn5foR_Ts.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2015/_ItWcGtaJro.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2016/_emU23tTUAw.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2017/_6RI-8Ia4do.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2018/_HcDe70cSRU.mkv
/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2019/_f7p28YFgvc.mkv
3,752 changes: 3,752 additions & 0 deletions data/shots_rerun_incomplete_list.txt

Large diffs are not rendered by default.

101 changes: 101 additions & 0 deletions preprocess_scripts/check_shot_segment_files_overlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#helper scripts to check if the shot segment files overlap in two directories

import os
from tqdm import tqdm

def generate_file_name(file_path):
    """Return the part of *file_path* that precedes the trailing ``Scenes`` marker.

    The character immediately before the marker (a separator) is dropped as
    well, so ``"<name>?Scenes..."`` yields ``"<name>"``.
    Presumably the inputs look like ``<clip_id>-Scenes.csv`` -- confirm
    against the files in the shot-segment folders.

    The *last* occurrence of ``"Scenes"`` is used so that a name which itself
    contains that word is not truncated early.

    Args:
        file_path: scene-list file name (or path) containing ``"Scenes"``.

    Returns:
        The prefix of *file_path* before the separator and marker; an empty
        string when the marker sits at the very start.

    Raises:
        ValueError: if ``"Scenes"`` does not occur in *file_path*.
    """
    scenes_index = file_path.rindex("Scenes")
    # Clamp at 0: a marker at position 0 would otherwise produce the slice
    # [0:-1] and return almost the whole input instead of an empty prefix.
    prefix_end = max(scenes_index - 1, 0)
    return file_path[:prefix_end]

def generate_cmd_clip_list(folder_list, base_folder):
    """Collect the full paths of every entry inside the given sub-folders.

    Args:
        folder_list: names of sub-folders located directly under *base_folder*.
        base_folder: directory that contains the sub-folders.

    Returns:
        A flat list with ``base_folder/<folder>/<entry>`` for each entry that
        ``os.listdir`` reports, in folder order.

    Raises:
        OSError: propagated from ``os.listdir`` when a sub-folder is missing
            or unreadable.
    """
    cmd_clip_list = []
    for folder in tqdm(folder_list):
        folder_path = os.path.join(base_folder, folder)
        # extend() appends in place; rebuilding the list with ``+`` on every
        # iteration copied all previously collected paths each time.
        cmd_clip_list.extend(
            os.path.join(folder_path, entry) for entry in os.listdir(folder_path)
        )

    return cmd_clip_list


# Folders holding the scene-list files of the two shot-segmentation runs.
shot_segment_file_v1 = "/data/digbose92/ambience_detection/codes/shot_segments/shot_segments_v1"
shot_segment_file_v2 = "/data/digbose92/ambience_detection/codes/shot_segments/shot_segments_v2"
# Folder whose entries are compared against the scene-list file names.
folder = "/data/ambience/Condensed_Movies/video_clips_shots_complete"
shot_subfolder = os.listdir(folder)
# Set view of the sub-folder names for O(1) membership tests below.
shot_subfolder_set = set(shot_subfolder)
print(len(shot_subfolder_set))

# scene file list v1 and v2
scene_file_list_v1 = [s for s in os.listdir(shot_segment_file_v1) if s.endswith(".csv")]
# NOTE(review): v2 is not filtered on ".csv" the way v1 is; any entry without
# "Scenes" in its name makes generate_file_name raise ValueError -- confirm
# the folder only holds scene-list files.
scene_file_list_v2 = os.listdir(shot_segment_file_v2)

# total scene file list, without the one file that is not a scene list
# (filtering instead of list.remove() so a missing file does not raise)
total_scene_file_list = [
    s for s in scene_file_list_v1 + scene_file_list_v2
    if s != "Nan_label_top_250.csv"
]
print(len(total_scene_file_list), len(set(total_scene_file_list)))

# number of total scene files
total_files = 32484

print('Total number of scene files in v1+v2: ', len(scene_file_list_v1) + len(scene_file_list_v2))
print('Remaining files: ', total_files - len(scene_file_list_v1) - len(scene_file_list_v2))

# check intersection between two lists
intersection_list = list(set(scene_file_list_v1).intersection(scene_file_list_v2))
print(len(intersection_list))  # currently zero

# read the list of mkv files in the Condensed movies directory
CMD_clip_file = "/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/clip_list.txt"
with open(CMD_clip_file, 'r') as f:
    CMD_clip_list = f.readlines()

# Base name of every listed clip (line terminator removed).
CMD_clip_list_sample = [c.split("\n")[0].split("/")[-1] for c in CMD_clip_list]

# Base name -> full listed path. setdefault keeps the FIRST line for a
# duplicated base name, matching what list.index() would have returned.
clip_path_by_name = {}
for clip_line, clip_name in zip(CMD_clip_list, CMD_clip_list_sample):
    clip_path_by_name.setdefault(clip_name, clip_line.split("\n")[0])

cnt_present_folder = 0  # should be 28613
cnt_mkv_files = 0  # should be 28613

subfold_present_list = []
for scene_file in tqdm(total_scene_file_list):
    subfolder_name = generate_file_name(scene_file)
    subfold_present_list.append(subfolder_name)
    # A scene file without a matching shot sub-folder aborts the run, as the
    # list.index() lookup did before; only the error message is more explicit.
    if subfolder_name not in shot_subfolder_set:
        raise ValueError("%r is not a sub-folder of %s" % (subfolder_name, folder))
    cnt_present_folder += 1

# Sub-folders that have no scene file yet; sorted so the written list is
# reproducible from run to run (set order is not).
difference_folder = sorted(shot_subfolder_set - set(subfold_present_list))
filename_incomplete_list = []
for diff_fold in difference_folder:
    mkv_filename = diff_fold + ".mkv"
    if mkv_filename in clip_path_by_name:
        cnt_mkv_files += 1
        filename_incomplete_list.append(clip_path_by_name[mkv_filename])

print(cnt_mkv_files)
print(len(difference_folder) - cnt_mkv_files)
print(filename_incomplete_list)

# NOTE(review): each line is written with a space before the newline; kept
# as-is because readers of this file may rely on it -- confirm and drop.
with open('../data/shots_rerun_incomplete_list.txt', 'w') as f:
    for item in filename_incomplete_list:
        f.write("%s \n" % item)


# base_folder="/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data"
# folder_list=['2011','2012','2013','2014','2015','2016','2017','2018','2019']
# cmd_clip_list=generate_cmd_clip_list(folder_list,base_folder)#32333

#print(len(cmd_clip_list))
52 changes: 52 additions & 0 deletions preprocess_scripts/duration_txt_files/test_durs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
]633;C]633;E;for fl in `find /data/ambience/Condensed_Movies/test_shots_data/_8LrZ4NhPmk/ -iname *mp4` Duration: 00:00:03.03
Duration: 00:00:01.36
Duration: 00:00:01.86
Duration: 00:00:00.61
Duration: 00:00:00.65
Duration: 00:00:05.82
Duration: 00:00:03.69
Duration: 00:00:01.15
Duration: 00:00:07.26
Duration: 00:00:02.02
Duration: 00:00:02.07
Duration: 00:00:01.40
Duration: 00:00:01.94
Duration: 00:00:01.86
Duration: 00:00:02.61
Duration: 00:00:04.15
Duration: 00:00:02.23
Duration: 00:00:05.19
Duration: 00:00:02.07
Duration: 00:00:02.19
Duration: 00:00:01.77
Duration: 00:00:01.07
Duration: 00:00:01.61
Duration: 00:00:02.23
Duration: 00:00:01.98
Duration: 00:00:01.69
Duration: 00:00:02.44
Duration: 00:00:03.07
Duration: 00:00:02.19
Duration: 00:00:03.32
Duration: 00:00:02.44
Duration: 00:00:03.19
Duration: 00:00:02.57
Duration: 00:00:01.82
Duration: 00:00:04.53
Duration: 00:00:01.40
Duration: 00:00:02.19
Duration: 00:00:04.57
Duration: 00:00:01.44
Duration: 00:00:01.86
Duration: 00:00:00.98
Duration: 00:00:04.40
Duration: 00:00:01.61
Duration: 00:00:01.77
Duration: 00:00:02.02
Duration: 00:00:01.61
Duration: 00:00:04.36
Duration: 00:00:01.52
Duration: 00:00:01.11
Duration: 00:00:01.48
Duration: 00:00:01.19
Duration: 00:00:02.61
52 changes: 52 additions & 0 deletions preprocess_scripts/duration_txt_files/test_durs_orig.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
]633;C]633;E;for fl in `find /data/ambience/Condensed_Movies/video_clips_shots_complete/_8LrZ4NhPmk/ -iname *mp4` Duration: 00:00:03.03
Duration: 00:00:01.36
Duration: 00:00:01.86
Duration: 00:00:00.61
Duration: 00:00:00.65
Duration: 00:00:05.82
Duration: 00:00:03.69
Duration: 00:00:01.15
Duration: 00:00:07.26
Duration: 00:00:02.02
Duration: 00:00:02.07
Duration: 00:00:01.40
Duration: 00:00:01.94
Duration: 00:00:01.86
Duration: 00:00:02.61
Duration: 00:00:04.15
Duration: 00:00:02.23
Duration: 00:00:05.19
Duration: 00:00:02.07
Duration: 00:00:02.19
Duration: 00:00:01.77
Duration: 00:00:01.07
Duration: 00:00:01.61
Duration: 00:00:02.23
Duration: 00:00:01.98
Duration: 00:00:01.69
Duration: 00:00:02.44
Duration: 00:00:03.07
Duration: 00:00:02.19
Duration: 00:00:03.32
Duration: 00:00:02.44
Duration: 00:00:03.19
Duration: 00:00:02.57
Duration: 00:00:01.82
Duration: 00:00:04.53
Duration: 00:00:01.40
Duration: 00:00:02.19
Duration: 00:00:04.57
Duration: 00:00:01.44
Duration: 00:00:01.86
Duration: 00:00:00.98
Duration: 00:00:04.40
Duration: 00:00:01.61
Duration: 00:00:01.77
Duration: 00:00:02.02
Duration: 00:00:01.61
Duration: 00:00:04.36
Duration: 00:00:01.52
Duration: 00:00:01.11
Duration: 00:00:01.48
Duration: 00:00:01.19
Duration: 00:00:02.61
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
]633;C]633;E;for fl in `find /data/ambience/Condensed_Movies/video_clips_shots_complete/_ItWcGtaJro -iname *mp4` Duration: 00:00:01.61
Duration: 00:00:01.15
Duration: 00:00:02.44
Duration: 00:00:02.23
Duration: 00:00:02.73
Duration: 00:00:01.36
Duration: 00:00:04.36
Duration: 00:00:03.19
Duration: 00:00:01.69
Duration: 00:00:01.27
Duration: 00:00:01.73
Duration: 00:00:01.94
Duration: 00:00:03.86
Duration: 00:00:03.65
Duration: 00:00:01.73
Duration: 00:00:02.19
Duration: 00:00:02.02
Duration: 00:00:01.69
Duration: 00:00:02.11
Duration: 00:00:02.15
Duration: 00:00:00.67
Duration: 00:00:03.78
Duration: 00:00:03.19
Duration: 00:00:02.65
Duration: 00:00:02.02
Duration: 00:00:01.73
Duration: 00:00:01.02
Duration: 00:00:02.19
Duration: 00:00:01.73
Duration: 00:00:00.98
Duration: 00:00:03.44
Duration: 00:00:04.49
Duration: 00:00:02.48
Duration: 00:00:01.48
Duration: 00:00:01.23
Duration: 00:00:02.69
Duration: 00:00:03.94
Duration: 00:00:01.44
Duration: 00:00:01.86
Duration: 00:00:01.27
Duration: 00:00:01.48
Duration: 00:00:01.57
Duration: 00:00:01.73
Duration: 00:00:01.65
Duration: 00:00:01.94
Duration: 00:00:01.77
Duration: 00:00:03.07
Duration: 00:00:01.23
Duration: 00:00:01.61
Duration: 00:00:01.36
Duration: 00:00:02.32
Duration: 00:00:01.57
Duration: 00:00:02.36
Duration: 00:00:00.94
Duration: 00:00:01.19
Duration: 00:00:01.36
Duration: 00:00:01.36
Duration: 00:00:01.44
Duration: 00:00:02.02
Duration: 00:00:02.78
Duration: 00:00:02.11
Duration: 00:00:01.02
Duration: 00:00:01.44
Duration: 00:00:01.32
Duration: 00:00:03.03
Duration: 00:00:01.44
Duration: 00:00:01.23
Duration: 00:00:01.23
Duration: 00:00:03.23
Duration: 00:00:01.11
Duration: 00:00:01.11
Duration: 00:00:01.61
Duration: 00:00:01.57
Duration: 00:00:03.61
Duration: 00:00:01.52
65 changes: 65 additions & 0 deletions preprocess_scripts/extract_scenes_condensed_movies_clips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
import multiprocessing as mp
import argparse

# Parent folder that receives one sub-folder of split clips per input video.
destination_scenes_folder = '/data/ambience/Condensed_Movies/video_clips_shots_complete'
# Folder that receives one "<video name>.csv" file per input video.
csv_scenes_folder = "/data/ambience/Condensed_Movies/video_clips_shots_stats_complete"
# Pickled sequence of video file paths (indexed by position and split on "/"
# by extract_scene_clips).
file_list_pickle_file = "/data/digbose92/ambience_detection/pkl-files/Condensed_Movies_updated_list_large_set.pkl"

# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only point file_list_pickle_file at a pickle produced by a trusted source.
with open(file_list_pickle_file, "rb") as f:
    file_list = pickle.load(f)

# def extract_scene_clips(idx):
# vid_file=file_list[idx]
# file_key=vid_file.split("/")[-1][:-4]
# subfolder=os.path.join(destination_scenes_folder,file_key)
# csv_scenes_file=os.path.join(csv_scenes_folder,file_key+".csv")

# if(os.path.exists(csv_scenes_file) is False):
# os.mkdir(subfolder)
# scene_detect_command="scenedetect --input "+vid_file+ " -s "+csv_scenes_file+" detect-content list-scenes split-video -o "+subfolder
# os.system(scene_detect_command)
def extract_scene_clips(idx):
    """Run ``scenedetect`` on the ``idx``-th video of ``file_list``.

    The video's base name (extension removed) names both the output
    sub-folder under ``destination_scenes_folder`` and the CSV file under
    ``csv_scenes_folder``. Videos whose output sub-folder already exists are
    skipped.

    NOTE(review): indentation was lost in the reviewed source; the command is
    assumed to run only when the sub-folder is newly created -- confirm.

    Args:
        idx: position of the video path inside the module-level ``file_list``.

    Returns:
        None. The exit status of ``scenedetect`` is not inspected, as before.
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    vid_file = file_list[idx]
    # splitext handles any extension length; slicing off four characters was
    # only correct for three-letter extensions such as ".mkv".
    file_key = os.path.splitext(os.path.basename(vid_file))[0]
    subfolder = os.path.join(destination_scenes_folder, file_key)
    csv_scenes_file = os.path.join(csv_scenes_folder, file_key + ".csv")

    if os.path.exists(subfolder):
        return

    # exist_ok avoids a crash when another pool worker creates the same
    # folder between the existence check and this call.
    os.makedirs(subfolder, exist_ok=True)
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters reach scenedetect unchanged.
    scene_detect_command = [
        "scenedetect",
        "--input", vid_file,
        "-s", csv_scenes_file,
        "detect-content",
        "list-scenes",
        "split-video",
        "-o", subfolder,
    ]
    subprocess.run(scene_detect_command, check=False)

#condensed_movies_folder='/data/ambience/Condensed_Movies/video_clips_downsampled'


# #print(len(condensed_movies_folder))
# for vid_file in tqdm(file_list):
# file_key=vid_file.split("/")[-1][:-4]
# subfolder=os.path.join(destination_scenes_folder,file_key)
# csv_scenes_file=os.path.join(csv_scenes_folder,file_key+".csv")

# if(os.path.exists(csv_scenes_file) is False):
# os.mkdir(subfolder)
# scene_detect_command="scenedetect --input "+vid_file+ " -s "+csv_scenes_file+" detect-content list-scenes split-video -o "+subfolder
# os.system(scene_detect_command)
#print(scene_detect_command)
#scene_det
#print(subfolder)
#print(file_key)
def main(args):
    """Process every entry of ``file_list`` with a pool of worker processes.

    Args:
        args: parsed command-line namespace; ``args.nj`` is the number of
            worker processes.
    """
    # The with-block releases the pool even if map() raises. map() blocks
    # until every index has been processed, so no work is cut short on exit.
    with mp.Pool(args.nj) as pool:
        pool.map(extract_scene_clips, range(len(file_list)))

if __name__ == '__main__':
    # Command-line entry point: read the worker count and start processing.
    cli_parser = argparse.ArgumentParser(description='')
    cli_parser.add_argument(
        '--nj',
        type=int,
        default=16,
        help='number of parallel processes',
    )
    args = cli_parser.parse_args()
    main(args)
13 changes: 13 additions & 0 deletions preprocess_scripts/test_shot_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle

# Imported here so this script section is self-contained.
import subprocess

# Parent folder for the split clips of this single-video trial run.
destination_folder = "/data/ambience/Condensed_Movies/test_shots_data"
video_file = "/data/ambience/Condensed_Movies/Condensed_Movies_downloaded_data/2012/_8LrZ4NhPmk.mkv"
# Output sub-folder is named after the video file without its extension.
subfolder = os.path.join(destination_folder, os.path.splitext(os.path.basename(video_file))[0])
csv_file = "test.csv"
# Argument list instead of a shell string, so paths with spaces or shell
# metacharacters are passed to scenedetect unchanged.
scene_detect_command = [
    "scenedetect",
    "--input", video_file,
    "-s", csv_file,
    "detect-content",
    "list-scenes",
    "split-video",
    "-o", subfolder,
]

# The exit status is not checked, matching the previous os.system call.
subprocess.run(scene_detect_command, check=False)
Loading

0 comments on commit 8e10450

Please sign in to comment.