Skip to content

Commit

Permalink
Added label distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
digbose92 committed Oct 1, 2023
1 parent 8775c1f commit eab3663
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 0 deletions.
61 changes: 61 additions & 0 deletions preprocess_scripts/compute_distribution_visual_scene_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import pickle
from collections import Counter
#read each file and extract the labels

def read_txt_file(txt_file_path):
    """Collect every integer label listed in a split file into one flat list.

    Each line is stripped and split on single spaces. The first field is
    skipped (presumably a sample identifier -- confirm against the split
    file format) and every remaining non-empty field is parsed as an int.

    Args:
        txt_file_path: Path of the text file to read.

    Returns:
        A flat list of ints containing the labels of all lines, in file
        order. Repeated labels are kept, so the list can be counted later.

    Raises:
        ValueError: If a non-empty label field is not a valid integer.
    """
    label_list = []
    print('Loading the file:%s' %(txt_file_path))
    with open(txt_file_path, 'r') as f:
        # Read everything up front so tqdm can report a total line count.
        lines = f.readlines()

    for line in tqdm(lines):
        fields = line.strip().split(' ')[1:]
        # Extend in place: rebuilding the list with `+` on every line copies
        # the whole accumulator each time (quadratic in the label count).
        # Empty fields come from repeated spaces and carry no label.
        label_list.extend(int(field) for field in fields if field)

    return label_list


# Inputs: one label file per split, plus the pickled label map.
train_file = "../split_files/train_multi_label_thresh_0_4_0_1_150_labels.txt"
val_file = "../split_files/val_multi_label_thresh_0_4_0_1_150_labels.txt"
test_file = "../split_files/test_multi_label_thresh_0_4_0_1_150_labels.txt"
label_map_file = "../split_files/label_2_ind_multi_label_thresh_0_4_0_1_150_label_map.pkl"

# Load the label indices of every split.
train_labels = read_txt_file(train_file)
val_labels = read_txt_file(val_file)
test_labels = read_txt_file(test_file)

# Pool the splits in train / val / test order; the order decides how ties
# are ranked in the counter below.
total_labels = [*train_labels, *val_labels, *test_labels]

with open(label_map_file, 'rb') as f:
    label_map = pickle.load(f)

# Invert the loaded map so each stored value points back to its key.
reverse_label_map = dict(zip(label_map.values(), label_map.keys()))

# Translate every pooled label through the inverted map, then count.
label_names = list(map(reverse_label_map.__getitem__, total_labels))

label_occurence = Counter(label_names).most_common(150)

# Persist the (label, count) pairs, most frequent first, as a CSV table.
df = pd.DataFrame(label_occurence, columns=['label', 'count'])

df.to_csv(
    '../split_files/label_distribution_multi_label_thresh_0_4_0_1_150.csv',
    index=False,
)









151 changes: 151 additions & 0 deletions split_files/label_distribution_multi_label_thresh_0_4_0_1_150.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
label,count
cockpit,12426
car,11128
locker room,8646
ballroom,7786
banquet,6531
bedroom,5126
cab,4603
stage,4347
funeral,4242
shooting range,3611
boxing ring,3422
helicopter,3198
club,3087
courtroom,2962
dining room,2879
truck,2863
animal shelter,2620
control room,2458
bathroom,2427
war room,2418
elevator,2371
morgue,2144
kitchen,2119
bar,1849
plane,1773
living room,1719
automotive repair,1651
pool,1526
batting cage,1498
desert,1449
baseball field,1322
boat,1264
room,1237
concert hall,1201
basketball court,1187
shuttle,1168
sea,1148
zoo,977
gym,977
classroom,976
closet,955
corridor,862
computer room,856
race track,837
arena,835
battlefield,834
bus,791
lounge,664
cave,662
penthouse,654
hospital,627
salon,626
makeup studio,612
ship,611
balcony,592
stairs,568
stadium,565
ice rink,554
train,546
fair,539
lobby,531
deck,521
beach,508
casino,504
restaurant,458
attic,457
foundry,423
subway,370
laboratory,366
tunnel,358
bowling alley,342
suburban,318
overpass,309
theater,299
auditorium,295
shore,293
retail,277
construction site,273
parking,256
basement,238
wagon,231
press room,226
road,221
conference room,221
swamp,216
golf course,213
hot spring,209
graveyard,201
clinic,197
waterfall,194
tent,180
mountain,179
forest,164
grove,161
river,157
bullring,156
bridge,154
office,151
playground,151
mall,151
skyline,150
tennis court,140
studio,121
garden,120
amusement park,110
market,106
tower,104
archaeological site,103
gas station,101
cafe,96
sandbank,96
downtown,88
agriculture field,82
prison,80
library,79
art gallery,78
factory,77
airport,76
bakery,70
apartment,65
cabin,64
farm,63
pond,61
kindergarten,60
school,59
garage,54
hangar,51
booth,49
castle,41
cellar,40
alley,37
hall,35
dorm,34
mansion,34
fire station,33
station,33
chapel,32
yard,32
church,30
walkway,28
police station,25
loft,20
park,15
lake,15
valley,14
harbor,12
inn,10
plaza,10
street,10
grassland,8

0 comments on commit eab3663

Please sign in to comment.