forked from ChelsieLei/EZ-HOI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vcoco_text_label.py
479 lines (471 loc) · 49.7 KB
/
vcoco_text_label.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
# vcoco_obj_text_label = [(0, 'a photo of a person and a person'), (1, 'a photo of a person and a bicycle'),
# (2, 'a photo of a person and a car'), (3, 'a photo of a person and a motorcycle'),
# (4, 'a photo of a person and an airplane'), (5, 'a photo of a person and a bus'),
# (6, 'a photo of a person and a train'), (7, 'a photo of a person and a truck'),
# (8, 'a photo of a person and a boat'), (9, 'a photo of a person and a traffic light'),
# (10, 'a photo of a person and a fire hydrant'), (11, 'a photo of a person and a stop sign'),
# (12, 'a photo of a person and a parking meter'), (13, 'a photo of a person and a bench'),
# (14, 'a photo of a person and a bird'), (15, 'a photo of a person and a cat'),
# (16, 'a photo of a person and a dog'), (17, 'a photo of a person and a horse'),
# (18, 'a photo of a person and a sheep'), (19, 'a photo of a person and a cow'),
# (20, 'a photo of a person and an elephant'), (21, 'a photo of a person and a bear'),
# (22, 'a photo of a person and a zebra'), (23, 'a photo of a person and a giraffe'),
# (24, 'a photo of a person and a backpack'), (25, 'a photo of a person and a umbrella'),
# (26, 'a photo of a person and a handbag'), (27, 'a photo of a person and a tie'),
# (28, 'a photo of a person and a suitcase'), (29, 'a photo of a person and a frisbee'),
# (30, 'a photo of a person and a skis'), (31, 'a photo of a person and a snowboard'),
# (32, 'a photo of a person and a sports ball'), (33, 'a photo of a person and a kite'),
# (34, 'a photo of a person and a baseball bat'),
# (35, 'a photo of a person and a baseball glove'),
# (36, 'a photo of a person and a skateboard'), (37, 'a photo of a person and a surfboard'),
# (38, 'a photo of a person and a tennis racket'), (39, 'a photo of a person and a bottle'),
# (40, 'a photo of a person and a wine glass'), (41, 'a photo of a person and a cup'),
# (42, 'a photo of a person and a fork'), (43, 'a photo of a person and a knife'),
# (44, 'a photo of a person and a spoon'), (45, 'a photo of a person and a bowl'),
# (46, 'a photo of a person and a banana'), (47, 'a photo of a person and an apple'),
# (48, 'a photo of a person and a sandwich'), (49, 'a photo of a person and an orange'),
# (50, 'a photo of a person and a broccoli'), (51, 'a photo of a person and a carrot'),
# (52, 'a photo of a person and a hot dog'), (53, 'a photo of a person and a pizza'),
# (54, 'a photo of a person and a donut'), (55, 'a photo of a person and a cake'),
# (56, 'a photo of a person and a chair'), (57, 'a photo of a person and a couch'),
# (58, 'a photo of a person and a potted plant'), (59, 'a photo of a person and a bed'),
# (60, 'a photo of a person and a dining table'), (61, 'a photo of a person and a toilet'),
# (62, 'a photo of a person and a tv'), (63, 'a photo of a person and a laptop'),
# (64, 'a photo of a person and a mouse'), (65, 'a photo of a person and a remote'),
# (66, 'a photo of a person and a keyboard'), (67, 'a photo of a person and a cell phone'),
# (68, 'a photo of a person and a microwave'), (69, 'a photo of a person and an oven'),
# (70, 'a photo of a person and a toaster'), (71, 'a photo of a person and a sink'),
# (72, 'a photo of a person and a refrigerator'), (73, 'a photo of a person and a book'),
# (74, 'a photo of a person and a clock'), (75, 'a photo of a person and a vase'),
# (76, 'a photo of a person and a scissors'), (77, 'a photo of a person and a teddy bear'),
# (78, 'a photo of a person and a hair drier'), (79, 'a photo of a person and a toothbrush'),
# (80, 'a photo of a person only'), (81, 'a photo of nothing')]
# vcoco_hoi_text_label = {(0, 41): 'a photo of a person holding a cup',
# (16, 80): 'a photo of a person cutting with something',
# (17, 53): 'a photo of a person cutting a pizza',
# (0, 53): 'a photo of a person holding a pizza', (2, 80): 'a photo of a person sitting',
# (8, 53): 'a photo of a person eating a pizza',
# (9, 80): 'a photo of a person eating with something',
# (23, 80): 'a photo of a person smiling', (21, 37): 'a photo of a person surfing a surfboard',
# (0, 73): 'a photo of a person holding a book',
# (2, 13): 'a photo of a person sitting a bench',
# (5, 73): 'a photo of a person looking at a book',
# (27, 73): 'a photo of a person reading a book', (1, 80): 'a photo of a person standing',
# (22, 36): 'a photo of a person skateboarding a skateboard',
# (20, 30): 'a photo of a person skiing a skis', (0, 80): 'a photo of a person holding',
# (8, 80): 'a photo of a person eating', (2, 56): 'a photo of a person sitting a chair',
# (5, 63): 'a photo of a person looking at a laptop',
# (19, 63): 'a photo of a person working on computer a laptop',
# (0, 40): 'a photo of a person holding a wine glass',
# (24, 40): 'a photo of a person drinking a wine glass',
# (5, 31): 'a photo of a person looking at a snowboard',
# (28, 31): 'a photo of a person snowboarding a snowboard',
# (0, 76): 'a photo of a person holding a scissors',
# (5, 80): 'a photo of a person looking at something',
# (5, 76): 'a photo of a person looking at a scissors',
# (16, 76): 'a photo of a person cutting with a scissors',
# (17, 80): 'a photo of a person cutting',
# (5, 37): 'a photo of a person looking at a surfboard',
# (2, 17): 'a photo of a person sitting a horse',
# (3, 17): 'a photo of a person riding a horse', (4, 80): 'a photo of a person walking',
# (5, 29): 'a photo of a person looking at a frisbee', (10, 80): 'a photo of a person jumping',
# (14, 29): 'a photo of a person throwing a frisbee', (18, 80): 'a photo of a person running',
# (5, 53): 'a photo of a person looking at a pizza',
# (0, 48): 'a photo of a person holding a sandwich',
# (8, 48): 'a photo of a person eating a sandwich',
# (0, 67): 'a photo of a person holding a cell phone',
# (19, 80): 'a photo of a person working on computer',
# (0, 24): 'a photo of a person holding a backpack',
# (13, 24): 'a photo of a person carrying a backpack', (11, 80): 'a photo of a person laying',
# (11, 57): 'a photo of a person laying a couch',
# (0, 17): 'a photo of a person holding a horse', (0, 15): 'a photo of a person holding a cat',
# (11, 59): 'a photo of a person laying a bed',
# (15, 29): 'a photo of a person catching a frisbee', (3, 80): 'a photo of a person riding',
# (12, 67): 'a photo of a person talking on phone a cell phone',
# (0, 31): 'a photo of a person holding a snowboard',
# (10, 31): 'a photo of a person jumping a snowboard',
# (5, 36): 'a photo of a person looking at a skateboard',
# (10, 36): 'a photo of a person jumping a skateboard',
# (0, 79): 'a photo of a person holding a toothbrush', (27, 80): 'a photo of a person reading',
# (0, 39): 'a photo of a person holding a bottle',
# (24, 39): 'a photo of a person drinking a bottle',
# (2, 59): 'a photo of a person sitting a bed',
# (5, 48): 'a photo of a person looking at a sandwich',
# (0, 30): 'a photo of a person holding a skis',
# (0, 38): 'a photo of a person holding a tennis racket',
# (5, 32): 'a photo of a person looking at a sports ball',
# (6, 38): 'a photo of a person hitting with a tennis racket',
# (7, 32): 'a photo of a person hitting a sports ball',
# (5, 0): 'a photo of a person looking at a person',
# (5, 17): 'a photo of a person looking at a horse',
# (0, 47): 'a photo of a person holding an apple',
# (5, 18): 'a photo of a person looking at a sheep',
# (8, 47): 'a photo of a person eating an apple',
# (25, 32): 'a photo of a person kicking a sports ball',
# (0, 44): 'a photo of a person holding a spoon',
# (5, 55): 'a photo of a person looking at a cake',
# (8, 55): 'a photo of a person eating a cake',
# (9, 44): 'a photo of a person eating with a spoon',
# (0, 63): 'a photo of a person holding a laptop',
# (6, 80): 'a photo of a person hitting with something',
# (2, 3): 'a photo of a person sitting a motorcycle',
# (3, 3): 'a photo of a person riding a motorcycle',
# (0, 43): 'a photo of a person holding a knife',
# (5, 43): 'a photo of a person looking at a knife',
# (16, 43): 'a photo of a person cutting with a knife',
# (17, 55): 'a photo of a person cutting a cake', (7, 80): 'a photo of a person hitting',
# (0, 34): 'a photo of a person holding a baseball bat',
# (6, 34): 'a photo of a person hitting with a baseball bat',
# (15, 80): 'a photo of a person catching', (2, 57): 'a photo of a person sitting a couch',
# (0, 77): 'a photo of a person holding a teddy bear',
# (13, 49): 'a photo of a person carrying an orange',
# (0, 42): 'a photo of a person holding a fork',
# (9, 42): 'a photo of a person eating with a fork',
# (5, 62): 'a photo of a person looking at a tv',
# (0, 28): 'a photo of a person holding a suitcase',
# (13, 28): 'a photo of a person carrying a suitcase',
# (2, 20): 'a photo of a person sitting an elephant',
# (3, 20): 'a photo of a person riding an elephant',
# (5, 15): 'a photo of a person looking at a cat',
# (0, 56): 'a photo of a person holding a chair',
# (5, 60): 'a photo of a person looking at a dining table',
# (24, 41): 'a photo of a person drinking a cup', (14, 80): 'a photo of a person throwing',
# (13, 26): 'a photo of a person carrying a handbag',
# (5, 16): 'a photo of a person looking at a dog',
# (0, 46): 'a photo of a person holding a banana',
# (13, 46): 'a photo of a person carrying a banana',
# (5, 28): 'a photo of a person looking at a suitcase',
# (9, 43): 'a photo of a person eating with a knife',
# (0, 37): 'a photo of a person holding a surfboard',
# (13, 37): 'a photo of a person carrying a surfboard',
# (8, 54): 'a photo of a person eating a donut',
# (0, 0): 'a photo of a person holding a person',
# (0, 35): 'a photo of a person holding a baseball glove',
# (0, 65): 'a photo of a person holding a remote',
# (0, 54): 'a photo of a person holding a donut',
# (0, 26): 'a photo of a person holding a handbag', (13, 80): 'a photo of a person carrying',
# (13, 0): 'a photo of a person carrying a person',
# (0, 32): 'a photo of a person holding a sports ball',
# (14, 32): 'a photo of a person throwing a sports ball',
# (5, 54): 'a photo of a person looking at a donut',
# (0, 1): 'a photo of a person holding a bicycle',
# (2, 1): 'a photo of a person sitting a bicycle',
# (3, 1): 'a photo of a person riding a bicycle',
# (5, 1): 'a photo of a person looking at a bicycle', (25, 80): 'a photo of a person kicking',
# (5, 67): 'a photo of a person looking at a cell phone',
# (5, 6): 'a photo of a person looking at a train',
# (0, 29): 'a photo of a person holding a frisbee',
# (0, 36): 'a photo of a person holding a skateboard',
# (3, 7): 'a photo of a person riding a truck',
# (26, 63): 'a photo of a person pointing a laptop',
# (0, 3): 'a photo of a person holding a motorcycle',
# (13, 30): 'a photo of a person carrying a skis',
# (0, 25): 'a photo of a person holding a umbrella',
# (5, 45): 'a photo of a person looking at a bowl',
# (17, 51): 'a photo of a person cutting a carrot',
# (0, 52): 'a photo of a person holding a hot dog',
# (8, 52): 'a photo of a person eating a hot dog',
# (0, 33): 'a photo of a person holding a kite',
# (5, 13): 'a photo of a person looking at a bench',
# (12, 80): 'a photo of a person talking on phone',
# (22, 80): 'a photo of a person skateboarding',
# (5, 35): 'a photo of a person looking at a baseball glove',
# (15, 32): 'a photo of a person catching a sports ball',
# (26, 80): 'a photo of a person pointing',
# (13, 25): 'a photo of a person carrying a umbrella',
# (5, 40): 'a photo of a person looking at a wine glass',
# (10, 37): 'a photo of a person jumping a surfboard',
# (5, 33): 'a photo of a person looking at a kite',
# (13, 33): 'a photo of a person carrying a kite',
# (3, 6): 'a photo of a person riding a train',
# (5, 44): 'a photo of a person looking at a spoon',
# (0, 20): 'a photo of a person holding an elephant', (21, 80): 'a photo of a person surfing',
# (5, 20): 'a photo of a person looking at an elephant',
# (3, 8): 'a photo of a person riding a boat',
# (5, 23): 'a photo of a person looking at a giraffe',
# (13, 67): 'a photo of a person carrying a cell phone',
# (11, 56): 'a photo of a person laying a chair',
# (5, 19): 'a photo of a person looking at a cow',
# (5, 42): 'a photo of a person looking at a fork',
# (0, 55): 'a photo of a person holding a cake',
# (13, 32): 'a photo of a person carrying a sports ball',
# (5, 30): 'a photo of a person looking at a skis',
# (13, 36): 'a photo of a person carrying a skateboard',
# (26, 67): 'a photo of a person pointing a cell phone',
# (5, 52): 'a photo of a person looking at a hot dog',
# (8, 46): 'a photo of a person eating a banana', (20, 80): 'a photo of a person skiing',
# (28, 80): 'a photo of a person snowboarding', (0, 14): 'a photo of a person holding a bird',
# (11, 60): 'a photo of a person laying a dining table',
# (0, 16): 'a photo of a person holding a dog',
# (0, 72): 'a photo of a person holding a refrigerator',
# (5, 72): 'a photo of a person looking at a refrigerator',
# (5, 7): 'a photo of a person looking at a truck',
# (5, 41): 'a photo of a person looking at a cup',
# (2, 61): 'a photo of a person sitting a toilet', (24, 80): 'a photo of a person drinking',
# (0, 27): 'a photo of a person holding a tie',
# (5, 27): 'a photo of a person looking at a tie',
# (17, 27): 'a photo of a person cutting a tie',
# (5, 10): 'a photo of a person looking at a fire hydrant',
# (26, 10): 'a photo of a person pointing a fire hydrant',
# (11, 13): 'a photo of a person laying a bench',
# (17, 18): 'a photo of a person cutting a sheep',
# (0, 64): 'a photo of a person holding a mouse',
# (5, 64): 'a photo of a person looking at a mouse',
# (5, 66): 'a photo of a person looking at a keyboard',
# (16, 42): 'a photo of a person cutting with a fork',
# (17, 0): 'a photo of a person cutting a person',
# (5, 5): 'a photo of a person looking at a bus', (3, 2): 'a photo of a person riding a car',
# (10, 30): 'a photo of a person jumping a skis',
# (5, 4): 'a photo of a person looking at an airplane',
# (5, 46): 'a photo of a person looking at a banana',
# (2, 28): 'a photo of a person sitting a suitcase',
# (13, 29): 'a photo of a person carrying a frisbee',
# (5, 26): 'a photo of a person looking at a handbag',
# (8, 50): 'a photo of a person eating a broccoli',
# (17, 46): 'a photo of a person cutting a banana',
# (0, 18): 'a photo of a person holding a sheep',
# (17, 48): 'a photo of a person cutting a sandwich',
# (26, 0): 'a photo of a person pointing a person',
# (5, 3): 'a photo of a person looking at a motorcycle',
# (5, 24): 'a photo of a person looking at a backpack',
# (0, 45): 'a photo of a person holding a bowl',
# (26, 27): 'a photo of a person pointing a tie',
# (0, 49): 'a photo of a person holding an orange',
# (8, 49): 'a photo of a person eating an orange',
# (5, 34): 'a photo of a person looking at a baseball bat',
# (13, 31): 'a photo of a person carrying a snowboard',
# (17, 54): 'a photo of a person cutting a donut',
# (5, 38): 'a photo of a person looking at a tennis racket',
# (8, 51): 'a photo of a person eating a carrot',
# (17, 47): 'a photo of a person cutting an apple',
# (13, 40): 'a photo of a person carrying a wine glass',
# (26, 48): 'a photo of a person pointing a sandwich',
# (26, 62): 'a photo of a person pointing a tv',
# (13, 74): 'a photo of a person carrying a clock',
# (5, 61): 'a photo of a person looking at a toilet',
# (26, 19): 'a photo of a person pointing a cow',
# (5, 65): 'a photo of a person looking at a remote',
# (26, 18): 'a photo of a person pointing a sheep',
# (0, 50): 'a photo of a person holding a broccoli',
# (0, 13): 'a photo of a person holding a bench',
# (26, 33): 'a photo of a person pointing a kite',
# (0, 7): 'a photo of a person holding a truck',
# (13, 41): 'a photo of a person carrying a cup',
# (24, 45): 'a photo of a person drinking a bowl',
# (13, 38): 'a photo of a person carrying a tennis racket',
# (13, 39): 'a photo of a person carrying a bottle',
# (5, 47): 'a photo of a person looking at an apple',
# (5, 56): 'a photo of a person looking at a chair',
# (2, 24): 'a photo of a person sitting a backpack',
# (26, 60): 'a photo of a person pointing a dining table',
# (0, 78): 'a photo of a person holding a hair drier',
# (5, 39): 'a photo of a person looking at a bottle',
# (26, 55): 'a photo of a person pointing a cake',
# (26, 66): 'a photo of a person pointing a keyboard',
# (26, 72): 'a photo of a person pointing a refrigerator',
# (5, 74): 'a photo of a person looking at a clock',
# (0, 8): 'a photo of a person holding a boat', (17, 45): 'a photo of a person cutting a bowl',
# (26, 23): 'a photo of a person pointing a giraffe',
# (5, 25): 'a photo of a person looking at a umbrella',
# (0, 66): 'a photo of a person holding a keyboard',
# (2, 26): 'a photo of a person sitting a handbag',
# (26, 52): 'a photo of a person pointing a hot dog',
# (2, 60): 'a photo of a person sitting a dining table',
# (13, 77): 'a photo of a person carrying a teddy bear',
# (0, 51): 'a photo of a person holding a carrot',
# (13, 34): 'a photo of a person carrying a baseball bat',
# (5, 2): 'a photo of a person looking at a car', (3, 5): 'a photo of a person riding a bus',
# (17, 50): 'a photo of a person cutting a broccoli',
# (5, 14): 'a photo of a person looking at a bird',
# (13, 73): 'a photo of a person carrying a book',
# (5, 50): 'a photo of a person looking at a broccoli'}
# Text prompts for human-object pairs: a list of 80 (object_index, prompt) tuples,
# where every prompt is 'a photo of a person and <article> <object>'.
# The article is stored with each object phrase so that the generated strings
# match the hand-written table exactly (including forms such as 'a umbrella',
# 'a skis' and 'a scissors').
# NOTE(review): the object order presumably follows the 80 COCO categories - confirm.
vcoco_obj_text_label = [
    (obj_idx, 'a photo of a person and ' + obj_phrase)
    for obj_idx, obj_phrase in enumerate((
        'a person',
        'a bicycle',
        'a car',
        'a motorcycle',
        'an airplane',
        'a bus',
        'a train',
        'a truck',
        'a boat',
        'a traffic light',
        'a fire hydrant',
        'a stop sign',
        'a parking meter',
        'a bench',
        'a bird',
        'a cat',
        'a dog',
        'a horse',
        'a sheep',
        'a cow',
        'an elephant',
        'a bear',
        'a zebra',
        'a giraffe',
        'a backpack',
        'a umbrella',
        'a handbag',
        'a tie',
        'a suitcase',
        'a frisbee',
        'a skis',
        'a snowboard',
        'a sports ball',
        'a kite',
        'a baseball bat',
        'a baseball glove',
        'a skateboard',
        'a surfboard',
        'a tennis racket',
        'a bottle',
        'a wine glass',
        'a cup',
        'a fork',
        'a knife',
        'a spoon',
        'a bowl',
        'a banana',
        'an apple',
        'a sandwich',
        'an orange',
        'a broccoli',
        'a carrot',
        'a hot dog',
        'a pizza',
        'a donut',
        'a cake',
        'a chair',
        'a couch',
        'a potted plant',
        'a bed',
        'a dining table',
        'a toilet',
        'a tv',
        'a laptop',
        'a mouse',
        'a remote',
        'a keyboard',
        'a cell phone',
        'a microwave',
        'an oven',
        'a toaster',
        'a sink',
        'a refrigerator',
        'a book',
        'a clock',
        'a vase',
        'a scissors',
        'a teddy bear',
        'a hair drier',
        'a toothbrush',
    ))
]
# {0: 'hold', 2: 'ride', 3: 'look at', 4: 'hit with', 5: 'hit', 8: 'jump', 9: 'lie on', 11: "carry", 12: 'throw', 15: 'cut', 17: 'ski', 21: 'kick', 23: 'snowboard'}
vcoco_hoi_text_label = {(0, 1): 'a photo of a person holding a person',
(3, 1): 'a photo of a person looking at a person',
(11, 1): 'a photo of a person carrying a person',
(15, 1): 'a photo of a person cutting a person',
(0, 2): 'a photo of a person holding a bicycle',
(1, 2): 'a photo of a person sitting on a bicycle',
(2, 2): 'a photo of a person riding a bicycle',
(3, 2): 'a photo of a person looking at a bicycle',
(11, 2): 'a photo of a person carrying a bicycle',
(2, 3): 'a photo of a person riding a car',
(3, 3): 'a photo of a person looking at a car',
(11, 3): 'a photo of a person carrying a car',
(0, 4): 'a photo of a person holding a motorcycle',
(1, 4): 'a photo of a person sitting on a motorcycle',
(2, 4): 'a photo of a person riding a motorcycle',
(3, 4): 'a photo of a person looking at a motorcycle',
(3, 5): 'a photo of a person looking at a airplane',
(2, 6): 'a photo of a person riding a bus',
(3, 6): 'a photo of a person looking at a bus',
(2, 7): 'a photo of a person riding a train',
(3, 7): 'a photo of aperson looking at a train',
(0, 8): 'a photo of a person holding a truck',
(2, 8): 'a photo of a person riding a truck', (3, 8): 'a photo of a person looking at a truck',
(0, 9): 'a photo of a person holding a boat', (2, 9): 'a photo of a person riding a boat',
(3, 9): 'a photo of a person looking at a boat',
(3, 10): 'a photo of a person looking at a traffic light',
(3, 11): 'a photo of a person looking at a fire hydrant',
(0, 14): 'a photo of a person holding a bench', (1, 14): 'a photo of a person sitting on a bench',
(3, 14): 'a photo of a person looking at a bench', (9, 14): 'a photo of a person lying on a bench',
(0, 15): 'aphoto of a person holding a bird', (3, 15): 'a photo of a person looking at a bird',
(0, 16): 'a photo of a person holding a cat', (3, 16): 'a photo of a person looking at a cat',
(0, 17): 'a photo of a person holding a dog', (3, 17): 'a photo of a personlooking at a dog',
(11, 17): 'a photo of a person carrying a dog',
(0, 18): 'a photo of a person holding a horse', (1, 18): 'a photo of a person sitting on a horse',
(2, 18): 'a photo of a person riding a horse', (3, 18): 'a photo of a person looking at a horse',
(0, 19): 'a photo of a person holding a sheep', (3, 19): 'a photo of a person looking at a sheep',
(11, 19): 'a photo of a person carrying a sheep', (15, 19): 'a photo of a person cutting a sheep',
(0, 20): 'a photo of a person holding a cow', (3,20): 'a photo of a person looking at a cow',
(0, 21): 'a photo of a person holding a elephant', (1, 21): 'a photo of a person sitting on a elephant',
(2, 21): 'a photo of a person riding a elephant', (3, 21): 'a photo of a person looking at a elephant',
(3, 24): 'a photo of a person looking at a giraffe',
(0, 25): 'a photo of a person holding a backpack', (1, 25): 'a photo of a person sitting on a backpack',
(3, 25): 'a photo of a person looking at a backpack',
(11, 25): 'a photo of a person carrying a backpack',
(0, 26): 'a photo of a person holding a umbrella', (3, 26): 'a photo of a person looking at a umbrella',
(11, 26): 'a photo of a person carrying a umbrella',
(0, 27): 'a photo of a person holding a handbag', (1, 27): 'a photo of a person sitting ona handbag',
(3, 27): 'a photo of a person looking at a handbag', (11, 27): 'a photo of a person carrying a handbag',
(0, 28): 'a photo of a person holding a tie', (3, 28): 'a photo of a person looking at a tie',
(15, 28): 'a photo of a person cutting a tie', (0, 29): 'a photo of a person holding a suitcase',
(1, 29): 'a photo of a person sitting on a suitcase', (3, 29): 'a photo of a person looking at a suitcase',
(11, 29): 'a photo of a person carrying a suitcase',
(0, 30): 'a photo of a person holding a frisbee',
(3, 30): 'a photo of a person looking at a frisbee',
(11, 30): 'a photo of a person carrying a frisbee',
(12, 30): 'a photo of a person throwing a frisbee',
(13, 30): 'a photo of a person catching a frisbee',
(0, 31): 'a photo of a person holdinga skis', (3, 31): 'a photo of a person looking at a skis',
(8, 31): 'a photo of a person jumping on a skis', (11, 31): 'a photoof a person carrying a skis',
(17, 31): 'a photo of a person skiing on a skis',
(0, 32): 'a photo of a person holding a snowboard',
(3, 32): 'a photo of a person looking at a snowboard',
(8, 32): 'a photo of a person jumping on a snowboard', (11, 32): 'a photo of a person carrying a snowboard',
(23, 32): 'a photo of a person snowboarding a snowboard',
(0, 33): 'a photo of a personholding a sports ball', (3, 33): 'a photo of a person looking at a sports ball',
(5, 33): 'a photo of a person hitting a sportsball',
(11, 33): 'a photo of a person carrying a sports ball',
(12, 33): 'a photo of a person throwing a sports ball',
(13, 33): 'a photo of a person catching a sports ball',
(21, 33): 'a photo of a person kicking a sports ball', (0, 34): 'a photo of a person holding a kite',
(3, 34): 'a photo of a person looking at a kite', (11, 34): 'a photo of a person carrying a kite',
(0, 35): 'a photo of a person holding a baseball bat',
(3, 35): 'a photo of a person looking at a baseball bat',
(4, 35): 'a photo of a person hitting with a baseball bat', (11, 35): 'a photo of a person carrying a baseball bat',
(0, 36): 'a photo of a person holding a baseball glove', (3, 36): 'a photo of a person looking at a baseball glove',
(0, 37): 'a photo of a person holding a skateboard', (3, 37): 'a photo of a person looking at a skateboard',
(8, 37): 'a photo of a person jumping on a skateboard', (11, 37): 'a photo of a person carrying a skateboard',
(15, 37): 'a photo of a person cutting a skateboard', (19, 37): 'a photo of a person skateboarding a skateboard',
(0, 38): 'a photo of a person holding a surfboard', (3, 38): 'a photo of a person looking at asurfboard',
(8, 38): 'a photo of a person jumping on a surfboard', (11, 38): 'a photo of a person carrying a surfboard',
(18, 38): 'a photo of a person surfing a surfboard', (0, 39): 'a photo of a person holding a tennis racket',
(3, 39): 'a photo of a person looking at a tennis racket', (4, 39): 'a photo of a person hitting with a tennis racket',
(11, 39): 'a photo of a person carrying a tennis racket', (0, 40): 'a photo of a person holding a bottle',
(3, 40): 'a photo of a person looking at a bottle', (11, 40): 'a photo of a person carrying a bottle',
(20, 40): 'a photo of a person drinking with a bottle', (0, 41): 'a photo of a person holding a wine glass',
(3, 41): 'a photo of a person looking at a wine glass',
(11, 41): 'a photo of a person carrying a wine glass',
(20, 41): 'a photo of a person drinking with a wine glass',
(0, 42): 'a photo of a person holding a cup', (3, 42): 'a photo of a person looking at a cup',
(11, 42): 'a photo of a person carrying a cup', (20, 42): 'a photo of a person drinking with a cup',
(0, 43): 'a photo of a person holding a fork', (3, 43): 'a photo of a person looking at a fork',
(7, 43): 'a photo of a person eating with a fork', (14, 43): 'a photo of a person cutting with a fork',
(0, 44): 'a photo of a person holding a knife', (3, 44): 'a photo of a person looking at a knife',
(7, 44): 'a photo of a person eating with a knife', (14, 44): 'a photo of a person cutting with a knife',
(0, 45): 'a photo of a person holding a spoon', (3, 45): 'a photo of a person looking at a spoon',
(7, 45): 'a photo of a person eating with a spoon',
(0, 46): 'a photo of a person holding a bowl', (3, 46): 'a photo of a person looking at a bowl',
(15, 46): 'a photo of a person cutting a bowl', (20, 46): 'a photo of a person drinking with a bowl',
(0, 47): 'a photo of a person holding a banana', (3, 47): 'a photo of a person looking at a banana',
(6, 47): 'a photo of a person eating a banana', (11, 47): 'a photo of a person carrying a banana',
(15, 47): 'a photo of a person cutting a banana',
(0, 48): 'a photo of a person holding a apple', (3, 48): 'a photo of a person looking at a apple',
(6, 48): 'a photo of a person eating a apple', (15, 48): 'a photo of a person cutting a apple',
(0, 49): 'a photo of a person holding a sandwich', (3, 49): 'a photo of a person looking at a sandwich',
(6, 49): 'a photo of a person eating a sandwich', (15, 49): 'a photo of a person cutting a sandwich',
(0, 50): 'a photo of a person holding a orange', (6, 50): 'a photo of a person eating a orange',
(11, 50): 'a photoof a person carrying a orange', (15, 50): 'a photo of a person cutting a orange',
(0, 51): 'a photo of a person holding a broccoli', (3, 51): 'a photo of a person looking at a broccoli',
(6, 51): 'a photo of a person eating a broccoli', (15, 51): 'a photoof a person cutting a broccoli',
(0, 52): 'a photo of a person holding a carrot', (6, 52): 'a photo of a person eating a carrot',
(15, 52): 'a photo of a person cutting a carrot',
(0, 53): 'a photo of a person holding a hot dog', (3, 53): 'a photo of a person looking at a hot dog',
(6, 53): 'a photo of a person eating a hot dog', (15, 53): 'a photo of a person cutting a hot dog',
(0, 54): 'a photo of a person holding a pizza', (3, 54): 'a photo of a person looking at a pizza',
(6, 54): 'a photo of a personeating a pizza', (15, 54): 'a photo of a person cutting a pizza',
(0, 55): 'a photo of a person holding a donut', (3, 55): 'a photo of a person looking at a donut',
(6, 55): 'a photo of a person eating a donut', (11, 55): 'a photo of a person carrying a donut',
(15, 55): 'a photo of a person cutting a donut', (0, 56): 'a photo of a person holding a cake',
(3, 56): 'a photo of a person looking at a cake', (6, 56): 'a photo of a person eating a cake',
(15, 56): 'a photo of a person cutting a cake',
(0, 57): 'a photo of a person holding a chair', (1, 57): 'a photo of a person sitting on a chair',
(3, 57): 'a photo of a person looking at a chair', (9, 57): 'a photo of a person lying on a chair',
(1, 58): 'a photo of a person sitting on a couch', (9, 58): 'a photo of a person lying on a couch',
(3, 59): 'a photo of a person looking at a potted plant', (0, 60): 'a photo of a person holding a bed',
(1, 60): 'a photo of a person sitting on a bed', (9, 60): 'a photo of a person lying on a bed',
(0, 61): 'a photo of a person holding a dining table', (1, 61): 'a photo of a person sitting on a dining table',
(3, 61): 'a photo of a person looking at a dining table', (9, 61): 'a photo of a person lying on a dining table',
(1, 62): 'a photo of a person sitting on a toilet', (3, 62): 'a photo of a person looking at a toilet',
(9, 62): 'a photo of a person lying on a toilet', (3, 63): 'a photo ofa person looking at a tv',
(0, 64): 'a photo of a person holding a laptop', (3, 64): 'a photo of a person looking at a laptop',
(11, 64): 'a photo of a person carrying a laptop', (15, 64): 'a photo of a person cutting a laptop',
(16, 64): 'a photo of a person working on a laptop', (0, 65): 'a photo of a person holding a mouse',
(3, 65): 'a photo of a person looking at a mouse', (0, 66): 'a photo of a person holding a remote',
(3, 66): 'a photo of a person looking at a remote', (0, 67): 'a photo of a personholding a keyboard',
(3, 67): 'a photo of a person looking at a keyboard', (0, 68): 'a photo of a person holding a cell phone',
(3, 68): 'a photo of a person looking at a cell phone', (10, 68): 'a photo of a person talking on a cell phone',
(11, 68): 'a photo of a person carrying a cell phone', (0, 73): 'a photo of a person holding a refrigerator',
(3, 73): 'a photo of a person looking at a refrigerator', (0, 74): 'a photo of a person holding a book',
(3, 74): 'a photo of a person looking at a book', (11, 74): 'a photo of a person carrying a book',
(15, 74): 'a photo of a person cutting a book', (22, 74): 'a photo of a person reading a book',
(3, 75): 'a photo of a person looking at a clock', (11, 75): 'a photo of a person carrying a clock',
(0, 77): 'a photo of a person holding a scissors', (3, 77): 'a photo of a person looking at a scissors',
(14, 77): 'a photo of a person cuttingwith a scissors', (0, 78): 'a photo of a person holding a teddy bear',
(11, 78): 'a photo of a person carrying a teddy bear', (0, 79): 'a photo of a person holding a hair drier',
(0, 80): 'a photo of a person holding a toothbrush'}
# Lookup from an (action index, object index) pair to its HOI class id.
# The table below lists, for each object index, the action indices that are
# paired with it. Ids are assigned consecutively (0..235) in listing order,
# i.e. grouped by object index and, within an object, by ascending action.
MAP_AO_TO_HOI_COCO = {
    (act, obj): hoi_id
    for hoi_id, (act, obj) in enumerate(
        (act, obj)
        for obj, acts in (
            (1, (0, 3, 11, 15)),
            (2, (0, 1, 2, 3, 11)),
            (3, (2, 3, 11)),
            (4, (0, 1, 2, 3)),
            (5, (3,)),
            (6, (2, 3)),
            (7, (2, 3)),
            (8, (0, 2, 3)),
            (9, (0, 2, 3)),
            (10, (3,)),
            (11, (3,)),
            (14, (0, 1, 3, 9)),
            (15, (0, 3)),
            (16, (0, 3)),
            (17, (0, 3, 11)),
            (18, (0, 1, 2, 3)),
            (19, (0, 3, 11, 15)),
            (20, (0, 3)),
            (21, (0, 1, 2, 3)),
            (24, (3,)),
            (25, (0, 1, 3, 11)),
            (26, (0, 3, 11)),
            (27, (0, 1, 3, 11)),
            (28, (0, 3, 15)),
            (29, (0, 1, 3, 11)),
            (30, (0, 3, 11, 12, 13)),
            (31, (0, 3, 8, 11, 17)),
            (32, (0, 3, 8, 11, 23)),
            (33, (0, 3, 5, 11, 12, 13, 21)),
            (34, (0, 3, 11)),
            (35, (0, 3, 4, 11)),
            (36, (0, 3)),
            (37, (0, 3, 8, 11, 15, 19)),
            (38, (0, 3, 8, 11, 18)),
            (39, (0, 3, 4, 11)),
            (40, (0, 3, 11, 20)),
            (41, (0, 3, 11, 20)),
            (42, (0, 3, 11, 20)),
            (43, (0, 3, 7, 14)),
            (44, (0, 3, 7, 14)),
            (45, (0, 3, 7)),
            (46, (0, 3, 15, 20)),
            (47, (0, 3, 6, 11, 15)),
            (48, (0, 3, 6, 15)),
            (49, (0, 3, 6, 15)),
            (50, (0, 6, 11, 15)),
            (51, (0, 3, 6, 15)),
            (52, (0, 6, 15)),
            (53, (0, 3, 6, 15)),
            (54, (0, 3, 6, 15)),
            (55, (0, 3, 6, 11, 15)),
            (56, (0, 3, 6, 15)),
            (57, (0, 1, 3, 9)),
            (58, (1, 9)),
            (59, (3,)),
            (60, (0, 1, 9)),
            (61, (0, 1, 3, 9)),
            (62, (1, 3, 9)),
            (63, (3,)),
            (64, (0, 3, 11, 15, 16)),
            (65, (0, 3)),
            (66, (0, 3)),
            (67, (0, 3)),
            (68, (0, 3, 10, 11)),
            (73, (0, 3)),
            (74, (0, 3, 11, 15, 22)),
            (75, (3, 11)),
            (77, (0, 3, 14)),
            (78, (0, 11)),
            (79, (0,)),
            (80, (0,)),
        )
        for act in acts
    )
}
# Lookup from an HOI class id (0..235) to its (action index, object index)
# pair. The table lists, for each object index, the action indices paired
# with it; ids are the running position of each pair in listing order.
HOI_TO_AO_COCO = dict(enumerate(
    (verb, target)
    for target, verbs in (
        (1, (0, 3, 11, 15)),
        (2, (0, 1, 2, 3, 11)),
        (3, (2, 3, 11)),
        (4, (0, 1, 2, 3)),
        (5, (3,)),
        (6, (2, 3)),
        (7, (2, 3)),
        (8, (0, 2, 3)),
        (9, (0, 2, 3)),
        (10, (3,)),
        (11, (3,)),
        (14, (0, 1, 3, 9)),
        (15, (0, 3)),
        (16, (0, 3)),
        (17, (0, 3, 11)),
        (18, (0, 1, 2, 3)),
        (19, (0, 3, 11, 15)),
        (20, (0, 3)),
        (21, (0, 1, 2, 3)),
        (24, (3,)),
        (25, (0, 1, 3, 11)),
        (26, (0, 3, 11)),
        (27, (0, 1, 3, 11)),
        (28, (0, 3, 15)),
        (29, (0, 1, 3, 11)),
        (30, (0, 3, 11, 12, 13)),
        (31, (0, 3, 8, 11, 17)),
        (32, (0, 3, 8, 11, 23)),
        (33, (0, 3, 5, 11, 12, 13, 21)),
        (34, (0, 3, 11)),
        (35, (0, 3, 4, 11)),
        (36, (0, 3)),
        (37, (0, 3, 8, 11, 15, 19)),
        (38, (0, 3, 8, 11, 18)),
        (39, (0, 3, 4, 11)),
        (40, (0, 3, 11, 20)),
        (41, (0, 3, 11, 20)),
        (42, (0, 3, 11, 20)),
        (43, (0, 3, 7, 14)),
        (44, (0, 3, 7, 14)),
        (45, (0, 3, 7)),
        (46, (0, 3, 15, 20)),
        (47, (0, 3, 6, 11, 15)),
        (48, (0, 3, 6, 15)),
        (49, (0, 3, 6, 15)),
        (50, (0, 6, 11, 15)),
        (51, (0, 3, 6, 15)),
        (52, (0, 6, 15)),
        (53, (0, 3, 6, 15)),
        (54, (0, 3, 6, 15)),
        (55, (0, 3, 6, 11, 15)),
        (56, (0, 3, 6, 15)),
        (57, (0, 1, 3, 9)),
        (58, (1, 9)),
        (59, (3,)),
        (60, (0, 1, 9)),
        (61, (0, 1, 3, 9)),
        (62, (1, 3, 9)),
        (63, (3,)),
        (64, (0, 3, 11, 15, 16)),
        (65, (0, 3)),
        (66, (0, 3)),
        (67, (0, 3)),
        (68, (0, 3, 10, 11)),
        (73, (0, 3)),
        (74, (0, 3, 11, 15, 22)),
        (75, (3, 11)),
        (77, (0, 3, 14)),
        (78, (0, 11)),
        (79, (0,)),
        (80, (0,)),
    )
    for verb in verbs
))