diff --git a/.gitignore b/.gitignore index 0205d62..d0a113f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ *.pyc -.DS_Store +.DS_Store \ No newline at end of file diff --git a/out/imagenet_flow.txt b/out/imagenet_flow.txt deleted file mode 100644 index b9d8fa6..0000000 --- a/out/imagenet_flow.txt +++ /dev/null @@ -1,23 +0,0 @@ -Norm of logits: 63.252754 - -Top classes and probabilities -0.9477566 15.957031 playing cricket -0.04068217 12.808723 hurling (sport) -0.004154162 10.527044 playing tennis -0.0024740736 10.0088 playing squash or racquetball -0.0013800215 9.425033 hitting baseball -0.00079995755 8.879737 catching or throwing baseball -0.0007868024 8.863155 sword fighting -0.00076301256 8.832453 catching or throwing softball -0.00024411127 7.6928024 hammer throw -0.00019753167 7.481077 playing badminton -6.432761e-05 6.359167 pumping fist -6.392867e-05 6.3529463 tai chi -3.89561e-05 5.8576136 training dog -3.478504e-05 5.744365 catching or throwing frisbee -3.0313964e-05 5.606787 throwing ball -2.9222967e-05 5.570133 celebrating -2.8943172e-05 5.5605125 shot put -2.3183306e-05 5.3386106 playing ice hockey -2.2206976e-05 5.2955847 golf putting -2.1811395e-05 5.2776113 pole vault diff --git a/out/imagenet_joint.txt b/out/imagenet_joint.txt deleted file mode 100644 index 81297d8..0000000 --- a/out/imagenet_joint.txt +++ /dev/null @@ -1,23 +0,0 @@ -Norm of logits: 138.468658 - -Top classes and probabilities -1.0 41.813675 playing cricket -1.4971563e-09 21.493977 hurling (sport) -3.8431164e-10 20.134108 catching or throwing baseball -1.5492331e-10 19.225584 catching or throwing softball -1.13601656e-10 18.915352 hitting baseball -8.801122e-11 18.660118 playing tennis -2.4415652e-11 17.377878 playing kickball -1.1531949e-11 16.627775 playing squash or racquetball -6.131936e-12 15.996164 shooting goal (soccer) -4.391769e-12 15.662386 hammer throw -2.21341e-12 14.977189 golf putting -1.6307157e-12 14.671673 throwing discus -1.5456367e-12 14.61809 javelin 
throw -7.6691496e-13 13.917274 pumping fist -5.192979e-13 13.527376 shot put -4.2681665e-13 13.331253 celebrating -2.720572e-13 12.880911 applauding -1.835705e-13 12.487497 throwing ball -1.6134789e-13 12.358461 dodgeball -1.1388395e-13 12.010079 tap dancing diff --git a/out/imagenet_rgb.txt b/out/imagenet_rgb.txt deleted file mode 100644 index 69af33e..0000000 --- a/out/imagenet_rgb.txt +++ /dev/null @@ -1,23 +0,0 @@ -Norm of logits: 87.108871 - -Top classes and probabilities -0.9999968 25.856646 playing cricket -1.335354e-06 12.330335 playing kickball -4.553142e-07 11.254371 catching or throwing baseball -3.1434254e-07 10.883866 shooting goal (soccer) -1.9243305e-07 10.393131 catching or throwing softball -1.309655e-07 10.008317 throwing discus -1.0681848e-07 9.804514 javelin throw -9.446425e-08 9.681604 golf putting -7.842931e-08 9.49558 jogging -7.801767e-08 9.490318 hitting baseball -6.608518e-08 9.3243265 triple jump -3.4878482e-08 8.685253 hurling (sport) -2.996574e-08 8.533437 skateboarding -2.1083935e-08 8.181894 hurdling -2.0079328e-08 8.133075 playing tennis -1.8454037e-08 8.048665 breakdancing -1.7050859e-08 7.969584 hammer throw -1.7004545e-08 7.9668636 shot put -1.4611408e-08 7.8151855 long jump -1.4274541e-08 7.7918606 headbutting diff --git a/out/no_imagenet_flow.txt b/out/no_imagenet_flow.txt deleted file mode 100644 index b9d8fa6..0000000 --- a/out/no_imagenet_flow.txt +++ /dev/null @@ -1,23 +0,0 @@ -Norm of logits: 63.252754 - -Top classes and probabilities -0.9477566 15.957031 playing cricket -0.04068217 12.808723 hurling (sport) -0.004154162 10.527044 playing tennis -0.0024740736 10.0088 playing squash or racquetball -0.0013800215 9.425033 hitting baseball -0.00079995755 8.879737 catching or throwing baseball -0.0007868024 8.863155 sword fighting -0.00076301256 8.832453 catching or throwing softball -0.00024411127 7.6928024 hammer throw -0.00019753167 7.481077 playing badminton -6.432761e-05 6.359167 pumping fist -6.392867e-05 6.3529463 tai chi 
-3.89561e-05 5.8576136 training dog -3.478504e-05 5.744365 catching or throwing frisbee -3.0313964e-05 5.606787 throwing ball -2.9222967e-05 5.570133 celebrating -2.8943172e-05 5.5605125 shot put -2.3183306e-05 5.3386106 playing ice hockey -2.2206976e-05 5.2955847 golf putting -2.1811395e-05 5.2776113 pole vault diff --git a/out/no_imagenet_joint.txt b/out/no_imagenet_joint.txt deleted file mode 100644 index 81297d8..0000000 --- a/out/no_imagenet_joint.txt +++ /dev/null @@ -1,23 +0,0 @@ -Norm of logits: 138.468658 - -Top classes and probabilities -1.0 41.813675 playing cricket -1.4971563e-09 21.493977 hurling (sport) -3.8431164e-10 20.134108 catching or throwing baseball -1.5492331e-10 19.225584 catching or throwing softball -1.13601656e-10 18.915352 hitting baseball -8.801122e-11 18.660118 playing tennis -2.4415652e-11 17.377878 playing kickball -1.1531949e-11 16.627775 playing squash or racquetball -6.131936e-12 15.996164 shooting goal (soccer) -4.391769e-12 15.662386 hammer throw -2.21341e-12 14.977189 golf putting -1.6307157e-12 14.671673 throwing discus -1.5456367e-12 14.61809 javelin throw -7.6691496e-13 13.917274 pumping fist -5.192979e-13 13.527376 shot put -4.2681665e-13 13.331253 celebrating -2.720572e-13 12.880911 applauding -1.835705e-13 12.487497 throwing ball -1.6134789e-13 12.358461 dodgeball -1.1388395e-13 12.010079 tap dancing diff --git a/out/no_imagenet_rgb.txt b/out/no_imagenet_rgb.txt deleted file mode 100644 index 69af33e..0000000 --- a/out/no_imagenet_rgb.txt +++ /dev/null @@ -1,23 +0,0 @@ -Norm of logits: 87.108871 - -Top classes and probabilities -0.9999968 25.856646 playing cricket -1.335354e-06 12.330335 playing kickball -4.553142e-07 11.254371 catching or throwing baseball -3.1434254e-07 10.883866 shooting goal (soccer) -1.9243305e-07 10.393131 catching or throwing softball -1.309655e-07 10.008317 throwing discus -1.0681848e-07 9.804514 javelin throw -9.446425e-08 9.681604 golf putting -7.842931e-08 9.49558 jogging -7.801767e-08 9.490318 
hitting baseball -6.608518e-08 9.3243265 triple jump -3.4878482e-08 8.685253 hurling (sport) -2.996574e-08 8.533437 skateboarding -2.1083935e-08 8.181894 hurdling -2.0079328e-08 8.133075 playing tennis -1.8454037e-08 8.048665 breakdancing -1.7050859e-08 7.969584 hammer throw -1.7004545e-08 7.9668636 shot put -1.4611408e-08 7.8151855 long jump -1.4274541e-08 7.7918606 headbutting diff --git a/tf_v1/.python-version b/tf_v1/.python-version new file mode 100644 index 0000000..cd33751 --- /dev/null +++ b/tf_v1/.python-version @@ -0,0 +1 @@ +3.6.15 diff --git a/data/checkpoints/flow_imagenet/checkpoint b/tf_v1/data/checkpoints/flow_imagenet/checkpoint similarity index 100% rename from data/checkpoints/flow_imagenet/checkpoint rename to tf_v1/data/checkpoints/flow_imagenet/checkpoint diff --git a/data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 b/tf_v1/data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 similarity index 100% rename from data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 rename to tf_v1/data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 diff --git a/data/checkpoints/flow_imagenet/model.ckpt.index b/tf_v1/data/checkpoints/flow_imagenet/model.ckpt.index similarity index 100% rename from data/checkpoints/flow_imagenet/model.ckpt.index rename to tf_v1/data/checkpoints/flow_imagenet/model.ckpt.index diff --git a/data/checkpoints/flow_imagenet/model.ckpt.meta b/tf_v1/data/checkpoints/flow_imagenet/model.ckpt.meta similarity index 100% rename from data/checkpoints/flow_imagenet/model.ckpt.meta rename to tf_v1/data/checkpoints/flow_imagenet/model.ckpt.meta diff --git a/data/checkpoints/flow_scratch/checkpoint b/tf_v1/data/checkpoints/flow_scratch/checkpoint similarity index 100% rename from data/checkpoints/flow_scratch/checkpoint rename to tf_v1/data/checkpoints/flow_scratch/checkpoint diff --git a/data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 
b/tf_v1/data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 similarity index 100% rename from data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 rename to tf_v1/data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 diff --git a/data/checkpoints/flow_scratch/model.ckpt.index b/tf_v1/data/checkpoints/flow_scratch/model.ckpt.index similarity index 100% rename from data/checkpoints/flow_scratch/model.ckpt.index rename to tf_v1/data/checkpoints/flow_scratch/model.ckpt.index diff --git a/data/checkpoints/flow_scratch/model.ckpt.meta b/tf_v1/data/checkpoints/flow_scratch/model.ckpt.meta similarity index 100% rename from data/checkpoints/flow_scratch/model.ckpt.meta rename to tf_v1/data/checkpoints/flow_scratch/model.ckpt.meta diff --git a/data/checkpoints/rgb_imagenet/checkpoint b/tf_v1/data/checkpoints/rgb_imagenet/checkpoint similarity index 100% rename from data/checkpoints/rgb_imagenet/checkpoint rename to tf_v1/data/checkpoints/rgb_imagenet/checkpoint diff --git a/data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 b/tf_v1/data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 similarity index 100% rename from data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 rename to tf_v1/data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 diff --git a/data/checkpoints/rgb_imagenet/model.ckpt.index b/tf_v1/data/checkpoints/rgb_imagenet/model.ckpt.index similarity index 100% rename from data/checkpoints/rgb_imagenet/model.ckpt.index rename to tf_v1/data/checkpoints/rgb_imagenet/model.ckpt.index diff --git a/data/checkpoints/rgb_imagenet/model.ckpt.meta b/tf_v1/data/checkpoints/rgb_imagenet/model.ckpt.meta similarity index 100% rename from data/checkpoints/rgb_imagenet/model.ckpt.meta rename to tf_v1/data/checkpoints/rgb_imagenet/model.ckpt.meta diff --git a/data/checkpoints/rgb_scratch/checkpoint b/tf_v1/data/checkpoints/rgb_scratch/checkpoint similarity index 100% rename from data/checkpoints/rgb_scratch/checkpoint 
rename to tf_v1/data/checkpoints/rgb_scratch/checkpoint diff --git a/data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 b/tf_v1/data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 similarity index 100% rename from data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 rename to tf_v1/data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 diff --git a/data/checkpoints/rgb_scratch/model.ckpt.index b/tf_v1/data/checkpoints/rgb_scratch/model.ckpt.index similarity index 100% rename from data/checkpoints/rgb_scratch/model.ckpt.index rename to tf_v1/data/checkpoints/rgb_scratch/model.ckpt.index diff --git a/data/checkpoints/rgb_scratch/model.ckpt.meta b/tf_v1/data/checkpoints/rgb_scratch/model.ckpt.meta similarity index 100% rename from data/checkpoints/rgb_scratch/model.ckpt.meta rename to tf_v1/data/checkpoints/rgb_scratch/model.ckpt.meta diff --git a/data/checkpoints/rgb_scratch_kin600/checkpoint b/tf_v1/data/checkpoints/rgb_scratch_kin600/checkpoint similarity index 100% rename from data/checkpoints/rgb_scratch_kin600/checkpoint rename to tf_v1/data/checkpoints/rgb_scratch_kin600/checkpoint diff --git a/data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 b/tf_v1/data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 similarity index 100% rename from data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 rename to tf_v1/data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 diff --git a/data/checkpoints/rgb_scratch_kin600/model.ckpt.index b/tf_v1/data/checkpoints/rgb_scratch_kin600/model.ckpt.index similarity index 100% rename from data/checkpoints/rgb_scratch_kin600/model.ckpt.index rename to tf_v1/data/checkpoints/rgb_scratch_kin600/model.ckpt.index diff --git a/data/checkpoints/rgb_scratch_kin600/model.ckpt.meta b/tf_v1/data/checkpoints/rgb_scratch_kin600/model.ckpt.meta similarity index 100% rename from data/checkpoints/rgb_scratch_kin600/model.ckpt.meta rename to 
tf_v1/data/checkpoints/rgb_scratch_kin600/model.ckpt.meta diff --git a/data/label_map.txt b/tf_v1/data/label_map.txt similarity index 100% rename from data/label_map.txt rename to tf_v1/data/label_map.txt diff --git a/data/label_map_600.txt b/tf_v1/data/label_map_600.txt similarity index 100% rename from data/label_map_600.txt rename to tf_v1/data/label_map_600.txt diff --git a/data/v_CricketShot_g04_c01_flow.gif b/tf_v1/data/v_CricketShot_g04_c01_flow.gif similarity index 100% rename from data/v_CricketShot_g04_c01_flow.gif rename to tf_v1/data/v_CricketShot_g04_c01_flow.gif diff --git a/data/v_CricketShot_g04_c01_flow.npy b/tf_v1/data/v_CricketShot_g04_c01_flow.npy similarity index 100% rename from data/v_CricketShot_g04_c01_flow.npy rename to tf_v1/data/v_CricketShot_g04_c01_flow.npy diff --git a/data/v_CricketShot_g04_c01_rgb.gif b/tf_v1/data/v_CricketShot_g04_c01_rgb.gif similarity index 100% rename from data/v_CricketShot_g04_c01_rgb.gif rename to tf_v1/data/v_CricketShot_g04_c01_rgb.gif diff --git a/data/v_CricketShot_g04_c01_rgb.npy b/tf_v1/data/v_CricketShot_g04_c01_rgb.npy similarity index 100% rename from data/v_CricketShot_g04_c01_rgb.npy rename to tf_v1/data/v_CricketShot_g04_c01_rgb.npy diff --git a/evaluate_sample.py b/tf_v1/evaluate_sample.py similarity index 97% rename from evaluate_sample.py rename to tf_v1/evaluate_sample.py index b512668..2b8825d 100644 --- a/evaluate_sample.py +++ b/tf_v1/evaluate_sample.py @@ -121,7 +121,7 @@ def main(unused_argv): if imagenet_pretrained: rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet']) else: - rgb_saver.restore(sess, _CHECKPOINT_PATHS[eval_type]) + rgb_saver.restore(sess, _CHECKPOINT_PATHS["rgb" if eval_type == "joint" else eval_type]) tf.logging.info('RGB checkpoint restored') rgb_sample = np.load(_SAMPLE_PATHS['rgb']) tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape)) @@ -152,4 +152,4 @@ def main(unused_argv): if __name__ == '__main__': - tf.app.run(main) + tf.app.run(main) \ No 
newline at end of file diff --git a/i3d.py b/tf_v1/i3d.py similarity index 100% rename from i3d.py rename to tf_v1/i3d.py diff --git a/i3d_test.py b/tf_v1/i3d_test.py similarity index 100% rename from i3d_test.py rename to tf_v1/i3d_test.py diff --git a/multi_evaluate.sh b/tf_v1/multi_evaluate.sh similarity index 71% rename from multi_evaluate.sh rename to tf_v1/multi_evaluate.sh index 49165c9..1921488 100755 --- a/multi_evaluate.sh +++ b/tf_v1/multi_evaluate.sh @@ -14,10 +14,10 @@ # limitations under the License. # ============================================================================ -python evaluate_sample.py --imagenet_pretrained true --eval_type joint > out/imagenet_joint.txt -python evaluate_sample.py --imagenet_pretrained true --eval_type flow > out/imagenet_flow.txt -python evaluate_sample.py --imagenet_pretrained true --eval_type rgb > out/imagenet_rgb.txt +python evaluate_sample.py --imagenet_pretrained=true --eval_type joint > out/imagenet_joint.txt +python evaluate_sample.py --imagenet_pretrained=true --eval_type flow > out/imagenet_flow.txt +python evaluate_sample.py --imagenet_pretrained=true --eval_type rgb > out/imagenet_rgb.txt python evaluate_sample.py --imagenet_pretrained=false --eval_type rgb600 > out/no_imagenet_rgb600.txt -python evaluate_sample.py --imagenet_pretrained false --eval_type joint > out/no_imagenet_joint.txt -python evaluate_sample.py --imagenet_pretrained false --eval_type flow > out/no_imagenet_flow.txt -python evaluate_sample.py --imagenet_pretrained false --eval_type rgb > out/no_imagenet_rgb.txt +python evaluate_sample.py --imagenet_pretrained=false --eval_type joint > out/no_imagenet_joint.txt +python evaluate_sample.py --imagenet_pretrained=false --eval_type flow > out/no_imagenet_flow.txt +python evaluate_sample.py --imagenet_pretrained=false --eval_type rgb > out/no_imagenet_rgb.txt diff --git a/tf_v1/out/imagenet_flow.txt b/tf_v1/out/imagenet_flow.txt new file mode 100644 index 0000000..b4a1e4d --- /dev/null +++ 
b/tf_v1/out/imagenet_flow.txt @@ -0,0 +1,23 @@ +Norm of logits: 63.252735 + +Top classes and probabilities +0.9477567 15.957029 playing cricket +0.040682137 12.808721 hurling (sport) +0.0041541667 10.527043 playing tennis +0.0024740787 10.0088 playing squash or racquetball +0.0013800191 9.425029 hitting baseball +0.00079995533 8.879732 catching or throwing baseball +0.0007868025 8.863153 sword fighting +0.00076300907 8.832446 catching or throwing softball +0.00024411014 7.6927958 hammer throw +0.0001975317 7.481076 playing badminton +6.432749e-05 6.3591633 pumping fist +6.39285e-05 6.3529415 tai chi +3.8956216e-05 5.8576145 training dog +3.4785047e-05 5.7443633 catching or throwing frisbee +3.0313882e-05 5.606782 throwing ball +2.922297e-05 5.570131 celebrating +2.8943148e-05 5.5605097 shot put +2.3183286e-05 5.3386083 playing ice hockey +2.2207e-05 5.2955837 golf putting +2.181148e-05 5.277613 pole vault diff --git a/tf_v1/out/imagenet_joint.txt b/tf_v1/out/imagenet_joint.txt new file mode 100644 index 0000000..80b839f --- /dev/null +++ b/tf_v1/out/imagenet_joint.txt @@ -0,0 +1,23 @@ +Norm of logits: 138.468613 + +Top classes and probabilities +1.0 41.813667 playing cricket +1.4971563e-09 21.493969 hurling (sport) +3.8431092e-10 20.134098 catching or throwing baseball +1.549236e-10 19.225578 catching or throwing softball +1.13602086e-10 18.915348 hitting baseball +8.801189e-11 18.660118 playing tennis +2.4415699e-11 17.377872 playing kickball +1.15319716e-11 16.62777 playing squash or racquetball +6.1319474e-12 15.996159 shooting goal (soccer) +4.391761e-12 15.662376 hammer throw +2.2134141e-12 14.977182 golf putting +1.6307188e-12 14.671667 throwing discus +1.5456307e-12 14.618078 javelin throw +7.669135e-13 13.917265 pumping fist +5.192998e-13 13.527372 shot put +4.2681746e-13 13.331247 celebrating +2.7205825e-13 12.880907 applauding +1.8357085e-13 12.487492 throwing ball +1.6134758e-13 12.358452 dodgeball +1.1388438e-13 12.010075 tap dancing diff --git 
a/tf_v1/out/imagenet_rgb.txt b/tf_v1/out/imagenet_rgb.txt new file mode 100644 index 0000000..a0fb81c --- /dev/null +++ b/tf_v1/out/imagenet_rgb.txt @@ -0,0 +1,23 @@ +Norm of logits: 87.108841 + +Top classes and probabilities +0.9999968 25.856636 playing cricket +1.3353629e-06 12.330332 playing kickball +4.5531678e-07 11.254367 catching or throwing baseball +3.1434524e-07 10.883865 shooting goal (soccer) +1.9243487e-07 10.393131 catching or throwing softball +1.3096626e-07 10.008313 throwing discus +1.0681869e-07 9.804506 javelin throw +9.44646e-08 9.681599 golf putting +7.8430055e-08 9.495581 jogging +7.8018566e-08 9.49032 hitting baseball +6.6085555e-08 9.324324 triple jump +3.4878617e-08 8.685247 hurling (sport) +2.996574e-08 8.533429 skateboarding +2.1084094e-08 8.181892 hurdling +2.007952e-08 8.133074 playing tennis +1.8454177e-08 8.048664 breakdancing +1.7050988e-08 7.9695807 hammer throw +1.7004707e-08 7.966863 shot put +1.46115475e-08 7.8151855 long jump +1.4274677e-08 7.79186 headbutting diff --git a/tf_v1/out/no_imagenet_flow.txt b/tf_v1/out/no_imagenet_flow.txt new file mode 100644 index 0000000..69a549e --- /dev/null +++ b/tf_v1/out/no_imagenet_flow.txt @@ -0,0 +1,23 @@ +Norm of logits: 85.503830 + +Top classes and probabilities +0.6976966 15.426945 playing cricket +0.12184773 13.681932 hurling (sport) +0.051920377 12.828872 hitting baseball +0.018172622 11.779077 catching or throwing softball +0.016869571 11.704672 playing squash or racquetball +0.016614497 11.689436 playing tennis +0.0153879095 11.612742 catching or throwing baseball +0.013855016 11.507808 playing badminton +0.010666153 11.246236 sword fighting +0.008051235 10.964986 shooting goal (soccer) +0.0073142387 10.868983 passing American football (in game) +0.0042985766 10.337444 dodgeball +0.0042566485 10.327642 playing basketball +0.001713437 9.417662 playing volleyball +0.0014168038 9.227564 kicking field goal +0.0012208477 9.078706 high kick +0.000961758 8.840168 catching or throwing 
frisbee +0.00092789374 8.804322 javelin throw +0.0009020383 8.776062 dunking basketball +0.0005681694 8.313825 throwing ball diff --git a/tf_v1/out/no_imagenet_joint.txt b/tf_v1/out/no_imagenet_joint.txt new file mode 100644 index 0000000..cf7f362 --- /dev/null +++ b/tf_v1/out/no_imagenet_joint.txt @@ -0,0 +1,23 @@ +Norm of logits: 162.726562 + +Top classes and probabilities +0.9999579 34.915543 playing cricket +3.547991e-05 24.66904 hurling (sport) +3.3652223e-06 22.313568 hitting baseball +2.3034072e-06 21.934464 shooting goal (soccer) +4.9864104e-07 20.404205 catching or throwing baseball +1.12062864e-07 18.911379 playing badminton +8.978248e-08 18.689709 catching or throwing softball +8.465664e-08 18.630922 breakdancing +4.8843876e-08 18.080948 playing tennis +3.8058737e-08 17.83145 playing squash or racquetball +3.7293937e-08 17.81115 robot dancing +1.33325075e-08 16.782524 playing basketball +1.1467065e-08 16.631798 playing volleyball +1.0097457e-08 16.504602 tap dancing +1.00795035e-08 16.502823 pumping fist +7.2048016e-09 16.167067 cheerleading +5.24042e-09 15.848721 golf putting +3.998574e-09 15.578257 playing kickball +3.6800143e-09 15.4952345 sword fighting +3.158014e-09 15.342262 dodgeball diff --git a/tf_v1/out/no_imagenet_rgb.txt b/tf_v1/out/no_imagenet_rgb.txt new file mode 100644 index 0000000..ba07229 --- /dev/null +++ b/tf_v1/out/no_imagenet_rgb.txt @@ -0,0 +1,23 @@ +Norm of logits: 87.217957 + +Top classes and probabilities +0.9968959 19.488598 playing cricket +0.0011161345 12.693823 robot dancing +0.000777455 12.332222 pumping fist +0.00040222323 11.673203 breakdancing +0.00020253485 10.987108 hurling (sport) +0.00019899553 10.969479 shooting goal (soccer) +8.2213286e-05 10.085513 celebrating +4.5082772e-05 9.484696 hitting baseball +2.5490284e-05 8.914494 tap dancing +2.2539461e-05 8.791464 catching or throwing baseball +2.1061125e-05 8.723625 krumping +1.966872e-05 8.655226 applauding +1.6667953e-05 8.489684 jumpstyle dancing +1.4761088e-05 
8.368191 golf putting +1.4261479e-05 8.333758 dancing charleston +1.3878322e-05 8.306524 skateboarding +9.491915e-06 7.9266367 juggling soccer ball +8.952797e-06 7.8681617 cheerleading +8.331946e-06 7.7962933 playing kickball +6.648238e-06 7.570548 headbutting diff --git a/tf_v1/out/no_imagenet_rgb600.txt b/tf_v1/out/no_imagenet_rgb600.txt new file mode 100644 index 0000000..851d1f4 --- /dev/null +++ b/tf_v1/out/no_imagenet_rgb600.txt @@ -0,0 +1,23 @@ +Norm of logits: 113.826424 + +Top classes and probabilities +0.93890214 19.42345 playing cricket +0.044067338 16.364458 shooting goal (soccer) +0.009324817 14.811419 hurling (sport) +0.0019039277 13.222658 passing soccer ball +0.0013803013 12.901041 headbutting +0.0012852419 12.829686 tackling +0.00046463843 11.812243 golf putting +0.00035802208 11.5515785 hammer throw +0.00031406645 11.4205885 kicking field goal +0.00026079742 11.234728 tossing coin +0.00023422384 11.127261 robot dancing +0.00015392243 10.707433 catching or throwing softball +0.00013323939 10.563131 celebrating +0.00012615332 10.508482 playing squash or racquetball +0.00011880849 10.448497 throwing discus +9.273407e-05 10.20072 dodgeball +9.0558075e-05 10.176975 golf chipping +7.9254016e-05 10.043642 catching or throwing baseball +6.984644e-05 9.917283 juggling soccer ball +6.630085e-05 9.865187 golf driving diff --git a/tf_v1/requirements.txt b/tf_v1/requirements.txt new file mode 100644 index 0000000..a08ac50 --- /dev/null +++ b/tf_v1/requirements.txt @@ -0,0 +1,4 @@ +protobuf==3.19.6 +tensorflow==1.14 +dm-sonnet==1.36 +tensorflow-probability==0.7 \ No newline at end of file diff --git a/tf_v2/.python-version b/tf_v2/.python-version new file mode 100644 index 0000000..2009c7d --- /dev/null +++ b/tf_v2/.python-version @@ -0,0 +1 @@ +3.9.2 diff --git a/tf_v2/data/checkpoints/flow_imagenet/checkpoint b/tf_v2/data/checkpoints/flow_imagenet/checkpoint new file mode 100644 index 0000000..febd7d5 --- /dev/null +++ 
b/tf_v2/data/checkpoints/flow_imagenet/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "model.ckpt" +all_model_checkpoint_paths: "model.ckpt" diff --git a/tf_v2/data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 b/tf_v2/data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..c699266 Binary files /dev/null and b/tf_v2/data/checkpoints/flow_imagenet/model.ckpt.data-00000-of-00001 differ diff --git a/tf_v2/data/checkpoints/flow_imagenet/model.ckpt.index b/tf_v2/data/checkpoints/flow_imagenet/model.ckpt.index new file mode 100644 index 0000000..a58ba9e Binary files /dev/null and b/tf_v2/data/checkpoints/flow_imagenet/model.ckpt.index differ diff --git a/tf_v2/data/checkpoints/flow_scratch/checkpoint b/tf_v2/data/checkpoints/flow_scratch/checkpoint new file mode 100644 index 0000000..febd7d5 --- /dev/null +++ b/tf_v2/data/checkpoints/flow_scratch/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "model.ckpt" +all_model_checkpoint_paths: "model.ckpt" diff --git a/tf_v2/data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 b/tf_v2/data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..061a0af Binary files /dev/null and b/tf_v2/data/checkpoints/flow_scratch/model.ckpt.data-00000-of-00001 differ diff --git a/tf_v2/data/checkpoints/flow_scratch/model.ckpt.index b/tf_v2/data/checkpoints/flow_scratch/model.ckpt.index new file mode 100644 index 0000000..0cde892 Binary files /dev/null and b/tf_v2/data/checkpoints/flow_scratch/model.ckpt.index differ diff --git a/tf_v2/data/checkpoints/rgb_imagenet/checkpoint b/tf_v2/data/checkpoints/rgb_imagenet/checkpoint new file mode 100644 index 0000000..ae50dd9 --- /dev/null +++ b/tf_v2/data/checkpoints/rgb_imagenet/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "model.ckpt-" +all_model_checkpoint_paths: "model.ckpt" diff --git a/tf_v2/data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 
b/tf_v2/data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..f94f4bf Binary files /dev/null and b/tf_v2/data/checkpoints/rgb_imagenet/model.ckpt.data-00000-of-00001 differ diff --git a/tf_v2/data/checkpoints/rgb_imagenet/model.ckpt.index b/tf_v2/data/checkpoints/rgb_imagenet/model.ckpt.index new file mode 100644 index 0000000..6766831 Binary files /dev/null and b/tf_v2/data/checkpoints/rgb_imagenet/model.ckpt.index differ diff --git a/tf_v2/data/checkpoints/rgb_scratch/checkpoint b/tf_v2/data/checkpoints/rgb_scratch/checkpoint new file mode 100644 index 0000000..febd7d5 --- /dev/null +++ b/tf_v2/data/checkpoints/rgb_scratch/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "model.ckpt" +all_model_checkpoint_paths: "model.ckpt" diff --git a/tf_v2/data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 b/tf_v2/data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..e539fa1 Binary files /dev/null and b/tf_v2/data/checkpoints/rgb_scratch/model.ckpt.data-00000-of-00001 differ diff --git a/tf_v2/data/checkpoints/rgb_scratch/model.ckpt.index b/tf_v2/data/checkpoints/rgb_scratch/model.ckpt.index new file mode 100644 index 0000000..51d2474 Binary files /dev/null and b/tf_v2/data/checkpoints/rgb_scratch/model.ckpt.index differ diff --git a/tf_v2/data/checkpoints/rgb_scratch_kin600/checkpoint b/tf_v2/data/checkpoints/rgb_scratch_kin600/checkpoint new file mode 100644 index 0000000..25f8196 --- /dev/null +++ b/tf_v2/data/checkpoints/rgb_scratch_kin600/checkpoint @@ -0,0 +1,3 @@ +model_checkpoint_path: "model.ckpt" +all_model_checkpoint_paths: "model.ckpt +" diff --git a/tf_v2/data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 b/tf_v2/data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..c580b37 Binary files /dev/null and b/tf_v2/data/checkpoints/rgb_scratch_kin600/model.ckpt.data-00000-of-00001 differ diff --git 
a/tf_v2/data/checkpoints/rgb_scratch_kin600/model.ckpt.index b/tf_v2/data/checkpoints/rgb_scratch_kin600/model.ckpt.index new file mode 100644 index 0000000..9ce217d Binary files /dev/null and b/tf_v2/data/checkpoints/rgb_scratch_kin600/model.ckpt.index differ diff --git a/tf_v2/data/label_map.txt b/tf_v2/data/label_map.txt new file mode 100644 index 0000000..cdaafcb --- /dev/null +++ b/tf_v2/data/label_map.txt @@ -0,0 +1,400 @@ +abseiling +air drumming +answering questions +applauding +applying cream +archery +arm wrestling +arranging flowers +assembling computer +auctioning +baby waking up +baking cookies +balloon blowing +bandaging +barbequing +bartending +beatboxing +bee keeping +belly dancing +bench pressing +bending back +bending metal +biking through snow +blasting sand +blowing glass +blowing leaves +blowing nose +blowing out candles +bobsledding +bookbinding +bouncing on trampoline +bowling +braiding hair +breading or breadcrumbing +breakdancing +brush painting +brushing hair +brushing teeth +building cabinet +building shed +bungee jumping +busking +canoeing or kayaking +capoeira +carrying baby +cartwheeling +carving pumpkin +catching fish +catching or throwing baseball +catching or throwing frisbee +catching or throwing softball +celebrating +changing oil +changing wheel +checking tires +cheerleading +chopping wood +clapping +clay pottery making +clean and jerk +cleaning floor +cleaning gutters +cleaning pool +cleaning shoes +cleaning toilet +cleaning windows +climbing a rope +climbing ladder +climbing tree +contact juggling +cooking chicken +cooking egg +cooking on campfire +cooking sausages +counting money +country line dancing +cracking neck +crawling baby +crossing river +crying +curling hair +cutting nails +cutting pineapple +cutting watermelon +dancing ballet +dancing charleston +dancing gangnam style +dancing macarena +deadlifting +decorating the christmas tree +digging +dining +disc golfing +diving cliff +dodgeball +doing aerobics +doing laundry 
+doing nails +drawing +dribbling basketball +drinking +drinking beer +drinking shots +driving car +driving tractor +drop kicking +drumming fingers +dunking basketball +dying hair +eating burger +eating cake +eating carrots +eating chips +eating doughnuts +eating hotdog +eating ice cream +eating spaghetti +eating watermelon +egg hunting +exercising arm +exercising with an exercise ball +extinguishing fire +faceplanting +feeding birds +feeding fish +feeding goats +filling eyebrows +finger snapping +fixing hair +flipping pancake +flying kite +folding clothes +folding napkins +folding paper +front raises +frying vegetables +garbage collecting +gargling +getting a haircut +getting a tattoo +giving or receiving award +golf chipping +golf driving +golf putting +grinding meat +grooming dog +grooming horse +gymnastics tumbling +hammer throw +headbanging +headbutting +high jump +high kick +hitting baseball +hockey stop +holding snake +hopscotch +hoverboarding +hugging +hula hooping +hurdling +hurling (sport) +ice climbing +ice fishing +ice skating +ironing +javelin throw +jetskiing +jogging +juggling balls +juggling fire +juggling soccer ball +jumping into pool +jumpstyle dancing +kicking field goal +kicking soccer ball +kissing +kitesurfing +knitting +krumping +laughing +laying bricks +long jump +lunge +making a cake +making a sandwich +making bed +making jewelry +making pizza +making snowman +making sushi +making tea +marching +massaging back +massaging feet +massaging legs +massaging person's head +milking cow +mopping floor +motorcycling +moving furniture +mowing lawn +news anchoring +opening bottle +opening present +paragliding +parasailing +parkour +passing American football (in game) +passing American football (not in game) +peeling apples +peeling potatoes +petting animal (not cat) +petting cat +picking fruit +planting trees +plastering +playing accordion +playing badminton +playing bagpipes +playing basketball +playing bass guitar +playing cards +playing cello 
+playing chess +playing clarinet +playing controller +playing cricket +playing cymbals +playing didgeridoo +playing drums +playing flute +playing guitar +playing harmonica +playing harp +playing ice hockey +playing keyboard +playing kickball +playing monopoly +playing organ +playing paintball +playing piano +playing poker +playing recorder +playing saxophone +playing squash or racquetball +playing tennis +playing trombone +playing trumpet +playing ukulele +playing violin +playing volleyball +playing xylophone +pole vault +presenting weather forecast +pull ups +pumping fist +pumping gas +punching bag +punching person (boxing) +push up +pushing car +pushing cart +pushing wheelchair +reading book +reading newspaper +recording music +riding a bike +riding camel +riding elephant +riding mechanical bull +riding mountain bike +riding mule +riding or walking with horse +riding scooter +riding unicycle +ripping paper +robot dancing +rock climbing +rock scissors paper +roller skating +running on treadmill +sailing +salsa dancing +sanding floor +scrambling eggs +scuba diving +setting table +shaking hands +shaking head +sharpening knives +sharpening pencil +shaving head +shaving legs +shearing sheep +shining shoes +shooting basketball +shooting goal (soccer) +shot put +shoveling snow +shredding paper +shuffling cards +side kick +sign language interpreting +singing +situp +skateboarding +ski jumping +skiing (not slalom or crosscountry) +skiing crosscountry +skiing slalom +skipping rope +skydiving +slacklining +slapping +sled dog racing +smoking +smoking hookah +snatch weight lifting +sneezing +sniffing +snorkeling +snowboarding +snowkiting +snowmobiling +somersaulting +spinning poi +spray painting +spraying +springboard diving +squat +sticking tongue out +stomping grapes +stretching arm +stretching leg +strumming guitar +surfing crowd +surfing water +sweeping floor +swimming backstroke +swimming breast stroke +swimming butterfly stroke +swing dancing +swinging legs +swinging on 
something +sword fighting +tai chi +taking a shower +tango dancing +tap dancing +tapping guitar +tapping pen +tasting beer +tasting food +testifying +texting +throwing axe +throwing ball +throwing discus +tickling +tobogganing +tossing coin +tossing salad +training dog +trapezing +trimming or shaving beard +trimming trees +triple jump +tying bow tie +tying knot (not on a tie) +tying tie +unboxing +unloading truck +using computer +using remote controller (not gaming) +using segway +vault +waiting in line +walking the dog +washing dishes +washing feet +washing hair +washing hands +water skiing +water sliding +watering plants +waxing back +waxing chest +waxing eyebrows +waxing legs +weaving basket +welding +whistling +windsurfing +wrapping present +wrestling +writing +yawning +yoga +zumba diff --git a/tf_v2/data/label_map_600.txt b/tf_v2/data/label_map_600.txt new file mode 100644 index 0000000..639e9c9 --- /dev/null +++ b/tf_v2/data/label_map_600.txt @@ -0,0 +1,600 @@ +abseiling +acting in play +adjusting glasses +air drumming +alligator wrestling +answering questions +applauding +applying cream +archaeological excavation +archery +arguing +arm wrestling +arranging flowers +assembling bicycle +assembling computer +attending conference +auctioning +backflip (human) +baking cookies +bandaging +barbequing +bartending +base jumping +bathing dog +battle rope training +beatboxing +bee keeping +belly dancing +bench pressing +bending back +bending metal +biking through snow +blasting sand +blowdrying hair +blowing bubble gum +blowing glass +blowing leaves +blowing nose +blowing out candles +bobsledding +bodysurfing +bookbinding +bottling +bouncing on bouncy castle +bouncing on trampoline +bowling +braiding hair +breading or breadcrumbing +breakdancing +breaking boards +breathing fire +brush painting +brushing hair +brushing teeth +building cabinet +building lego +building sandcastle +building shed +bull fighting +bulldozing +bungee jumping +burping +busking +calculating 
+calligraphy +canoeing or kayaking +capoeira +capsizing +card stacking +card throwing +carrying baby +cartwheeling +carving ice +carving pumpkin +casting fishing line +catching fish +catching or throwing baseball +catching or throwing frisbee +catching or throwing softball +celebrating +changing gear in car +changing oil +changing wheel (not on bike) +checking tires +cheerleading +chewing gum +chiseling stone +chiseling wood +chopping meat +chopping vegetables +chopping wood +clam digging +clapping +clay pottery making +clean and jerk +cleaning gutters +cleaning pool +cleaning shoes +cleaning toilet +cleaning windows +climbing a rope +climbing ladder +climbing tree +coloring in +combing hair +contact juggling +contorting +cooking egg +cooking on campfire +cooking sausages (not on barbeque) +cooking scallops +cosplaying +counting money +country line dancing +cracking back +cracking knuckles +cracking neck +crawling baby +crossing eyes +crossing river +crying +cumbia +curling (sport) +curling hair +cutting apple +cutting nails +cutting orange +cutting pineapple +cutting watermelon +dancing ballet +dancing charleston +dancing gangnam style +dancing macarena +deadlifting +decorating the christmas tree +delivering mail +dining +directing traffic +disc golfing +diving cliff +docking boat +dodgeball +doing aerobics +doing jigsaw puzzle +doing laundry +doing nails +drawing +dribbling basketball +drinking shots +driving car +driving tractor +drooling +drop kicking +drumming fingers +dumpster diving +dunking basketball +dyeing eyebrows +dyeing hair +eating burger +eating cake +eating carrots +eating chips +eating doughnuts +eating hotdog +eating ice cream +eating spaghetti +eating watermelon +egg hunting +embroidering +exercising with an exercise ball +extinguishing fire +faceplanting +falling off bike +falling off chair +feeding birds +feeding fish +feeding goats +fencing (sport) +fidgeting +finger snapping +fixing bicycle +fixing hair +flint knapping +flipping pancake +fly 
tying +flying kite +folding clothes +folding napkins +folding paper +front raises +frying vegetables +geocaching +getting a haircut +getting a piercing +getting a tattoo +giving or receiving award +gold panning +golf chipping +golf driving +golf putting +gospel singing in church +grinding meat +grooming dog +grooming horse +gymnastics tumbling +hammer throw +hand washing clothes +head stand +headbanging +headbutting +high jump +high kick +historical reenactment +hitting baseball +hockey stop +holding snake +home roasting coffee +hopscotch +hoverboarding +huddling +hugging (not baby) +hugging baby +hula hooping +hurdling +hurling (sport) +ice climbing +ice fishing +ice skating +ice swimming +inflating balloons +installing carpet +ironing +ironing hair +javelin throw +jaywalking +jetskiing +jogging +juggling balls +juggling fire +juggling soccer ball +jumping bicycle +jumping into pool +jumping jacks +jumpstyle dancing +karaoke +kicking field goal +kicking soccer ball +kissing +kitesurfing +knitting +krumping +land sailing +laughing +lawn mower racing +laying bricks +laying concrete +laying stone +laying tiles +leatherworking +licking +lifting hat +lighting fire +lock picking +long jump +longboarding +looking at phone +luge +lunge +making a cake +making a sandwich +making balloon shapes +making bubbles +making cheese +making horseshoes +making jewelry +making paper aeroplanes +making pizza +making snowman +making sushi +making tea +making the bed +marching +marriage proposal +massaging back +massaging feet +massaging legs +massaging neck +massaging person's head +milking cow +moon walking +mopping floor +mosh pit dancing +motorcycling +mountain climber (exercise) +moving furniture +mowing lawn +mushroom foraging +needle felting +news anchoring +opening bottle (not wine) +opening door +opening present +opening refrigerator +opening wine bottle +packing +paragliding +parasailing +parkour +passing American football (in game) +passing american football (not in game) 
+passing soccer ball +peeling apples +peeling potatoes +person collecting garbage +petting animal (not cat) +petting cat +photobombing +photocopying +picking fruit +pillow fight +pinching +pirouetting +planing wood +planting trees +plastering +playing accordion +playing badminton +playing bagpipes +playing basketball +playing bass guitar +playing beer pong +playing blackjack +playing cello +playing chess +playing clarinet +playing controller +playing cricket +playing cymbals +playing darts +playing didgeridoo +playing dominoes +playing drums +playing field hockey +playing flute +playing gong +playing guitar +playing hand clapping games +playing harmonica +playing harp +playing ice hockey +playing keyboard +playing kickball +playing laser tag +playing lute +playing maracas +playing marbles +playing monopoly +playing netball +playing ocarina +playing organ +playing paintball +playing pan pipes +playing piano +playing pinball +playing ping pong +playing poker +playing polo +playing recorder +playing rubiks cube +playing saxophone +playing scrabble +playing squash or racquetball +playing tennis +playing trombone +playing trumpet +playing ukulele +playing violin +playing volleyball +playing with trains +playing xylophone +poking bellybutton +pole vault +polishing metal +popping balloons +pouring beer +preparing salad +presenting weather forecast +pull ups +pumping fist +pumping gas +punching bag +punching person (boxing) +push up +pushing car +pushing cart +pushing wheelbarrow +pushing wheelchair +putting in contact lenses +putting on eyeliner +putting on foundation +putting on lipstick +putting on mascara +putting on sari +putting on shoes +raising eyebrows +reading book +reading newspaper +recording music +repairing puncture +riding a bike +riding camel +riding elephant +riding mechanical bull +riding mule +riding or walking with horse +riding scooter +riding snow blower +riding unicycle +ripping paper +roasting marshmallows +roasting pig +robot dancing +rock climbing 
+rock scissors paper +roller skating +rolling pastry +rope pushdown +running on treadmill +sailing +salsa dancing +sanding floor +sausage making +sawing wood +scrambling eggs +scrapbooking +scrubbing face +scuba diving +separating eggs +setting table +sewing +shaking hands +shaking head +shaping bread dough +sharpening knives +sharpening pencil +shaving head +shaving legs +shearing sheep +shining flashlight +shining shoes +shooting basketball +shooting goal (soccer) +shopping +shot put +shoveling snow +shucking oysters +shuffling cards +shuffling feet +side kick +sign language interpreting +singing +sipping cup +situp +skateboarding +ski jumping +skiing crosscountry +skiing mono +skiing slalom +skipping rope +skipping stone +skydiving +slacklining +slapping +sled dog racing +sleeping +smashing +smelling feet +smoking +smoking hookah +smoking pipe +snatch weight lifting +sneezing +snorkeling +snowboarding +snowkiting +snowmobiling +somersaulting +spelunking +spinning poi +spray painting +springboard diving +square dancing +squat +standing on hands +staring +steer roping +sticking tongue out +stomping grapes +stretching arm +stretching leg +sucking lolly +surfing crowd +surfing water +sweeping floor +swimming backstroke +swimming breast stroke +swimming butterfly stroke +swimming front crawl +swing dancing +swinging baseball bat +swinging on something +sword fighting +sword swallowing +tackling +tagging graffiti +tai chi +talking on cell phone +tango dancing +tap dancing +tapping guitar +tapping pen +tasting beer +tasting food +tasting wine +testifying +texting +threading needle +throwing axe +throwing ball (not baseball or American football) +throwing discus +throwing knife +throwing snowballs +throwing tantrum +throwing water balloon +tickling +tie dying +tightrope walking +tiptoeing +tobogganing +tossing coin +training dog +trapezing +trimming or shaving beard +trimming shrubs +trimming trees +triple jump +twiddling fingers +tying bow tie +tying knot (not on a 
tie) +tying necktie +tying shoe laces +unboxing +unloading truck +using a microscope +using a paint roller +using a power drill +using a sledge hammer +using a wrench +using atm +using bagging machine +using circular saw +using inhaler +using puppets +using remote controller (not gaming) +using segway +vacuuming floor +visiting the zoo +wading through mud +wading through water +waiting in line +waking up +walking the dog +walking through snow +washing dishes +washing feet +washing hair +washing hands +watching tv +water skiing +water sliding +watering plants +waving hand +waxing back +waxing chest +waxing eyebrows +waxing legs +weaving basket +weaving fabric +welding +whistling +windsurfing +winking +wood burning (art) +wrapping present +wrestling +writing +yarn spinning +yawning +yoga +zumba diff --git a/tf_v2/data/v_CricketShot_g04_c01_flow.gif b/tf_v2/data/v_CricketShot_g04_c01_flow.gif new file mode 100644 index 0000000..c69232f Binary files /dev/null and b/tf_v2/data/v_CricketShot_g04_c01_flow.gif differ diff --git a/tf_v2/data/v_CricketShot_g04_c01_flow.npy b/tf_v2/data/v_CricketShot_g04_c01_flow.npy new file mode 100644 index 0000000..ee00e78 Binary files /dev/null and b/tf_v2/data/v_CricketShot_g04_c01_flow.npy differ diff --git a/tf_v2/data/v_CricketShot_g04_c01_rgb.gif b/tf_v2/data/v_CricketShot_g04_c01_rgb.gif new file mode 100644 index 0000000..688ca2f Binary files /dev/null and b/tf_v2/data/v_CricketShot_g04_c01_rgb.gif differ diff --git a/tf_v2/data/v_CricketShot_g04_c01_rgb.npy b/tf_v2/data/v_CricketShot_g04_c01_rgb.npy new file mode 100644 index 0000000..d756c50 Binary files /dev/null and b/tf_v2/data/v_CricketShot_g04_c01_rgb.npy differ diff --git a/tf_v2/evaluate_sample.py b/tf_v2/evaluate_sample.py new file mode 100644 index 0000000..695759d --- /dev/null +++ b/tf_v2/evaluate_sample.py @@ -0,0 +1,150 @@ +# Copyright 2017 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Loads a sample video and classifies using a trained Kinetics checkpoint.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import argparse +import numpy as np +import tensorflow as tf + +import i3d + +_SAMPLE_PATHS = { + "rgb": "data/v_CricketShot_g04_c01_rgb.npy", + "flow": "data/v_CricketShot_g04_c01_flow.npy", +} + +_CHECKPOINT_PATHS_SCRATCH = { + "rgb": "data/checkpoints/rgb_scratch/model.ckpt", + "flow": "data/checkpoints/flow_scratch/model.ckpt", + "rgb600": "data/checkpoints/rgb_scratch_kin600/model.ckpt", +} + +_CHECKPOINT_PATHS_IMAGENET = { + "rgb": "data/checkpoints/rgb_imagenet/model.ckpt", + "flow": "data/checkpoints/flow_imagenet/model.ckpt", +} + +_LABEL_MAP_PATH = "data/label_map.txt" +_LABEL_MAP_PATH_600 = "data/label_map_600.txt" + + +def parse_args() -> argparse.Namespace: + """ + Parse arguments + """ + + parser = argparse.ArgumentParser() + + parser.add_argument( + "--eval_type", + default="joint", + choices=["rgb", "flow", "joint", "rgb600"], + help="Type of evaluation", + ) + + parser.add_argument( + "--imagenet_pretrained", + action="store_true", + help="Use ImageNet pretrained weights. Not availble for rgb600", + ) + + return parser.parse_args() + + +def main(): + """ + Main function to evaluate I3D on Kinetics. 
+ """ + + logging.basicConfig(level=logging.INFO) + + args = parse_args() + + eval_type = args.eval_type + imagenet_pretrained = args.imagenet_pretrained + + if eval_type == "rgb600" and imagenet_pretrained: + raise ValueError("Kinetics 600 not available for ImageNet pretrained model") + + _checkpoint_paths = ( + _CHECKPOINT_PATHS_IMAGENET if imagenet_pretrained else _CHECKPOINT_PATHS_SCRATCH + ) + + kinetics_classes = ( + [x.strip() for x in open(_LABEL_MAP_PATH_600, encoding="utf-8")] + if eval_type == "rgb600" + else [x.strip() for x in open(_LABEL_MAP_PATH, encoding="utf-8")] + ) + + num_classes = 600 if eval_type == "rgb600" else 400 + + if eval_type in ["rgb", "rgb600", "joint"]: + # Instantiate the model for RGB + rgb_model = i3d.InceptionI3d(num_classes, spatial_squeeze=True, final_endpoint="Logits") + + # Restore the checkpoint + tf.train.Checkpoint(model=rgb_model).restore( + _checkpoint_paths["rgb600" if eval_type == "rgb600" else "rgb"] + ) + logging.info("RGB checkpoint restored") + + # Load the sample video + rgb_sample = tf.convert_to_tensor(np.load(_SAMPLE_PATHS["rgb"]), dtype=tf.float32) + logging.info("RGB sample loaded") + + # Run the model + rgb_logits, _ = rgb_model(rgb_sample) + + if eval_type in ["flow", "joint"]: + # Instantiate the model for flow + flow_model = i3d.InceptionI3d(num_classes, spatial_squeeze=True, final_endpoint="Logits") + + # Restore the checkpoint + tf.train.Checkpoint(model=flow_model).restore(_checkpoint_paths["flow"]) + logging.info("Flow checkpoint restored") + + # Load the sample video + flow_sample = tf.convert_to_tensor(np.load(_SAMPLE_PATHS["flow"]), dtype=tf.float32) + logging.info("Flow sample loaded") + + # Run the model + flow_logits, _ = flow_model(flow_sample) + + if eval_type in ["rgb", "rgb600"]: + out_logits = rgb_logits + elif eval_type == "flow": + out_logits = flow_logits + else: + out_logits = rgb_logits + flow_logits + + out_predictions = tf.nn.softmax(out_logits) + + out_logits = out_logits[0] + 
out_predictions = out_predictions[0] + sorted_indices = np.argsort(out_predictions)[::-1] + + print(f"Norm of logits: {np.linalg.norm(out_logits)}") + print("\nTop classes and probabilities") + for index in sorted_indices[:20]: + print(out_predictions[index].numpy(), out_logits[index].numpy(), kinetics_classes[index]) + + +if __name__ == "__main__": + main() diff --git a/tf_v2/i3d.py b/tf_v2/i3d.py new file mode 100644 index 0000000..2adacc1 --- /dev/null +++ b/tf_v2/i3d.py @@ -0,0 +1,663 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Inception-v1 Inflated 3D ConvNet used for Kinetics CVPR paper. + +The model is introduced in: + + Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset + Joao Carreira, Andrew Zisserman + https://arxiv.org/pdf/1705.07750v1.pdf. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sonnet as snt +import tensorflow as tf + + +class Unit3D(snt.Module): + """Basic unit containing Conv3D + BatchNorm + non-linearity.""" + + def __init__( + self, + output_channels, + kernel_shape=(1, 1, 1), + stride=(1, 1, 1), + activation_fn=tf.nn.relu, + use_batch_norm=True, + use_bias=False, + is_training=False, + name="unit_3d", + ): + """ + Initializes Unit3D module. + + Args: + output_channels: number of output channels (int). 
+ kernel_shape: shape of the convolutional kernel (iterable of 3 ints). + stride: shape of the convolutional stride (iterable of 3 ints). + activation_fn: activation function (callable). + use_batch_norm: whether to use batch normalization (boolean). + use_bias: whether to use bias (boolean). + is_training: whether to use training mode for snt.BatchNorm (boolean). + name: name of the module (string). + """ + super(Unit3D, self).__init__(name=name) + + self._use_batch_norm = use_batch_norm + self._activation_fn = activation_fn + self._is_training = is_training + + # layers + + self.conv3d = snt.Conv3D( + output_channels=output_channels, + kernel_shape=kernel_shape, + stride=stride, + padding="SAME", + with_bias=use_bias, + name="conv_3d", + ) + + self.batch_norm = snt.BatchNorm( + create_scale=False, + create_offset=True, + ) + + def __call__(self, net): + """ + Connects the module to inputs. + + Args: + net: Inputs to the Unit3D component. + + Returns: + Outputs from the module. + """ + net = self.conv3d(net) + + if self._use_batch_norm: + net = self.batch_norm(net, is_training=self._is_training, test_local_stats=False) + + if self._activation_fn is not None: + net = self._activation_fn(net) + + return net + + +class MixedLayerBranch(snt.Module): + """ + Used to create the branches of the mixed layers. + """ + + def __init__( + self, + name, + is_training, + a_output_channels, + a_kernel_shape, + a_name, + b_output_channels, + b_kernel, + b_name, + ): + """ + Initializes MixedLayerBranch module. + + Args: + name: name of the module (string). + is_training: whether to use training mode for snt.BatchNorm (boolean). + a_output_channels: number of output channels for branch A (int). + a_kernel_shape: shape of the convolutional kernel for branch A (iterable of 3 ints). + a_name: name of branch A (string). + b_output_channels: number of output channels for branch B (int). + b_kernel: shape of the convolutional kernel for branch B (iterable of 3 ints). 
+ b_name: name of branch B (string). + """ + super(MixedLayerBranch, self).__init__(name=name) + + if a_output_channels is not None and a_kernel_shape is not None: + self.branch_a = Unit3D( + output_channels=a_output_channels, + kernel_shape=a_kernel_shape, + is_training=is_training, + name=a_name, + ) + else: + self.branch_a = lambda x: tf.nn.max_pool3d( + x, + ksize=[1, 3, 3, 3, 1], + strides=[1, 1, 1, 1, 1], + padding="SAME", + name="MaxPool3d_0a_3x3", + ) + + if b_output_channels is not None and b_kernel is not None: + self.branch_b = Unit3D( + output_channels=b_output_channels, + kernel_shape=b_kernel, + is_training=is_training, + name=b_name, + ) + else: + self.branch_b = lambda x: x + + def __call__(self, net): + """ + Connects the module to inputs. + + Args: + net: Inputs to the MixedLayerBranch component. + + Returns: + Outputs from the module. + """ + + return self.branch_b(self.branch_a(net)) + + +class MixedLayer(snt.Module): + """ + Inception layer. + """ + + def __init__( + self, + name, + is_training, + branch_0_a_output_channels, + branch_1_a_output_channels, + branch_1_b_output_channels, + branch_2_a_output_channels, + branch_2_b_output_channels, + branch_3_b_output_channels, + branch_0_a_name="Conv3d_0a_1x1", + branch_0_a_kernel_shape=[1, 1, 1], + branch_1_a_name="Conv3d_0a_1x1", + branch_1_a_kernel=[1, 1, 1], + branch_1_b_name="Conv3d_0b_3x3", + branch_1_b_kernel=[3, 3, 3], + branch_2_a_name="Conv3d_0a_1x1", + branch_2_a_kernel=[1, 1, 1], + branch_2_b_name="Conv3d_0b_3x3", + branch_2_b_kernel=[3, 3, 3], + branch_3_b_name="Conv3d_0b_1x1", + branch_3_b_kernel=[1, 1, 1], + ): + """ + Initializes MixedLayer module. + + Args: + name: name of the module (string). + is_training: whether to use training mode for snt.BatchNorm (boolean). + branch_0_a_output_channels: number of output channels for branch 0A (int). + branch_1_a_output_channels: number of output channels for branch 1A (int). 
+ branch_1_b_output_channels: number of output channels for branch 1B (int). + branch_2_a_output_channels: number of output channels for branch 2A (int). + branch_2_b_output_channels: number of output channels for branch 2B (int). + branch_3_b_output_channels: number of output channels for branch 3B (int). + branch_0_a_name: name of branch 0A (string). + branch_0_a_kernel_shape: shape of the convolutional kernel for branch 0A (iterable of 3 ints). + branch_1_a_name: name of branch 1A (string). + branch_1_a_kernel: shape of the convolutional kernel for branch 1A (iterable of 3 ints). + branch_1_b_name: name of branch 1B (string). + branch_1_b_kernel: shape of the convolutional kernel for branch 1B (iterable of 3 ints). + branch_2_a_name: name of branch 2A (string). + branch_2_a_kernel: shape of the convolutional kernel for branch 2A (iterable of 3 ints). + branch_2_b_name: name of branch 2B (string). + branch_2_b_kernel: shape of the convolutional kernel for branch 2B (iterable of 3 ints). + branch_3_b_name: name of branch 3B (string). + branch_3_b_kernel: shape of the convolutional kernel for branch 3B (iterable of 3 ints). 
+ """ + super(MixedLayer, self).__init__(name=name) + + self.branch_0 = MixedLayerBranch( + name="Branch_0", + is_training=is_training, + a_output_channels=branch_0_a_output_channels, + a_kernel_shape=branch_0_a_kernel_shape, + a_name=branch_0_a_name, + b_output_channels=None, + b_kernel=None, + b_name=None, + ) + + self.branch_1 = MixedLayerBranch( + name="Branch_1", + is_training=is_training, + a_output_channels=branch_1_a_output_channels, + a_kernel_shape=branch_1_a_kernel, + a_name=branch_1_a_name, + b_output_channels=branch_1_b_output_channels, + b_kernel=branch_1_b_kernel, + b_name=branch_1_b_name, + ) + + self.branch_2 = MixedLayerBranch( + name="Branch_2", + is_training=is_training, + a_output_channels=branch_2_a_output_channels, + a_kernel_shape=branch_2_a_kernel, + a_name=branch_2_a_name, + b_output_channels=branch_2_b_output_channels, + b_kernel=branch_2_b_kernel, + b_name=branch_2_b_name, + ) + + self.branch_3 = MixedLayerBranch( + name="Branch_3", + is_training=is_training, + a_output_channels=None, + a_kernel_shape=None, + a_name=None, + b_output_channels=branch_3_b_output_channels, + b_kernel=branch_3_b_kernel, + b_name=branch_3_b_name, + ) + + def __call__(self, net): + """ + Connects the module to inputs. + + Args: + net: Inputs to the MixedLayer component. + + Returns: + Outputs from the module. + """ + + branch_0 = self.branch_0(net) + branch_1 = self.branch_1(net) + branch_2 = self.branch_2(net) + branch_3 = self.branch_3(net) + + return tf.concat([branch_0, branch_1, branch_2, branch_3], 4) + + +class Logits(snt.Module): + """ + Logits layer. 
+ """ + + def __init__( + self, + num_classes, + spatial_squeeze=True, + is_training=False, + dropout_keep_prob=1.0, + name="logits", + ): + super(Logits, self).__init__(name=name) + + self._spatial_squeeze = spatial_squeeze + self._dropout_keep_prob = dropout_keep_prob + + self.logits = Unit3D( + output_channels=num_classes, + kernel_shape=[1, 1, 1], + activation_fn=None, + use_batch_norm=False, + use_bias=True, + is_training=is_training, + name="Conv3d_0c_1x1", + ) + + def __call__(self, net): + net = tf.nn.avg_pool3d( + net, + ksize=[1, 2, 7, 7, 1], + strides=[1, 1, 1, 1, 1], + padding="VALID", + ) + + net = tf.nn.dropout( + net, + 1 - self._dropout_keep_prob, + ) + + net = self.logits(net) + + if self._spatial_squeeze: + net = tf.squeeze(net, [2, 3], name="SpatialSqueeze") + + return tf.reduce_mean(net, axis=1) + + +class InceptionI3d(snt.Module): + """Inception-v1 I3D architecture. + + The model is introduced in: + + Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset + Joao Carreira, Andrew Zisserman + https://arxiv.org/pdf/1705.07750v1.pdf. + + See also the Inception architecture, introduced in: + + Going deeper with convolutions + Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, + Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. + http://arxiv.org/pdf/1409.4842v1.pdf. + """ + + # Endpoints of the model in order. During construction, all the endpoints up + # to a designated `final_endpoint` are returned in a dictionary as the + # second return value. 
+ VALID_ENDPOINTS = ( + "Conv3d_1a_7x7", + "MaxPool3d_2a_3x3", + "Conv3d_2b_1x1", + "Conv3d_2c_3x3", + "MaxPool3d_3a_3x3", + "Mixed_3b", + "Mixed_3c", + "MaxPool3d_4a_3x3", + "Mixed_4b", + "Mixed_4c", + "Mixed_4d", + "Mixed_4e", + "Mixed_4f", + "MaxPool3d_5a_2x2", + "Mixed_5b", + "Mixed_5c", + "Logits", + "Predictions", + ) + + def __init__( + self, + num_classes=400, + spatial_squeeze=True, + is_training=False, + dropout_keep_prob=1.0, + final_endpoint="Logits", + name="inception_i3d", + ): + """Initializes I3D model instance. + + Args: + num_classes: The number of outputs in the logit layer (default 400, which + matches the Kinetics dataset). + spatial_squeeze: Whether to squeeze the spatial dimensions for the logits + before returning (default True). + is_training: whether to use training mode for snt.BatchNorm (boolean). + dropout_keep_prob: Probability for the tf.nn.dropout layer (float in + (0, 1]). + final_endpoint: The model contains many possible endpoints. + `final_endpoint` specifies the last endpoint for the model to be built + up to. In addition to the output at `final_endpoint`, all the outputs + at endpoints up to `final_endpoint` will also be returned, in a + dictionary. `final_endpoint` must be one of + InceptionI3d.VALID_ENDPOINTS (default 'Logits'). + name: A string (optional). The name of this module. + + Raises: + ValueError: + if `final_endpoint` is not recognized. + if `dropout_keep_prob` is not in range (0, 1]. 
+ + """ + + if final_endpoint not in self.VALID_ENDPOINTS: + raise ValueError(f"Unknown final endpoint {final_endpoint}") + + if not 0 < dropout_keep_prob <= 1: + raise ValueError("dropout_keep_prob must be in range (0, 1]") + + super(InceptionI3d, self).__init__(name=name) + + self._final_endpoint = final_endpoint + + # + # layers + # + + # Conv3d_1a_7x7 + + self.Conv3d_1a_7x7 = Unit3D( # pylint: disable=invalid-name + output_channels=64, + kernel_shape=[7, 7, 7], + stride=[2, 2, 2], + is_training=is_training, + name="Conv3d_1a_7x7", + ) + + # MaxPool3d_2a_3x3 + + self.MaxPool3d_2a_3x3 = lambda x: tf.nn.max_pool3d( # pylint: disable=invalid-name + x, + ksize=[1, 1, 3, 3, 1], + strides=[1, 1, 2, 2, 1], + padding="SAME", + name="MaxPool3d_2a_3x3", + ) + + # Conv3d_2b_1x1 + + self.Conv3d_2b_1x1 = Unit3D( # pylint: disable=invalid-name + output_channels=64, + kernel_shape=[1, 1, 1], + is_training=is_training, + name="Conv3d_2b_1x1", + ) + + # Conv3d_2c_3x3 + + self.Conv3d_2c_3x3 = Unit3D( # pylint: disable=invalid-name + output_channels=192, + kernel_shape=[3, 3, 3], + is_training=is_training, + name="Conv3d_2c_3x3", + ) + + # MaxPool3d_3a_3x3 + + self.MaxPool3d_3a_3x3 = lambda x: tf.nn.max_pool3d( # pylint: disable=invalid-name + x, + ksize=[1, 1, 3, 3, 1], + strides=[1, 1, 2, 2, 1], + padding="SAME", + name="MaxPool3d_3a_3x3", + ) + + # Mixed_3b + + self.Mixed_3b = MixedLayer( # pylint: disable=invalid-name + name="Mixed_3b", + is_training=is_training, + branch_0_a_output_channels=64, + branch_1_a_output_channels=96, + branch_1_b_output_channels=128, + branch_2_a_output_channels=16, + branch_2_b_output_channels=32, + branch_3_b_output_channels=32, + ) + + # Mixed_3c + + self.Mixed_3c = MixedLayer( # pylint: disable=invalid-name + name="Mixed_3c", + is_training=is_training, + branch_0_a_output_channels=128, + branch_1_a_output_channels=128, + branch_1_b_output_channels=192, + branch_2_a_output_channels=32, + branch_2_b_output_channels=96, + 
branch_3_b_output_channels=64, + ) + + # MaxPool3d_4a_3x3 + + self.MaxPool3d_4a_3x3 = lambda x: tf.nn.max_pool3d( # pylint: disable=invalid-name + x, + ksize=[1, 3, 3, 3, 1], + strides=[1, 2, 2, 2, 1], + padding="SAME", + name="MaxPool3d_4a_3x3", + ) + + # Mixed_4b + + self.Mixed_4b = MixedLayer( # pylint: disable=invalid-name + name="Mixed_4b", + is_training=is_training, + branch_0_a_output_channels=192, + branch_1_a_output_channels=96, + branch_1_b_output_channels=208, + branch_2_a_output_channels=16, + branch_2_b_output_channels=48, + branch_3_b_output_channels=64, + ) + + # Mixed_4c + + self.Mixed_4c = MixedLayer( # pylint: disable=invalid-name + name="Mixed_4c", + is_training=is_training, + branch_0_a_output_channels=160, + branch_1_a_output_channels=112, + branch_1_b_output_channels=224, + branch_2_a_output_channels=24, + branch_2_b_output_channels=64, + branch_3_b_output_channels=64, + ) + + # Mixed_4d + + self.Mixed_4d = MixedLayer( # pylint: disable=invalid-name + name="Mixed_4d", + is_training=is_training, + branch_0_a_output_channels=128, + branch_1_a_output_channels=128, + branch_1_b_output_channels=256, + branch_2_a_output_channels=24, + branch_2_b_output_channels=64, + branch_3_b_output_channels=64, + ) + + # Mixed_4e + + self.Mixed_4e = MixedLayer( # pylint: disable=invalid-name + name="Mixed_4e", + is_training=is_training, + branch_0_a_output_channels=112, + branch_1_a_output_channels=144, + branch_1_b_output_channels=288, + branch_2_a_output_channels=32, + branch_2_b_output_channels=64, + branch_3_b_output_channels=64, + ) + + # Mixed_4f + + self.Mixed_4f = MixedLayer( # pylint: disable=invalid-name + name="Mixed_4f", + is_training=is_training, + branch_0_a_output_channels=256, + branch_1_a_output_channels=160, + branch_1_b_output_channels=320, + branch_2_a_output_channels=32, + branch_2_b_output_channels=128, + branch_3_b_output_channels=128, + ) + + # MaxPool3d_5a_2x2 + + self.MaxPool3d_5a_2x2 = lambda x: tf.nn.max_pool3d( # pylint: 
disable=invalid-name + x, + ksize=[1, 2, 2, 2, 1], + strides=[1, 2, 2, 2, 1], + padding="SAME", + name="MaxPool3d_5a_2x2", + ) + + # Mixed_5b + + self.Mixed_5b = MixedLayer( # pylint: disable=invalid-name + name="Mixed_5b", + is_training=is_training, + branch_0_a_output_channels=256, + branch_1_a_output_channels=160, + branch_1_b_output_channels=320, + branch_2_a_output_channels=32, + branch_2_b_output_channels=128, + branch_2_b_name="Conv3d_0a_3x3", # NOTE: this is different from the original tf v1 implementation + branch_3_b_output_channels=128, + ) + + # Mixed_5c + + self.Mixed_5c = MixedLayer( # pylint: disable=invalid-name + name="Mixed_5c", + is_training=is_training, + branch_0_a_output_channels=384, + branch_1_a_output_channels=192, + branch_1_b_output_channels=384, + branch_2_a_output_channels=48, + branch_2_b_output_channels=128, + branch_3_b_output_channels=128, + ) + + # Logits + + self.Logits = Logits( # pylint: disable=invalid-name + num_classes=num_classes, + spatial_squeeze=spatial_squeeze, + is_training=is_training, + dropout_keep_prob=dropout_keep_prob, + name="Logits", + ) + + # Predictions + + self.Predictions = lambda x: tf.nn.softmax( # pylint: disable=invalid-name + x, + name="Predictions", + ) + + def __call__(self, net): + """Connects the model to inputs. + + Args: + net: Inputs to the model, which should have dimensions + `batch_size` x `num_frames` x 224 x 224 x `num_channels`. + + Returns: + A tuple consisting of: + 1. Network output at location `self._final_endpoint`. + 2. Dictionary containing all endpoints up to `self._final_endpoint`, + indexed by endpoint name. 
+ + Raises: + ValueError: + if net shape is not `batch_size` x `num_frames` x 224 x 224 x `num_channels` + """ + + if len(net.shape) != 5 or net.shape[2] != 224 or net.shape[3] != 224: + raise ValueError( + "Input tensor shape must be [batch_size, num_frames, 224, 224, num_channels]" + ) + + endpoints = {} + + for endpoint in self.VALID_ENDPOINTS: + net = getattr(self, endpoint)(net) + endpoints[endpoint] = net + if endpoint == self._final_endpoint: + break + + return net, endpoints diff --git a/tf_v2/i3d_test.py b/tf_v2/i3d_test.py new file mode 100644 index 0000000..39f6d51 --- /dev/null +++ b/tf_v2/i3d_test.py @@ -0,0 +1,101 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Tests for I3D model code.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +import i3d + +_IMAGE_SIZE = 224 +_NUM_CLASSES = 400 + + +class I3dTest(tf.test.TestCase): + """Test of Inception I3D model, without real data.""" + + def testModelShapesWithSqueeze(self): + """ + Test shapes after running some fake data through the model. 
def testModelShapesWithoutSqueeze(self):
    """
    Check the output shapes when `spatial_squeeze` is switched off.

    The model is built with `is_training=False` and `dropout_keep_prob=1.0`,
    so a different dropout setting than the squeeze test is exercised too.
    Both the "Predictions" output and the "Logits" endpoint are expected to
    have shape (5, 1, 1, _NUM_CLASSES).
    """
    batch_size = 5
    expected_shape = (batch_size, 1, 1, _NUM_CLASSES)

    model = i3d.InceptionI3d(
        num_classes=_NUM_CLASSES,
        spatial_squeeze=False,
        final_endpoint="Predictions",
        is_training=False,
        dropout_keep_prob=1.0,
    )

    # All-zero stand-in for a batch of 64-frame, 3-channel clips.
    clip_shape = [batch_size, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3]
    fake_clips = tf.zeros(clip_shape, dtype=tf.float32)

    predictions, end_points = model(fake_clips)

    # Same order as before: final predictions first, then the logits endpoint.
    for tensor in (predictions, end_points["Logits"]):
        self.assertEqual(tensor.shape, expected_shape)
+ i3d_model = i3d.InceptionI3d( + num_classes=_NUM_CLASSES, is_training=False, dropout_keep_prob=0.5 + ) + inp = tf.zeros([5, 64, 10, 10, 3], dtype=tf.float32) + with self.assertRaises(ValueError): + _, _ = i3d_model(inp) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tf_v2/multi_evaluate.sh b/tf_v2/multi_evaluate.sh new file mode 100755 index 0000000..1945699 --- /dev/null +++ b/tf_v2/multi_evaluate.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Runs evaluate_sample.py once per model configuration and stores each run's
# stdout under out/. Every configuration is attempted even if an earlier one
# fails; the script exits non-zero when at least one run failed.

# The redirections below fail outright if the directory is missing.
mkdir -p out

status=0

# run_eval OUT_NAME [evaluate_sample.py arguments...]
# Runs one evaluation and writes its stdout to out/OUT_NAME.txt.
run_eval() {
  local out_name=$1
  shift
  if ! python evaluate_sample.py "$@" > "out/${out_name}.txt"; then
    echo "multi_evaluate: evaluate_sample.py $* failed" >&2
    status=1
  fi
}

run_eval no_imagenet_rgb --eval_type rgb
run_eval no_imagenet_flow --eval_type flow
run_eval no_imagenet_joint --eval_type joint
run_eval no_imagenet_rgb600 --eval_type rgb600
run_eval imagenet_rgb --imagenet_pretrained --eval_type rgb
run_eval imagenet_flow --imagenet_pretrained --eval_type flow
run_eval imagenet_joint --imagenet_pretrained --eval_type joint

exit "${status}"
21.59496 hurling (sport) +3.3774655e-10 20.250334 catching or throwing baseball +1.4016698e-10 19.370872 catching or throwing softball +9.8526166e-11 19.01836 hitting baseball +7.8171074e-11 18.786938 playing tennis +2.229529e-11 17.532413 playing kickball +1.0299307e-11 16.760115 playing squash or racquetball +5.1403287e-12 16.065155 shooting goal (soccer) +3.823547e-12 15.769217 hammer throw +1.7345871e-12 14.978807 golf putting +1.3726452e-12 14.744778 throwing discus +1.2943796e-12 14.686069 javelin throw +6.587695e-13 14.010656 pumping fist +4.3580637e-13 13.597482 shot put +3.6077695e-13 13.408543 celebrating +2.2315094e-13 12.928131 applauding +1.5636346e-13 12.572466 throwing ball +1.4015233e-13 12.463012 dodgeball +9.741418e-14 12.099255 tap dancing diff --git a/tf_v2/out/imagenet_rgb.txt b/tf_v2/out/imagenet_rgb.txt new file mode 100644 index 0000000..f9d0bc4 --- /dev/null +++ b/tf_v2/out/imagenet_rgb.txt @@ -0,0 +1,23 @@ +Norm of logits: 87.71087646484375 + +Top classes and probabilities +0.9999969 26.029356 playing cricket +1.2830297e-06 12.463073 playing kickball +4.1240543e-07 11.3281 catching or throwing baseball +2.8364303e-07 10.95381 shooting goal (soccer) +1.7650234e-07 10.479427 catching or throwing softball +1.1857085e-07 10.081604 throwing discus +9.773876e-08 9.8883915 javelin throw +8.264297e-08 9.720623 golf putting +7.1625415e-08 9.577544 jogging +7.032236e-08 9.559182 hitting baseball +5.949871e-08 9.392048 triple jump +3.0725783e-08 8.731196 hurling (sport) +2.697901e-08 8.601151 skateboarding +1.9198977e-08 8.260951 hurdling +1.7803252e-08 8.185474 playing tennis +1.641887e-08 8.104526 breakdancing +1.5271587e-08 8.032087 hammer throw +1.5154795e-08 8.02441 shot put +1.3046246e-08 7.8745933 long jump +1.2785371e-08 7.854394 dodgeball diff --git a/tf_v2/out/no_imagenet_flow.txt b/tf_v2/out/no_imagenet_flow.txt new file mode 100644 index 0000000..7426b22 --- /dev/null +++ b/tf_v2/out/no_imagenet_flow.txt @@ -0,0 +1,23 @@ +Norm of logits: 
85.58901977539062 + +Top classes and probabilities +0.69825274 15.443132 playing cricket +0.12184731 13.69732 hurling (sport) +0.05197227 12.845262 hitting baseball +0.01813956 11.792646 catching or throwing softball +0.016728198 11.711647 playing squash or racquetball +0.016611194 11.704628 playing tennis +0.015381196 11.627697 catching or throwing baseball +0.013705299 11.512334 playing badminton +0.010614568 11.256779 sword fighting +0.0080114715 10.975426 shooting goal (soccer) +0.007307311 10.883427 passing American football (in game) +0.0042668637 10.34543 dodgeball +0.004227216 10.336095 playing basketball +0.0016958109 9.422712 playing volleyball +0.001409824 9.238016 kicking field goal +0.001213725 9.088245 high kick +0.00095145876 8.844792 catching or throwing frisbee +0.0009156524 8.806433 javelin throw +0.00089568074 8.78438 dunking basketball +0.00056303403 8.320136 throwing ball diff --git a/tf_v2/out/no_imagenet_joint.txt b/tf_v2/out/no_imagenet_joint.txt new file mode 100644 index 0000000..7b08d42 --- /dev/null +++ b/tf_v2/out/no_imagenet_joint.txt @@ -0,0 +1,23 @@ +Norm of logits: 162.92539978027344 + +Top classes and probabilities +0.9999584 34.958046 playing cricket +3.508524e-05 24.700357 hurling (sport) +3.3313318e-06 22.34595 hitting baseball +2.2695579e-06 21.962162 shooting goal (soccer) +4.910932e-07 20.431456 catching or throwing baseball +1.0910741e-07 18.927155 playing badminton +8.807217e-08 18.712978 catching or throwing softball +8.3003584e-08 18.653706 breakdancing +4.8120775e-08 18.108536 playing tennis +3.7019543e-08 17.846268 playing squash or racquetball +3.666531e-08 17.836653 robot dancing +1.3021506e-08 16.801424 playing basketball +1.1194344e-08 16.65023 playing volleyball +9.855844e-09 16.522886 pumping fist +9.855712e-09 16.522873 tap dancing +7.0978285e-09 16.19461 cheerleading +5.1094395e-09 15.8659115 golf putting +3.9183203e-09 15.600485 playing kickball +3.590486e-09 15.513109 sword fighting +3.0749503e-09 15.35811 
dodgeball diff --git a/tf_v2/out/no_imagenet_rgb.txt b/tf_v2/out/no_imagenet_rgb.txt new file mode 100644 index 0000000..3082c98 --- /dev/null +++ b/tf_v2/out/no_imagenet_rgb.txt @@ -0,0 +1,23 @@ +Norm of logits: 87.34051513671875 + +Top classes and probabilities +0.9969248 19.51491 playing cricket +0.001105962 12.71095 robot dancing +0.0007713757 12.350655 pumping fist +0.00039890528 11.691203 breakdancing +0.00020044774 11.003033 hurling (sport) +0.0001972069 10.9867325 shooting goal (soccer) +8.1218466e-05 10.099622 celebrating +4.4620923e-05 9.500682 hitting baseball +2.5156469e-05 8.927594 tap dancing +2.2226246e-05 8.803753 catching or throwing baseball +2.0797093e-05 8.737292 krumping +1.9363993e-05 8.6658945 applauding +1.6439102e-05 8.502142 jumpstyle dancing +1.4521604e-05 8.378117 golf putting +1.4052116e-05 8.345252 dancing charleston +1.3678939e-05 8.3183365 skateboarding +9.371043e-06 7.940104 juggling soccer ball +8.833894e-06 7.881075 cheerleading +8.225115e-06 7.8096714 playing kickball +6.553708e-06 7.5825095 headbutting diff --git a/tf_v2/out/no_imagenet_rgb600.txt b/tf_v2/out/no_imagenet_rgb600.txt new file mode 100644 index 0000000..642f13b --- /dev/null +++ b/tf_v2/out/no_imagenet_rgb600.txt @@ -0,0 +1,23 @@ +Norm of logits: 113.88866424560547 + +Top classes and probabilities +0.9392533 19.437374 playing cricket +0.04382084 16.372398 shooting goal (soccer) +0.009269297 14.818996 hurling (sport) +0.0018941606 13.231065 passing soccer ball +0.0013749333 12.910694 headbutting +0.0012738581 12.834339 tackling +0.00046279083 11.821809 golf putting +0.0003559586 11.559348 hammer throw +0.0003110277 11.424416 kicking field goal +0.00025856317 11.239674 tossing coin +0.0002322037 11.132149 robot dancing +0.00015318775 10.716198 catching or throwing softball +0.00013217264 10.568643 celebrating +0.00012527121 10.515015 playing squash or racquetball +0.00011781188 10.453623 throwing discus +9.195899e-05 10.205876 dodgeball +9.003288e-05 10.184709 golf 
protobuf==3.19.6
tensorflow==2.5.0
dm-sonnet==2.0.2
# Keep tensorflow-probability in step with the TensorFlow pin above: the
# 0.13.x series is the one released for TensorFlow 2.5, while 0.24.0 targets
# a much newer TensorFlow and refuses to import on 2.5.0.
tensorflow-probability==0.13.0
tensorboard==2.5.0