diff --git a/docs/model_zoo/action_recognition_torch.rst b/docs/model_zoo/action_recognition_torch.rst
index dfe3de814a..c258d677d6 100644
--- a/docs/model_zoo/action_recognition_torch.rst
+++ b/docs/model_zoo/action_recognition_torch.rst
@@ -51,4 +51,3 @@ The following table lists our trained models on Something-Something-V2.
    :header-rows: 1
    :class: tight-table
    :widths: 36 12 10 10 8 12 12
-
\ No newline at end of file
diff --git a/docs/model_zoo/csv_tables/Action_Recognitions/Kinetics400_torch.csv b/docs/model_zoo/csv_tables/Action_Recognitions/Kinetics400_torch.csv
index ec03a95e5b..a4472b873e 100644
--- a/docs/model_zoo/csv_tables/Action_Recognitions/Kinetics400_torch.csv
+++ b/docs/model_zoo/csv_tables/Action_Recognitions/Kinetics400_torch.csv
@@ -1,31 +1,31 @@
 Name,Pretrained,Segment,Clip Length,Top-1,Hashtag,Config
-resnet18_v1b_kinetics400 [3]_,ImageNet,7,1,,854b23e4,`config `_
-resnet34_v1b_kinetics400 [3]_,ImageNet,7,1,,124a2fa4,`config `_
-resnet50_v1b_kinetics400 [3]_,ImageNet,7,1,,9939dbdf,`config `_
-resnet101_v1b_kinetics400 [3]_,ImageNet,7,1,,172afa3b,`config `_
-resnet152_v1b_kinetics400 [3]_,ImageNet,7,1,,3dedb835,`config `_
-r2plus1d_v1_resnet18_kinetics400 [6]_,Scratch,1,16 (32/2),,340a5952,`config `_
-r2plus1d_v1_resnet34_kinetics400 [6]_,Scratch,1,16 (32/2),,5102fd17,`config `_
-r2plus1d_v1_resnet50_kinetics400 [6]_,Scratch,1,16 (32/2),,9a3b665c,`config `_
-r2plus1d_v2_resnet152_kinetics400 [6]_,IG65M,1,16 (32/2),,42707ffc,`config `_
-i3d_resnet50_v1_kinetics400 [4]_,ImageNet,1,32 (64/2),,18545497,`config `_
-i3d_resnet101_v1_kinetics400 [4]_,ImageNet,1,32 (64/2),,a9bb4f89,`config `_
-i3d_nl5_resnet50_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),,9df1e103,`config `_
-i3d_nl10_resnet50_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),,281e1e8a,`config `_
-i3d_nl5_resnet101_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),,2cea8edd,`config `_
-i3d_nl10_resnet101_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),,526a2ed0,`config `_
-slowfast_4x16_resnet50_kinetics400 [8]_,Scratch,1,32 (64/2),,1d1eadb2,`config `_
-slowfast_8x8_resnet50_kinetics400 [8]_,Scratch,1,32 (64/2),,e94e9a57,`config `_
-slowfast_8x8_resnet101_kinetics400 [8]_,Scratch,1,32 (64/2),,db5e9fef,`config `_
-i3d_slow_resnet50_f32s2_kinetics400 [8]_,Scratch,1,32 (64/2),,078c817b,`config `_
-i3d_slow_resnet50_f16s4_kinetics400 [8]_,Scratch,1,16 (64/4),,a3e419f1,`config `_
-i3d_slow_resnet50_f8s8_kinetics400 [8]_,Scratch,1,8 (64/8),,1c3d98a1,`config `_
-i3d_slow_resnet101_f32s2_kinetics400 [8]_,Scratch,1,32 (64/2),,db37cd51,`config `_
-i3d_slow_resnet101_f16s4_kinetics400 [8]_,Scratch,1,16 (64/4),,cb6b78d9,`config `_
-i3d_slow_resnet101_f8s8_kinetics400 [8]_,Scratch,1,8 (64/8),,82e399c1,`config `_
-tpn_resnet50_f8s8_kinetics400 [9]_,Scratch,1,8 (64/8),,368108eb,`config `_
-tpn_resnet50_f16s4_kinetics400 [9]_,Scratch,1,16 (64/4),,6bf899df,`config `_
-tpn_resnet50_f32s2_kinetics400 [9]_,Scratch,1,32 (64/2),,27710ce8,`config `_
-tpn_resnet101_f8s8_kinetics400 [9]_,Scratch,1,8 (64/8),,092c2f7f,`config `_
-tpn_resnet101_f16s4_kinetics400 [9]_,Scratch,1,16 (64/4),,647080df,`config `_
-tpn_resnet101_f32s2_kinetics400 [9]_,Scratch,1,32 (64/2),,a94422a9,`config `_
\ No newline at end of file
+resnet18_v1b_kinetics400 [3]_,ImageNet,7,1,66.73,854b23e4,`config `_
+resnet34_v1b_kinetics400 [3]_,ImageNet,7,1,69.85,124a2fa4,`config `_
+resnet50_v1b_kinetics400 [3]_,ImageNet,7,1,70.88,9939dbdf,`config `_
+resnet101_v1b_kinetics400 [3]_,ImageNet,7,1,72.25,172afa3b,`config `_
+resnet152_v1b_kinetics400 [3]_,ImageNet,7,1,72.45,3dedb835,`config `_
+r2plus1d_v1_resnet18_kinetics400 [6]_,Scratch,1,16 (32/2),71.72,340a5952,`config `_
+r2plus1d_v1_resnet34_kinetics400 [6]_,Scratch,1,16 (32/2),72.63,5102fd17,`config `_
+r2plus1d_v1_resnet50_kinetics400 [6]_,Scratch,1,16 (32/2),74.92,9a3b665c,`config `_
+r2plus1d_v2_resnet152_kinetics400 [6]_,IG65M,1,16 (32/2),81.34,42707ffc,`config `_
+i3d_resnet50_v1_kinetics400 [4]_,ImageNet,1,32 (64/2),74.87,18545497,`config `_
+i3d_resnet101_v1_kinetics400 [4]_,ImageNet,1,32 (64/2),75.1,a9bb4f89,`config `_
+i3d_nl5_resnet50_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),75.17,9df1e103,`config `_
+i3d_nl10_resnet50_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),75.93,281e1e8a,`config `_
+i3d_nl5_resnet101_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),75.81,2cea8edd,`config `_
+i3d_nl10_resnet101_v1_kinetics400 [7]_,ImageNet,1,32 (64/2),75.93,526a2ed0,`config `_
+slowfast_4x16_resnet50_kinetics400 [8]_,Scratch,1,32 (64/2),75.25,1d1eadb2,`config `_
+slowfast_8x8_resnet50_kinetics400 [8]_,Scratch,1,32 (64/2),76.66,e94e9a57,`config `_
+slowfast_8x8_resnet101_kinetics400 [8]_,Scratch,1,32 (64/2),76.95,db5e9fef,`config `_
+i3d_slow_resnet50_f32s2_kinetics400 [8]_,Scratch,1,32 (64/2),77.89,078c817b,`config `_
+i3d_slow_resnet50_f16s4_kinetics400 [8]_,Scratch,1,16 (64/4),76.36,a3e419f1,`config `_
+i3d_slow_resnet50_f8s8_kinetics400 [8]_,Scratch,1,8 (64/8),74.41,1c3d98a1,`config `_
+i3d_slow_resnet101_f32s2_kinetics400 [8]_,Scratch,1,32 (64/2),78.57,db37cd51,`config `_
+i3d_slow_resnet101_f16s4_kinetics400 [8]_,Scratch,1,16 (64/4),77.11,cb6b78d9,`config `_
+i3d_slow_resnet101_f8s8_kinetics400 [8]_,Scratch,1,8 (64/8),76.15,82e399c1,`config `_
+tpn_resnet50_f8s8_kinetics400 [9]_,Scratch,1,8 (64/8),77.04,368108eb,`config `_
+tpn_resnet50_f16s4_kinetics400 [9]_,Scratch,1,16 (64/4),77.33,6bf899df,`config `_
+tpn_resnet50_f32s2_kinetics400 [9]_,Scratch,1,32 (64/2),78.9,27710ce8,`config `_
+tpn_resnet101_f8s8_kinetics400 [9]_,Scratch,1,8 (64/8),78.1,092c2f7f,`config `_
+tpn_resnet101_f16s4_kinetics400 [9]_,Scratch,1,16 (64/4),79.39,647080df,`config `_
+tpn_resnet101_f32s2_kinetics400 [9]_,Scratch,1,32 (64/2),79.7,a94422a9,`config `_
\ No newline at end of file
diff --git a/docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2_torch.csv b/docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2_torch.csv
index 233af4c31e..3375b2d0d9 100644
--- a/docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2_torch.csv
+++ b/docs/model_zoo/csv_tables/Action_Recognitions/Something-Something-V2_torch.csv
@@ -1,4 +1,3 @@
 Name,Pretrained,Segment,Clip Length,Top-1,Hashtag,Config
-resnet50_v1b_sthsthv2 [3]_,ImageNet,8,1,,cbb9167b,`config `_
-i3d_resnet50_v1_sthsthv2 [4]_,ImageNet,1,16 (32/2),,e975d989,`config `_
-slowfast_16x8_resnet50_sthsthv2 [8]_,Scratch,1,64 (128/2),,05203231,`config `_
\ No newline at end of file
+resnet50_v1b_sthsthv2 [3]_,ImageNet,8,1,35.16,cbb9167b,`config `_
+i3d_resnet50_v1_sthsthv2 [4]_,ImageNet,1,16 (32/2),49.61,e975d989,`config `_
\ No newline at end of file
diff --git a/scripts/action-recognition/configuration/r2plus1d_v2_resnet152_kinetics400.yaml b/scripts/action-recognition/configuration/r2plus1d_v2_resnet152_kinetics400.yaml
index 77327b5431..85fdbc0505 100644
--- a/scripts/action-recognition/configuration/r2plus1d_v2_resnet152_kinetics400.yaml
+++ b/scripts/action-recognition/configuration/r2plus1d_v2_resnet152_kinetics400.yaml
@@ -42,7 +42,7 @@ CONFIG:
     TEST_NUM_SEGMENT: 10
     TEST_NUM_CROP: 3
     MULTIGRID: False
-    KEEP_ASPECT_RATIO: False
+    KEEP_ASPECT_RATIO: True
     CROP_SIZE: 112
     SHORT_SIDE_SIZE: 128
     NEW_HEIGHT: 128
diff --git a/gluoncv/torch/utils/get_flops.py b/scripts/action-recognition/get_flops.py
similarity index 65%
rename from gluoncv/torch/utils/get_flops.py
rename to scripts/action-recognition/get_flops.py
index 0aef206fa2..f32f39ddf1 100644
--- a/gluoncv/torch/utils/get_flops.py
+++ b/scripts/action-recognition/get_flops.py
@@ -1,31 +1,30 @@
-
+"""
+Script to compute FLOPs of a model
+"""
 import os
 import argparse
-import numpy as np
 
 import torch
 
 from gluoncv.torch.model_zoo import get_model
-from gluoncv.torch.utils.model_utils import deploy_model
 from gluoncv.torch.engine.config import get_cfg_defaults
-
 from thop import profile, clever_format
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Test video action recognition models.')
+    parser = argparse.ArgumentParser(description='Compute FLOPs of a model.')
     parser.add_argument('--config-file', type=str, help='path to config file.')
     parser.add_argument('--num-frames', type=int, default=32, help='temporal clip length.')
-
+    parser.add_argument('--input-size', type=int, default=224,
+                        help='spatial size of the input frames. default is 224')
     args = parser.parse_args()
 
     cfg = get_cfg_defaults()
     cfg.merge_from_file(args.config_file)
     model = get_model(cfg)
 
-    input_tensor = torch.autograd.Variable(torch.rand(1, 3, args.num_frames, 224, 224))
+    input_tensor = torch.autograd.Variable(torch.rand(1, 3, args.num_frames, args.input_size, args.input_size))
 
     macs, params = profile(model, inputs=(input_tensor,))
    macs, params = clever_format([macs, params], "%.3f")
-    print("FLOPss: ", macs,"; #params: ", params)
-
\ No newline at end of file
+    print("FLOPs: ", macs, "; #params: ", params)
diff --git a/scripts/action-recognition/test_ddp_pytorch.py b/scripts/action-recognition/test_ddp_pytorch.py
index d7fbfa5315..0e52b8818d 100644
--- a/scripts/action-recognition/test_ddp_pytorch.py
+++ b/scripts/action-recognition/test_ddp_pytorch.py
@@ -9,7 +9,7 @@
 from tensorboardX import SummaryWriter
 
 from gluoncv.torch.model_zoo import get_model
-from gluoncv.torch.utils.model_utils import deploy_model
+from gluoncv.torch.utils.model_utils import deploy_model, load_model
 from gluoncv.torch.data import build_dataloader_test
 from gluoncv.torch.utils.task_utils import test_classification
 from gluoncv.torch.engine.config import get_cfg_defaults
@@ -81,6 +81,9 @@ def main_worker(cfg):
     model = get_model(cfg)
     model = deploy_model(model, cfg)
 
+    if cfg.CONFIG.MODEL.LOAD:
+        model, _ = load_model(model, cfg)
+
     # create dataset and dataloader
     test_loader = build_dataloader_test(cfg)
 
diff --git a/scripts/action-recognition/train_ddp_pytorch.py b/scripts/action-recognition/train_ddp_pytorch.py
index 4e2ea8eeb8..44ea499fe1 100644
--- a/scripts/action-recognition/train_ddp_pytorch.py
+++ b/scripts/action-recognition/train_ddp_pytorch.py
@@ -52,6 +52,7 @@ def main_worker(cfg):
                                                          last_epoch=cfg.CONFIG.TRAIN.RESUME_EPOCH)
     else:
         print('Learning rate schedule %s is not supported yet. Please use Step or Cosine.')
+
     if cfg.CONFIG.TRAIN.USE_WARMUP:
         scheduler_warmup = GradualWarmupScheduler(optimizer,
                                                   multiplier=(cfg.CONFIG.TRAIN.WARMUP_END_LR / cfg.CONFIG.TRAIN.LR),
diff --git a/scripts/action-recognition/train_ddp_shortonly_pytorch.py b/scripts/action-recognition/train_ddp_shortonly_pytorch.py
index 4a7c524e30..c6774a280a 100644
--- a/scripts/action-recognition/train_ddp_shortonly_pytorch.py
+++ b/scripts/action-recognition/train_ddp_shortonly_pytorch.py
@@ -47,6 +47,7 @@ def main_worker(cfg):
                                                          last_epoch=cfg.CONFIG.TRAIN.RESUME_EPOCH)
     else:
         print('Learning rate schedule %s is not supported yet. Please use Step or Cosine.')
+
     if cfg.CONFIG.TRAIN.USE_WARMUP:
         scheduler_warmup = GradualWarmupScheduler(optimizer,
                                                   multiplier=(cfg.CONFIG.TRAIN.WARMUP_END_LR / cfg.CONFIG.TRAIN.LR),
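
Usage note (not part of the patch): the relocated scripts/action-recognition/get_flops.py boils down to the thop profiling pattern below. This is a minimal sketch built only from the imports and calls that appear in the script above; the relative config path is illustrative and the clip shape matches the script's --num-frames and --input-size defaults.

# Sketch of the FLOPs-counting pattern used by scripts/action-recognition/get_flops.py.
# The config path is illustrative; any model-zoo YAML from this patch should work.
import torch
from thop import profile, clever_format

from gluoncv.torch.model_zoo import get_model
from gluoncv.torch.engine.config import get_cfg_defaults

cfg = get_cfg_defaults()
cfg.merge_from_file('configuration/r2plus1d_v2_resnet152_kinetics400.yaml')  # illustrative path
model = get_model(cfg)

# One clip: batch 1, 3 channels, 32 frames, 224x224 spatial size
# (the script's default --num-frames / --input-size values).
clip = torch.rand(1, 3, 32, 224, 224)
macs, params = profile(model, inputs=(clip,))
macs, params = clever_format([macs, params], "%.3f")
print("FLOPs:", macs, "; #params:", params)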