pytorch · yiwen-song · Nov 27, 2021 · Oct 12, 2021 · Oct 20, 2021 · Oct 22, 2021
diff --git a/test/expect/ModelTester.test_vit_b_16_expect.pkl b/test/expect/ModelTester.test_vit_b_16_expect.pkl
diff --git a/test/expect/ModelTester.test_vit_b_32_expect.pkl b/test/expect/ModelTester.test_vit_b_32_expect.pkl
diff --git a/test/expect/ModelTester.test_vit_l_16_expect.pkl b/test/expect/ModelTester.test_vit_l_16_expect.pkl
diff --git a/test/expect/ModelTester.test_vit_l_32_expect.pkl b/test/expect/ModelTester.test_vit_l_32_expect.pkl
diff --git a/test/test_backbone_utils.py b/test/test_backbone_utils.py
@@ -1,5 +1,6 @@
 import random
 from itertools import chain
+from typing import Mapping, Sequence
 
 import pytest
 import torch
@@ -89,7 +90,16 @@ def _create_feature_extractor(self, *args, **kwargs):
 
     def _get_return_nodes(self, model):
         set_rng_seed(0)
-        exclude_nodes_filter = ["getitem", "floordiv", "size", "chunk"]
+        exclude_nodes_filter = [
+            "getitem",
+            "floordiv",
+            "size",
+            "chunk",
+            "_assert",
+            "eq",
+            "dim",
+            "getattr",
+        ]
         train_nodes, eval_nodes = get_graph_node_names(
             model, tracer_kwargs={"leaf_modules": self.leaf_modules}, suppress_diff_warning=True
         )
@@ -144,7 +154,16 @@ def test_forward_backward(self, model_name):
             model, train_return_nodes=train_return_nodes, eval_return_nodes=eval_return_nodes
         )
         out = model(self.inp)
-        sum(o.mean() for o in out.values()).backward()
+        out_agg = 0
+        for node_out in out.values():
+            if isinstance(node_out, Sequence):
+                out_agg += sum(o.mean() for o in node_out if o is not None)
+            elif isinstance(node_out, Mapping):
+                out_agg += sum(o.mean() for o in node_out.values() if o is not None)
+            else:
+                # Assume that the only other alternative at this point is a Tensor
+                out_agg += node_out.mean()
+        out_agg.backward()
 
     def test_feature_extraction_methods_equivalence(self):
         model = models.resnet18(**self.model_defaults).eval()

diff --git a/test/test_models.py b/test/test_models.py
@@ -507,6 +507,7 @@ def test_classification_model(model_fn, dev):
     }
     model_name = model_fn.__name__
     kwargs = {**defaults, **_model_params.get(model_name, {})}
+    num_classes = kwargs.get("num_classes")
     input_shape = kwargs.pop("input_shape")
 
     model = model_fn(**kwargs)
@@ -515,7 +516,7 @@ def test_classification_model(model_fn, dev):
     x = torch.rand(input_shape).to(device=dev)
     out = model(x)
     _assert_expected(out.cpu(), model_name, prec=0.1)
-    assert out.shape[-1] == 50
+    assert out.shape[-1] == num_classes
     _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None))
     _check_fx_compatible(model, x)
 

diff --git a/torchvision/prototype/models/__init__.py b/torchvision/prototype/models/__init__.py
@@ -10,6 +10,7 @@
 from .shufflenetv2 import *
 from .squeezenet import *
 from .vgg import *
+from .vision_transformer import *
 from . import detection
 from . import quantization
 from . import segmentation