From 036a3dae369e07fde895ef22e11d30f7726b1cf7 Mon Sep 17 00:00:00 2001
From: cyy
Date: Sun, 15 Dec 2024 09:45:34 +0800
Subject: [PATCH] Upgrade to Python 3.9

---
 torchvision/datasets/_optical_flow.py | 14 +-
 torchvision/datasets/_stereo_matching.py | 36 +-
 torchvision/datasets/caltech.py | 10 +-
 torchvision/datasets/celeba.py | 4 +-
 torchvision/datasets/cifar.py | 2 +-
 torchvision/datasets/cityscapes.py | 6 +-
 torchvision/datasets/clevr.py | 4 +-
 torchvision/datasets/coco.py | 6 +-
 torchvision/datasets/dtd.py | 2 +-
 torchvision/datasets/fakedata.py | 4 +-
 torchvision/datasets/fer2013.py | 4 +-
 torchvision/datasets/fgvc_aircraft.py | 12 +-
 torchvision/datasets/flickr.py | 8 +-
 torchvision/datasets/flowers102.py | 2 +-
 torchvision/datasets/folder.py | 22 +-
 torchvision/datasets/food101.py | 2 +-
 torchvision/datasets/gtsrb.py | 2 +-
 torchvision/datasets/hmdb51.py | 8 +-
 torchvision/datasets/imagenet.py | 11 +-
 torchvision/datasets/imagenette.py | 2 +-
 torchvision/datasets/inaturalist.py | 12 +-
 torchvision/datasets/kinetics.py | 8 +-
 torchvision/datasets/kitti.py | 4 +-
 torchvision/datasets/lfw.py | 12 +-
 torchvision/datasets/lsun.py | 8 +-
 torchvision/datasets/mnist.py | 8 +-
 torchvision/datasets/omniglot.py | 6 +-
 torchvision/datasets/oxford_iiit_pet.py | 5 +-
 torchvision/datasets/pcam.py | 2 +-
 torchvision/datasets/phototour.py | 4 +-
 torchvision/datasets/places365.py | 10 +-
 torchvision/datasets/rendered_sst2.py | 2 +-
 torchvision/datasets/samplers/clip_sampler.py | 7 +-
 torchvision/datasets/sbd.py | 2 +-
 torchvision/datasets/sbu.py | 2 +-
 torchvision/datasets/semeion.py | 2 +-
 torchvision/datasets/stanford_cars.py | 2 +-
 torchvision/datasets/stl10.py | 4 +-
 torchvision/datasets/sun397.py | 2 +-
 torchvision/datasets/svhn.py | 2 +-
 torchvision/datasets/ucf101.py | 8 +-
 torchvision/datasets/usps.py | 2 +-
 torchvision/datasets/utils.py | 22 +-
 torchvision/datasets/video_utils.py | 28 +-
 torchvision/datasets/vision.py | 6 +-
 torchvision/datasets/voc.py | 14 +-
 torchvision/datasets/widerface.py | 4 +-
 torchvision/ops/_utils.py | 12 +-
 torchvision/ops/boxes.py | 10 +-
 torchvision/ops/deform_conv.py | 6 +-
 torchvision/ops/diou_loss.py | 2 +-
 torchvision/ops/feature_pyramid_network.py | 28 +-
 torchvision/ops/misc.py | 37 ++-
 torchvision/ops/poolers.py | 36 +-
 torchvision/ops/roi_align.py | 4 +-
 torchvision/ops/roi_pool.py | 4 +-
 torchvision/transforms/_functional_pil.py | 23 +-
 torchvision/transforms/_functional_tensor.py | 52 +--
 torchvision/transforms/_presets.py | 18 +-
 torchvision/transforms/autoaugment.py | 22 +-
 torchvision/transforms/functional.py | 56 ++--
 torchvision/transforms/transforms.py | 46 +--
 torchvision/transforms/v2/_augment.py | 23 +-
 torchvision/transforms/v2/_auto_augment.py | 26 +-
 torchvision/transforms/v2/_color.py | 53 +--
 torchvision/transforms/v2/_container.py | 7 +-
 torchvision/transforms/v2/_deprecated.py | 2 +-
 torchvision/transforms/v2/_geometry.py | 101 +++---
 torchvision/transforms/v2/_meta.py | 4 +-
 torchvision/transforms/v2/_misc.py | 27 +-
 torchvision/transforms/v2/_temporal.py | 2 +-
 torchvision/transforms/v2/_transform.py | 14 +-
 torchvision/transforms/v2/_type_conversion.py | 8 +-
 torchvision/transforms/v2/_utils.py | 25 +-
 .../transforms/v2/functional/_color.py | 8 +-
 .../transforms/v2/functional/_deprecated.py | 2 +-
 .../transforms/v2/functional/_geometry.py | 309 +++++++++---------
 torchvision/transforms/v2/functional/_meta.py | 22 +-
 torchvision/transforms/v2/functional/_misc.py | 24 +-
 .../transforms/v2/functional/_utils.py | 7 +-
 80 files changed, 690
insertions(+), 679 deletions(-) diff --git a/torchvision/datasets/_optical_flow.py b/torchvision/datasets/_optical_flow.py index e8d6247f03f..089c26ccf87 100644 --- a/torchvision/datasets/_optical_flow.py +++ b/torchvision/datasets/_optical_flow.py @@ -13,8 +13,8 @@ from .utils import _read_pfm, verify_str_arg from .vision import VisionDataset -T1 = Tuple[Image.Image, Image.Image, Optional[np.ndarray], Optional[np.ndarray]] -T2 = Tuple[Image.Image, Image.Image, Optional[np.ndarray]] +T1 = tuple[Image.Image, Image.Image, Optional[np.ndarray], Optional[np.ndarray]] +T2 = tuple[Image.Image, Image.Image, Optional[np.ndarray]] __all__ = ( @@ -37,8 +37,8 @@ def __init__(self, root: Union[str, Path], transforms: Optional[Callable] = None super().__init__(root=root) self.transforms = transforms - self._flow_list: List[str] = [] - self._image_list: List[List[str]] = [] + self._flow_list: list[str] = [] + self._image_list: list[list[str]] = [] def _read_img(self, file_name: str) -> Image.Image: img = Image.open(file_name) @@ -225,7 +225,7 @@ def __getitem__(self, index: int) -> Union[T1, T2]: """ return super().__getitem__(index) - def _read_flow(self, file_name: str) -> Tuple[np.ndarray, np.ndarray]: + def _read_flow(self, file_name: str) -> tuple[np.ndarray, np.ndarray]: return _read_16bits_png_with_flow_and_valid_mask(file_name) @@ -443,7 +443,7 @@ def __init__(self, root: Union[str, Path], split: str = "train", transforms: Opt "Could not find the HD1K images. Please make sure the directory structure is correct." ) - def _read_flow(self, file_name: str) -> Tuple[np.ndarray, np.ndarray]: + def _read_flow(self, file_name: str) -> tuple[np.ndarray, np.ndarray]: return _read_16bits_png_with_flow_and_valid_mask(file_name) def __getitem__(self, index: int) -> Union[T1, T2]: @@ -479,7 +479,7 @@ def _read_flo(file_name: str) -> np.ndarray: return data.reshape(h, w, 2).transpose(2, 0, 1) -def _read_16bits_png_with_flow_and_valid_mask(file_name: str) -> Tuple[np.ndarray, np.ndarray]: +def _read_16bits_png_with_flow_and_valid_mask(file_name: str) -> tuple[np.ndarray, np.ndarray]: flow_and_valid = decode_png(read_file(file_name)).to(torch.float32) flow, valid_flow_mask = flow_and_valid[:2, :, :], flow_and_valid[2, :, :] diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 09961211cc2..2edffb14d0c 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -14,8 +14,8 @@ from .utils import _read_pfm, download_and_extract_archive, verify_str_arg from .vision import VisionDataset -T1 = Tuple[Image.Image, Image.Image, Optional[np.ndarray], np.ndarray] -T2 = Tuple[Image.Image, Image.Image, Optional[np.ndarray]] +T1 = tuple[Image.Image, Image.Image, Optional[np.ndarray], np.ndarray] +T2 = tuple[Image.Image, Image.Image, Optional[np.ndarray]] __all__ = () @@ -65,11 +65,11 @@ def _scan_pairs( self, paths_left_pattern: str, paths_right_pattern: Optional[str] = None, - ) -> List[Tuple[str, Optional[str]]]: + ) -> list[tuple[str, Optional[str]]]: left_paths = list(sorted(glob(paths_left_pattern))) - right_paths: List[Union[None, str]] + right_paths: list[Union[None, str]] if paths_right_pattern: right_paths = list(sorted(glob(paths_right_pattern))) else: @@ -92,7 +92,7 @@ def _scan_pairs( return paths @abstractmethod - def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]: + def _read_disparity(self, file_path: str) -> tuple[Optional[np.ndarray], Optional[np.ndarray]]: # function that 
returns a disparity map and an occlusion map pass @@ -178,7 +178,7 @@ def __init__(self, root: Union[str, Path], transforms: Optional[Callable] = None disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) self._disparities = disparities - def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]: + def _read_disparity(self, file_path: str) -> tuple[np.ndarray, None]: disparity_map = _read_pfm_file(file_path) disparity_map = np.abs(disparity_map) # ensure that the disparity is positive valid_mask = None @@ -257,7 +257,7 @@ def __init__(self, root: Union[str, Path], split: str = "train", transforms: Opt else: self._disparities = list((None, None) for _ in self._images) - def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], None]: + def _read_disparity(self, file_path: str) -> tuple[Optional[np.ndarray], None]: # test split has no disparity maps if file_path is None: return None, None @@ -345,7 +345,7 @@ def __init__(self, root: Union[str, Path], split: str = "train", transforms: Opt else: self._disparities = list((None, None) for _ in self._images) - def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], None]: + def _read_disparity(self, file_path: str) -> tuple[Optional[np.ndarray], None]: # test split has no disparity maps if file_path is None: return None, None @@ -549,7 +549,7 @@ def _read_img(self, file_path: Union[str, Path]) -> Image.Image: When ``use_ambient_views`` is True, the dataset will return at random one of ``[im1.png, im1E.png, im1L.png]`` as the right image. """ - ambient_file_paths: List[Union[str, Path]] # make mypy happy + ambient_file_paths: list[Union[str, Path]] # make mypy happy if not isinstance(file_path, Path): file_path = Path(file_path) @@ -565,7 +565,7 @@ def _read_img(self, file_path: Union[str, Path]) -> Image.Image: file_path = random.choice(ambient_file_paths) # type: ignore return super()._read_img(file_path) - def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]: + def _read_disparity(self, file_path: str) -> Union[tuple[None, None], tuple[np.ndarray, np.ndarray]]: # test split has not disparity maps if file_path is None: return None, None @@ -694,7 +694,7 @@ def __init__( disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) self._disparities += disparities - def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]: + def _read_disparity(self, file_path: str) -> tuple[np.ndarray, None]: disparity_map = np.asarray(Image.open(file_path), dtype=np.float32) # unsqueeze the disparity map into (C, H, W) format disparity_map = disparity_map[None, :, :] / 32.0 @@ -788,13 +788,13 @@ def __init__(self, root: Union[str, Path], variant: str = "single", transforms: right_disparity_pattern = str(root / s / split_prefix[s] / "*.right.depth.png") self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern) - def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]: + def _read_disparity(self, file_path: str) -> tuple[np.ndarray, None]: # (H, W) image depth = np.asarray(Image.open(file_path)) # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt # in order to extract disparity from depth maps camera_settings_path = Path(file_path).parent / "_camera_settings.json" - with open(camera_settings_path, "r") as f: + with open(camera_settings_path) as f: # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * 
pixel_constant) intrinsics = json.load(f) focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"] @@ -911,7 +911,7 @@ def __init__( right_disparity_pattern = str(root / "disparity" / prefix_directories[variant] / "right" / "*.pfm") self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern) - def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]: + def _read_disparity(self, file_path: str) -> tuple[np.ndarray, None]: disparity_map = _read_pfm_file(file_path) disparity_map = np.abs(disparity_map) # ensure that the disparity is positive valid_mask = None @@ -999,7 +999,7 @@ def __init__(self, root: Union[str, Path], pass_name: str = "final", transforms: disparity_pattern = str(root / "training" / "disparities" / "*" / "*.png") self._disparities += self._scan_pairs(disparity_pattern, None) - def _get_occlussion_mask_paths(self, file_path: str) -> Tuple[str, str]: + def _get_occlussion_mask_paths(self, file_path: str) -> tuple[str, str]: # helper function to get the occlusion mask paths # a path will look like .../.../.../training/disparities/scene1/img1.png # we want to get something like .../.../.../training/occlusions/scene1/img1.png @@ -1020,7 +1020,7 @@ def _get_occlussion_mask_paths(self, file_path: str) -> Tuple[str, str]: return occlusion_path, outofframe_path - def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]: + def _read_disparity(self, file_path: str) -> Union[tuple[None, None], tuple[np.ndarray, np.ndarray]]: if file_path is None: return None, None @@ -1101,7 +1101,7 @@ def __init__(self, root: Union[str, Path], split: str = "train", transforms: Opt right_disparity_pattern = str(root / "*" / "right_disp.png") self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) - def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]: + def _read_disparity(self, file_path: str) -> tuple[np.ndarray, None]: disparity_map = np.asarray(Image.open(file_path), dtype=np.float32) # unsqueeze disparity to (C, H, W) disparity_map = disparity_map[None, :, :] / 1024.0 @@ -1195,7 +1195,7 @@ def __init__(self, root: Union[str, Path], split: str = "train", transforms: Opt disparity_pattern = str(root / anot_dir / "*" / "disp0GT.pfm") self._disparities = self._scan_pairs(disparity_pattern, None) - def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]: + def _read_disparity(self, file_path: str) -> Union[tuple[None, None], tuple[np.ndarray, np.ndarray]]: # test split has no disparity maps if file_path is None: return None, None diff --git a/torchvision/datasets/caltech.py b/torchvision/datasets/caltech.py index b152c425c77..484cd7fd131 100644 --- a/torchvision/datasets/caltech.py +++ b/torchvision/datasets/caltech.py @@ -40,7 +40,7 @@ class Caltech101(VisionDataset): def __init__( self, root: Union[str, Path], - target_type: Union[List[str], str] = "category", + target_type: Union[list[str], str] = "category", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, @@ -71,14 +71,14 @@ def __init__( } self.annotation_categories = list(map(lambda x: name_map[x] if x in name_map else x, self.categories)) - self.index: List[int] = [] + self.index: list[int] = [] self.y = [] for (i, c) in enumerate(self.categories): n = len(os.listdir(os.path.join(self.root, "101_ObjectCategories", c))) self.index.extend(range(1, n + 1)) self.y.extend(n * [i]) - def __getitem__(self, 
index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -181,7 +181,7 @@ def __init__( raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it") self.categories = sorted(os.listdir(os.path.join(self.root, "256_ObjectCategories"))) - self.index: List[int] = [] + self.index: list[int] = [] self.y = [] for (i, c) in enumerate(self.categories): n = len( @@ -194,7 +194,7 @@ def __init__( self.index.extend(range(1, n + 1)) self.y.extend(n * [i]) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/celeba.py b/torchvision/datasets/celeba.py index c15120af5a5..446872b7155 100644 --- a/torchvision/datasets/celeba.py +++ b/torchvision/datasets/celeba.py @@ -66,7 +66,7 @@ def __init__( self, root: Union[str, Path], split: str = "train", - target_type: Union[List[str], str] = "attr", + target_type: Union[list[str], str] = "attr", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, @@ -155,7 +155,7 @@ def download(self) -> None: extract_archive(os.path.join(self.root, self.base_folder, "img_align_celeba.zip")) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: X = PIL.Image.open(os.path.join(self.root, self.base_folder, "img_align_celeba", self.filename[index])) target: Any = [] diff --git a/torchvision/datasets/cifar.py b/torchvision/datasets/cifar.py index 0f425d76c57..9478f45151c 100644 --- a/torchvision/datasets/cifar.py +++ b/torchvision/datasets/cifar.py @@ -101,7 +101,7 @@ def _load_meta(self) -> None: self.classes = data[self.meta["key"]] self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)} - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/cityscapes.py b/torchvision/datasets/cityscapes.py index 97a47c07beb..6e60fe99684 100644 --- a/torchvision/datasets/cityscapes.py +++ b/torchvision/datasets/cityscapes.py @@ -107,7 +107,7 @@ def __init__( root: Union[str, Path], split: str = "train", mode: str = "fine", - target_type: Union[List[str], str] = "instance", + target_type: Union[list[str], str] = "instance", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, transforms: Optional[Callable] = None, @@ -172,7 +172,7 @@ def __init__( self.images.append(os.path.join(img_dir, file_name)) self.targets.append(target_types) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -206,7 +206,7 @@ def extra_repr(self) -> str: lines = ["Split: {split}", "Mode: {mode}", "Type: {target_type}"] return "\n".join(lines).format(**self.__dict__) - def _load_json(self, path: str) -> Dict[str, Any]: + def _load_json(self, path: str) -> dict[str, Any]: with open(path) as file: data = json.load(file) return data diff --git a/torchvision/datasets/clevr.py b/torchvision/datasets/clevr.py index 328eb7d79da..49293fb24d9 100644 --- a/torchvision/datasets/clevr.py +++ b/torchvision/datasets/clevr.py @@ -49,7 +49,7 @@ def __init__( self._image_files = sorted(self._data_folder.joinpath("images", self._split).glob("*")) - self._labels: List[Optional[int]] + self._labels: list[Optional[int]] if self._split != "test": with 
open(self._data_folder / "scenes" / f"CLEVR_{self._split}_scenes.json") as file: content = json.load(file) @@ -61,7 +61,7 @@ def __init__( def __len__(self) -> int: return len(self._image_files) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file = self._image_files[idx] label = self._labels[idx] diff --git a/torchvision/datasets/coco.py b/torchvision/datasets/coco.py index f3b7be798b2..1654f7578bf 100644 --- a/torchvision/datasets/coco.py +++ b/torchvision/datasets/coco.py @@ -41,10 +41,10 @@ def _load_image(self, id: int) -> Image.Image: path = self.coco.loadImgs(id)[0]["file_name"] return Image.open(os.path.join(self.root, path)).convert("RGB") - def _load_target(self, id: int) -> List[Any]: + def _load_target(self, id: int) -> list[Any]: return self.coco.loadAnns(self.coco.getAnnIds(id)) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: if not isinstance(index, int): raise ValueError(f"Index must be of type integer, got {type(index)} instead.") @@ -105,5 +105,5 @@ class CocoCaptions(CocoDetection): """ - def _load_target(self, id: int) -> List[str]: + def _load_target(self, id: int) -> list[str]: return [ann["caption"] for ann in super()._load_target(id)] diff --git a/torchvision/datasets/dtd.py b/torchvision/datasets/dtd.py index 71c556bd201..dc5f6a6bae9 100644 --- a/torchvision/datasets/dtd.py +++ b/torchvision/datasets/dtd.py @@ -76,7 +76,7 @@ def __init__( def __len__(self) -> int: return len(self._image_files) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file, label = self._image_files[idx], self._labels[idx] image = PIL.Image.open(image_file).convert("RGB") diff --git a/torchvision/datasets/fakedata.py b/torchvision/datasets/fakedata.py index af26a8579e5..7223c508aca 100644 --- a/torchvision/datasets/fakedata.py +++ b/torchvision/datasets/fakedata.py @@ -25,7 +25,7 @@ class FakeData(VisionDataset): def __init__( self, size: int = 1000, - image_size: Tuple[int, int, int] = (3, 224, 224), + image_size: tuple[int, int, int] = (3, 224, 224), num_classes: int = 10, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, @@ -37,7 +37,7 @@ def __init__( self.image_size = image_size self.random_offset = random_offset - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/fer2013.py b/torchvision/datasets/fer2013.py index 3afda07846b..d6513a63e86 100644 --- a/torchvision/datasets/fer2013.py +++ b/torchvision/datasets/fer2013.py @@ -92,7 +92,7 @@ def get_label(row): else: return None - with open(data_file, "r", newline="") as file: + with open(data_file, newline="") as file: rows = (row for row in csv.DictReader(file)) if use_fer_file or use_icml_file: @@ -104,7 +104,7 @@ def get_label(row): def __len__(self) -> int: return len(self._samples) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_tensor, target = self._samples[idx] image = Image.fromarray(image_tensor.numpy()) diff --git a/torchvision/datasets/fgvc_aircraft.py b/torchvision/datasets/fgvc_aircraft.py index bbf4e970a78..d8dde6f3fe9 100644 --- a/torchvision/datasets/fgvc_aircraft.py +++ b/torchvision/datasets/fgvc_aircraft.py @@ -42,11 +42,11 @@ class FGVCAircraft(VisionDataset): def __init__( self, - root: Union[str, Path], + 
root: str | Path, split: str = "trainval", annotation_level: str = "variant", - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, + transform: Callable | None = None, + target_transform: Callable | None = None, download: bool = False, ) -> None: super().__init__(root, transform=transform, target_transform=target_transform) @@ -71,7 +71,7 @@ def __init__( "manufacturer": "manufacturers.txt", }[self._annotation_level], ) - with open(annotation_file, "r") as f: + with open(annotation_file) as f: self.classes = [line.strip() for line in f] self.class_to_idx = dict(zip(self.classes, range(len(self.classes)))) @@ -82,7 +82,7 @@ def __init__( self._image_files = [] self._labels = [] - with open(labels_file, "r") as f: + with open(labels_file) as f: for line in f: image_name, label_name = line.strip().split(" ", 1) self._image_files.append(os.path.join(image_data_folder, f"{image_name}.jpg")) @@ -91,7 +91,7 @@ def __init__( def __len__(self) -> int: return len(self._image_files) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file, label = self._image_files[idx], self._labels[idx] image = PIL.Image.open(image_file).convert("RGB") diff --git a/torchvision/datasets/flickr.py b/torchvision/datasets/flickr.py index 1021309db05..33a85aa4ccb 100644 --- a/torchvision/datasets/flickr.py +++ b/torchvision/datasets/flickr.py @@ -19,14 +19,14 @@ def __init__(self, root: Union[str, Path]) -> None: self.root = root # Data structure to store captions - self.annotations: Dict[str, List[str]] = {} + self.annotations: dict[str, list[str]] = {} # State variables self.in_table = False self.current_tag: Optional[str] = None self.current_img: Optional[str] = None - def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: + def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None: self.current_tag = tag if tag == "table": @@ -83,7 +83,7 @@ def __init__( self.ids = list(sorted(self.annotations.keys())) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -140,7 +140,7 @@ def __init__( self.ids = list(sorted(self.annotations.keys())) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/flowers102.py b/torchvision/datasets/flowers102.py index 07f403702f5..332cab64d07 100644 --- a/torchvision/datasets/flowers102.py +++ b/torchvision/datasets/flowers102.py @@ -76,7 +76,7 @@ def __init__( def __len__(self) -> int: return len(self._image_files) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file, label = self._image_files[idx], self._labels[idx] image = PIL.Image.open(image_file).convert("RGB") diff --git a/torchvision/datasets/folder.py b/torchvision/datasets/folder.py index 8f2f65c7b61..2ba8227db5a 100644 --- a/torchvision/datasets/folder.py +++ b/torchvision/datasets/folder.py @@ -8,7 +8,7 @@ from .vision import VisionDataset -def has_file_allowed_extension(filename: str, extensions: Union[str, Tuple[str, ...]]) -> bool: +def has_file_allowed_extension(filename: str, extensions: Union[str, tuple[str, ...]]) -> bool: """Checks if a file is an allowed extension. 
Args: @@ -33,7 +33,7 @@ def is_image_file(filename: str) -> bool: return has_file_allowed_extension(filename, IMG_EXTENSIONS) -def find_classes(directory: Union[str, Path]) -> Tuple[List[str], Dict[str, int]]: +def find_classes(directory: Union[str, Path]) -> tuple[list[str], dict[str, int]]: """Finds the class folders in a dataset. See :class:`DatasetFolder` for details. @@ -48,11 +48,11 @@ def find_classes(directory: Union[str, Path]) -> Tuple[List[str], Dict[str, int] def make_dataset( directory: Union[str, Path], - class_to_idx: Optional[Dict[str, int]] = None, - extensions: Optional[Union[str, Tuple[str, ...]]] = None, + class_to_idx: Optional[dict[str, int]] = None, + extensions: Optional[Union[str, tuple[str, ...]]] = None, is_valid_file: Optional[Callable[[str], bool]] = None, allow_empty: bool = False, -) -> List[Tuple[str, int]]: +) -> list[tuple[str, int]]: """Generates a list of samples of a form (path_to_sample, class). See :class:`DatasetFolder` for details. @@ -139,7 +139,7 @@ def __init__( self, root: Union[str, Path], loader: Callable[[str], Any], - extensions: Optional[Tuple[str, ...]] = None, + extensions: Optional[tuple[str, ...]] = None, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, is_valid_file: Optional[Callable[[str], bool]] = None, @@ -166,11 +166,11 @@ def __init__( @staticmethod def make_dataset( directory: Union[str, Path], - class_to_idx: Dict[str, int], - extensions: Optional[Tuple[str, ...]] = None, + class_to_idx: dict[str, int], + extensions: Optional[tuple[str, ...]] = None, is_valid_file: Optional[Callable[[str], bool]] = None, allow_empty: bool = False, - ) -> List[Tuple[str, int]]: + ) -> list[tuple[str, int]]: """Generates a list of samples of a form (path_to_sample, class). This can be overridden to e.g. read files from a compressed zip file instead of from the disk. 
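# A minimal, runnable sketch of the annotation style the hunks above adopt (PEP 585,
# available from Python 3.9): the built-in list/dict/tuple types are used as generics
# directly, so the typing.List/Dict/Tuple aliases no longer need to be imported.
# The functions below are illustrative only and are not part of torchvision.
from typing import Optional

def find_classes_example(entries: list[str]) -> tuple[list[str], dict[str, int]]:
    # Sort the class names and map each one to an integer index, mirroring the
    # (class name, index) bookkeeping done by datasets/folder.py.
    classes = sorted(entries)
    return classes, {cls: i for i, cls in enumerate(classes)}

def first_label(labels: list[int]) -> Optional[int]:
    # Optional and Union still come from typing on 3.9; the `X | None` spelling
    # requires Python 3.10 or `from __future__ import annotations`.
    return labels[0] if labels else None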
@@ -204,7 +204,7 @@ def make_dataset( directory, class_to_idx, extensions=extensions, is_valid_file=is_valid_file, allow_empty=allow_empty ) - def find_classes(self, directory: Union[str, Path]) -> Tuple[List[str], Dict[str, int]]: + def find_classes(self, directory: Union[str, Path]) -> tuple[list[str], dict[str, int]]: """Find the class folders in a dataset structured as follows:: directory/ @@ -233,7 +233,7 @@ def find_classes(self, directory: Union[str, Path]) -> Tuple[List[str], Dict[str """ return find_classes(directory) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/food101.py b/torchvision/datasets/food101.py index f734787c1bf..9b92a3c5c47 100644 --- a/torchvision/datasets/food101.py +++ b/torchvision/datasets/food101.py @@ -69,7 +69,7 @@ def __init__( def __len__(self) -> int: return len(self._image_files) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file, label = self._image_files[idx], self._labels[idx] image = PIL.Image.open(image_file).convert("RGB") diff --git a/torchvision/datasets/gtsrb.py b/torchvision/datasets/gtsrb.py index a3d012c70b2..b96b2b61f88 100644 --- a/torchvision/datasets/gtsrb.py +++ b/torchvision/datasets/gtsrb.py @@ -62,7 +62,7 @@ def __init__( def __len__(self) -> int: return len(self._samples) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: path, target = self._samples[index] sample = PIL.Image.open(path).convert("RGB") diff --git a/torchvision/datasets/hmdb51.py b/torchvision/datasets/hmdb51.py index 8377e40d57c..6094d00ed46 100644 --- a/torchvision/datasets/hmdb51.py +++ b/torchvision/datasets/hmdb51.py @@ -68,7 +68,7 @@ def __init__( fold: int = 1, train: bool = True, transform: Optional[Callable] = None, - _precomputed_metadata: Optional[Dict[str, Any]] = None, + _precomputed_metadata: Optional[dict[str, Any]] = None, num_workers: int = 1, _video_width: int = 0, _video_height: int = 0, @@ -113,10 +113,10 @@ def __init__( self.transform = transform @property - def metadata(self) -> Dict[str, Any]: + def metadata(self) -> dict[str, Any]: return self.full_video_clips.metadata - def _select_fold(self, video_list: List[str], annotations_dir: str, fold: int, train: bool) -> List[int]: + def _select_fold(self, video_list: list[str], annotations_dir: str, fold: int, train: bool) -> list[int]: target_tag = self.TRAIN_TAG if train else self.TEST_TAG split_pattern_name = f"*test_split{fold}.txt" split_pattern_path = os.path.join(annotations_dir, split_pattern_name) @@ -141,7 +141,7 @@ def _select_fold(self, video_list: List[str], annotations_dir: str, fold: int, t def __len__(self) -> int: return self.video_clips.num_clips() - def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]: + def __getitem__(self, idx: int) -> tuple[Tensor, Tensor, int]: video, audio, _, video_idx = self.video_clips.get_clip(idx) sample_index = self.indices[video_idx] _, class_index = self.samples[sample_index] diff --git a/torchvision/datasets/imagenet.py b/torchvision/datasets/imagenet.py index d7caf328d2b..ae6d833dc9d 100644 --- a/torchvision/datasets/imagenet.py +++ b/torchvision/datasets/imagenet.py @@ -3,7 +3,8 @@ import tempfile from contextlib import contextmanager from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union 
+from collections.abc import Iterator import torch @@ -79,7 +80,7 @@ def extra_repr(self) -> str: return "Split: {split}".format(**self.__dict__) -def load_meta_file(root: Union[str, Path], file: Optional[str] = None) -> Tuple[Dict[str, str], List[str]]: +def load_meta_file(root: Union[str, Path], file: Optional[str] = None) -> tuple[dict[str, str], list[str]]: if file is None: file = META_FILE file = os.path.join(root, file) @@ -114,7 +115,7 @@ def parse_devkit_archive(root: Union[str, Path], file: Optional[str] = None) -> """ import scipy.io as sio - def parse_meta_mat(devkit_root: str) -> Tuple[Dict[int, str], Dict[str, Tuple[str, ...]]]: + def parse_meta_mat(devkit_root: str) -> tuple[dict[int, str], dict[str, tuple[str, ...]]]: metafile = os.path.join(devkit_root, "data", "meta.mat") meta = sio.loadmat(metafile, squeeze_me=True)["synsets"] nums_children = list(zip(*meta))[4] @@ -125,7 +126,7 @@ def parse_meta_mat(devkit_root: str) -> Tuple[Dict[int, str], Dict[str, Tuple[st wnid_to_classes = {wnid: clss for wnid, clss in zip(wnids, classes)} return idx_to_wnid, wnid_to_classes - def parse_val_groundtruth_txt(devkit_root: str) -> List[int]: + def parse_val_groundtruth_txt(devkit_root: str) -> list[int]: file = os.path.join(devkit_root, "data", "ILSVRC2012_validation_ground_truth.txt") with open(file) as txtfh: val_idcs = txtfh.readlines() @@ -184,7 +185,7 @@ def parse_train_archive(root: Union[str, Path], file: Optional[str] = None, fold def parse_val_archive( - root: Union[str, Path], file: Optional[str] = None, wnids: Optional[List[str]] = None, folder: str = "val" + root: Union[str, Path], file: Optional[str] = None, wnids: Optional[list[str]] = None, folder: str = "val" ) -> None: """Parse the validation images archive of the ImageNet2012 classification dataset and prepare it for usage with the ImageNet dataset. diff --git a/torchvision/datasets/imagenette.py b/torchvision/datasets/imagenette.py index 0b27f3b25e5..8547672f3ad 100644 --- a/torchvision/datasets/imagenette.py +++ b/torchvision/datasets/imagenette.py @@ -85,7 +85,7 @@ def _download(self): download_and_extract_archive(self._url, self.root, md5=self._md5) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: path, label = self._samples[idx] image = Image.open(path).convert("RGB") diff --git a/torchvision/datasets/inaturalist.py b/torchvision/datasets/inaturalist.py index e041d41f4a2..f4cf724374f 100644 --- a/torchvision/datasets/inaturalist.py +++ b/torchvision/datasets/inaturalist.py @@ -68,7 +68,7 @@ def __init__( self, root: Union[str, Path], version: str = "2021_train", - target_type: Union[List[str], str] = "full", + target_type: Union[list[str], str] = "full", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, @@ -84,13 +84,13 @@ def __init__( if not self._check_exists(): raise RuntimeError("Dataset not found or corrupted. 
You can use download=True to download it") - self.all_categories: List[str] = [] + self.all_categories: list[str] = [] # map: category type -> name of category -> index - self.categories_index: Dict[str, Dict[str, int]] = {} + self.categories_index: dict[str, dict[str, int]] = {} # list indexed by category id, containing mapping from category type -> index - self.categories_map: List[Dict[str, int]] = [] + self.categories_map: list[dict[str, int]] = [] if not isinstance(target_type, list): target_type = [target_type] @@ -102,7 +102,7 @@ def __init__( self._init_pre2021() # index of all files: (full category id, filename) - self.index: List[Tuple[int, str]] = [] + self.index: list[tuple[int, str]] = [] for dir_index, dir_name in enumerate(self.all_categories): files = os.listdir(os.path.join(self.root, dir_name)) @@ -168,7 +168,7 @@ def _init_pre2021(self) -> None: if not c: raise RuntimeError(f"Missing category {cindex}") - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/kinetics.py b/torchvision/datasets/kinetics.py index 773d9f68ca9..c786ef71a8b 100644 --- a/torchvision/datasets/kinetics.py +++ b/torchvision/datasets/kinetics.py @@ -97,11 +97,11 @@ def __init__( frame_rate: Optional[int] = None, step_between_clips: int = 1, transform: Optional[Callable] = None, - extensions: Tuple[str, ...] = ("avi", "mp4"), + extensions: tuple[str, ...] = ("avi", "mp4"), download: bool = False, num_download_workers: int = 1, num_workers: int = 1, - _precomputed_metadata: Optional[Dict[str, Any]] = None, + _precomputed_metadata: Optional[dict[str, Any]] = None, _video_width: int = 0, _video_height: int = 0, _video_min_dimension: int = 0, @@ -221,13 +221,13 @@ def _make_ds_structure(self) -> None: ) @property - def metadata(self) -> Dict[str, Any]: + def metadata(self) -> dict[str, Any]: return self.video_clips.metadata def __len__(self) -> int: return self.video_clips.num_clips() - def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]: + def __getitem__(self, idx: int) -> tuple[Tensor, Tensor, int]: video, audio, info, video_idx = self.video_clips.get_clip(idx) label = self.samples[video_idx][1] diff --git a/torchvision/datasets/kitti.py b/torchvision/datasets/kitti.py index 69e603c76f2..9d136c9ef83 100644 --- a/torchvision/datasets/kitti.py +++ b/torchvision/datasets/kitti.py @@ -83,7 +83,7 @@ def __init__( if self.train: self.targets.append(os.path.join(labels_dir, f"{img_file.split('.')[0]}.txt")) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """Get item at a given index. 
Args: @@ -108,7 +108,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: image, target = self.transforms(image, target) return image, target - def _parse_target(self, index: int) -> List: + def _parse_target(self, index: int) -> list: target = [] with open(self.targets[index]) as inp: content = csv.reader(inp, delimiter=" ") diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 18374fc3c9b..f7e1f06bb74 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -48,7 +48,7 @@ def __init__( self.view = verify_str_arg(view.lower(), "view", ["people", "pairs"]) self.split = verify_str_arg(split.lower(), "split", ["10fold", "train", "test"]) self.labels_file = f"{self.view}{self.annot_file[self.split]}.txt" - self.data: List[Any] = [] + self.data: list[Any] = [] if download: self.download() @@ -125,7 +125,7 @@ def __init__( self.class_to_idx = self._get_classes() self.data, self.targets = self._get_people() - def _get_people(self) -> Tuple[List[str], List[int]]: + def _get_people(self) -> tuple[list[str], list[int]]: data, targets = [], [] with open(os.path.join(self.root, self.labels_file)) as f: lines = f.readlines() @@ -143,14 +143,14 @@ def _get_people(self) -> Tuple[List[str], List[int]]: return data, targets - def _get_classes(self) -> Dict[str, int]: + def _get_classes(self) -> dict[str, int]: with open(os.path.join(self.root, self.names)) as f: lines = f.readlines() names = [line.strip().split()[0] for line in lines] class_to_idx = {name: i for i, name in enumerate(names)} return class_to_idx - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -206,7 +206,7 @@ def __init__( self.pair_names, self.data, self.targets = self._get_pairs(self.images_dir) - def _get_pairs(self, images_dir: str) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]], List[int]]: + def _get_pairs(self, images_dir: str) -> tuple[list[tuple[str, str]], list[tuple[str, str]], list[int]]: pair_names, data, targets = [], [], [] with open(os.path.join(self.root, self.labels_file)) as f: lines = f.readlines() @@ -234,7 +234,7 @@ def _get_pairs(self, images_dir: str) -> Tuple[List[Tuple[str, str]], List[Tuple return pair_names, data, targets - def __getitem__(self, index: int) -> Tuple[Any, Any, int]: + def __getitem__(self, index: int) -> tuple[Any, Any, int]: """ Args: index (int): Index diff --git a/torchvision/datasets/lsun.py b/torchvision/datasets/lsun.py index 61d40eee221..3179de5ac6d 100644 --- a/torchvision/datasets/lsun.py +++ b/torchvision/datasets/lsun.py @@ -31,7 +31,7 @@ def __init__( self.keys = [key for key in txn.cursor().iternext(keys=True, values=False)] pickle.dump(self.keys, open(cache_file, "wb")) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: img, target = None, None env = self.env with env.begin(write=False) as txn: @@ -73,7 +73,7 @@ class LSUN(VisionDataset): def __init__( self, root: Union[str, Path], - classes: Union[str, List[str]] = "train", + classes: Union[str, list[str]] = "train", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, ) -> None: @@ -93,7 +93,7 @@ def __init__( self.length = count - def _verify_classes(self, classes: Union[str, List[str]]) -> List[str]: + def _verify_classes(self, classes: Union[str, list[str]]) -> list[str]: categories = [ "bedroom", "bridge", @@ -136,7 +136,7 @@ def _verify_classes(self, classes: Union[str, List[str]]) -> 
List[str]: return classes - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/mnist.py b/torchvision/datasets/mnist.py index fd145553529..4dea883cbdc 100644 --- a/torchvision/datasets/mnist.py +++ b/torchvision/datasets/mnist.py @@ -128,7 +128,7 @@ def _load_data(self): return data, targets - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -162,7 +162,7 @@ def processed_folder(self) -> str: return os.path.join(self.root, self.__class__.__name__, "processed") @property - def class_to_idx(self) -> Dict[str, int]: + def class_to_idx(self) -> dict[str, int]: return {_class: i for i, _class in enumerate(self.classes)} def _check_exists(self) -> bool: @@ -372,7 +372,7 @@ class QMNIST(MNIST): """ subsets = {"train": "train", "test": "test", "test10k": "test", "test50k": "test", "nist": "nist"} - resources: Dict[str, List[Tuple[str, str]]] = { # type: ignore[assignment] + resources: dict[str, list[tuple[str, str]]] = { # type: ignore[assignment] "train": [ ( "https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-images-idx3-ubyte.gz", @@ -475,7 +475,7 @@ def download(self) -> None: for url, md5 in split: download_and_extract_archive(url, self.raw_folder, md5=md5) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: # redefined to handle the compat flag img, target = self.data[index], self.targets[index] img = Image.fromarray(img.numpy(), mode="L") diff --git a/torchvision/datasets/omniglot.py b/torchvision/datasets/omniglot.py index c3434a72456..8fc405dca97 100644 --- a/torchvision/datasets/omniglot.py +++ b/torchvision/datasets/omniglot.py @@ -51,19 +51,19 @@ def __init__( self.target_folder = join(self.root, self._get_target_folder()) self._alphabets = list_dir(self.target_folder) - self._characters: List[str] = sum( + self._characters: list[str] = sum( ([join(a, c) for c in list_dir(join(self.target_folder, a))] for a in self._alphabets), [] ) self._character_images = [ [(image, idx) for image in list_files(join(self.target_folder, character), ".png")] for idx, character in enumerate(self._characters) ] - self._flat_character_images: List[Tuple[str, int]] = sum(self._character_images, []) + self._flat_character_images: list[tuple[str, int]] = sum(self._character_images, []) def __len__(self) -> int: return len(self._flat_character_images) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/oxford_iiit_pet.py b/torchvision/datasets/oxford_iiit_pet.py index 1d6d990fdf9..4dc0b7b90b2 100644 --- a/torchvision/datasets/oxford_iiit_pet.py +++ b/torchvision/datasets/oxford_iiit_pet.py @@ -1,7 +1,8 @@ import os import os.path import pathlib -from typing import Any, Callable, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Optional, Tuple, Union +from collections.abc import Sequence from PIL import Image @@ -93,7 +94,7 @@ def __init__( def __len__(self) -> int: return len(self._images) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image = Image.open(self._images[idx]).convert("RGB") target: Any = [] diff --git a/torchvision/datasets/pcam.py b/torchvision/datasets/pcam.py index 8849e0ea39d..0f38c3335f5 100644 
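# A short, self-contained sketch (illustrative names, not from the patch) of the import
# move visible in the oxford_iiit_pet.py hunk above and in later hunks: abstract
# container types such as Sequence, Iterator, Iterable and Sized are now imported from
# collections.abc, while typing keeps only names without a runtime class behind them
# (Any, Optional, Union, ...).
from collections.abc import Iterator, Sequence

def every_other(values: Sequence[int]) -> Iterator[int]:
    # Yield every second element of an arbitrary sequence.
    yield from values[::2]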
--- a/torchvision/datasets/pcam.py +++ b/torchvision/datasets/pcam.py @@ -103,7 +103,7 @@ def __len__(self) -> int: with self.h5py.File(self._base_folder / images_file) as images_data: return images_data["x"].shape[0] - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: images_file = self._FILES[self._split]["images"][0] with self.h5py.File(self._base_folder / images_file) as images_data: image = Image.fromarray(images_data["x"][idx]).convert("RGB") diff --git a/torchvision/datasets/phototour.py b/torchvision/datasets/phototour.py index 9511f0626b4..a44e49fb53d 100644 --- a/torchvision/datasets/phototour.py +++ b/torchvision/datasets/phototour.py @@ -114,7 +114,7 @@ def __init__( # load the serialized data self.data, self.labels, self.matches = torch.load(self.data_file, weights_only=True) - def __getitem__(self, index: int) -> Union[torch.Tensor, Tuple[Any, Any, torch.Tensor]]: + def __getitem__(self, index: int) -> Union[torch.Tensor, tuple[Any, Any, torch.Tensor]]: """ Args: index (int): Index @@ -187,7 +187,7 @@ def PIL2array(_img: Image.Image) -> np.ndarray: """Convert PIL image type to numpy 2D array""" return np.array(_img.getdata(), dtype=np.uint8).reshape(64, 64) - def find_files(_data_dir: str, _image_ext: str) -> List[str]: + def find_files(_data_dir: str, _image_ext: str) -> list[str]: """Return a list with the file names of the images containing the patches""" files = [] # find those files with the specified extension diff --git a/torchvision/datasets/places365.py b/torchvision/datasets/places365.py index a120e0e217a..8444ec78e00 100644 --- a/torchvision/datasets/places365.py +++ b/torchvision/datasets/places365.py @@ -83,7 +83,7 @@ def __init__( if download: self.download_images() - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: file, target = self.imgs[index] image = self.loader(file) @@ -108,8 +108,8 @@ def images_dir(self) -> str: dir = f"{self.split}_{size}" return path.join(self.root, dir) - def load_categories(self, download: bool = True) -> Tuple[List[str], Dict[str, int]]: - def process(line: str) -> Tuple[str, int]: + def load_categories(self, download: bool = True) -> tuple[list[str], dict[str, int]]: + def process(line: str) -> tuple[str, int]: cls, idx = line.split() return cls, int(idx) @@ -123,8 +123,8 @@ def process(line: str) -> Tuple[str, int]: return sorted(class_to_idx.keys()), class_to_idx - def load_file_list(self, download: bool = True) -> Tuple[List[Tuple[str, int]], List[int]]: - def process(line: str, sep="/") -> Tuple[str, int]: + def load_file_list(self, download: bool = True) -> tuple[list[tuple[str, int]], list[int]]: + def process(line: str, sep="/") -> tuple[str, int]: image, idx = line.split() return path.join(self.images_dir, image.lstrip(sep).replace(sep, os.sep)), int(idx) diff --git a/torchvision/datasets/rendered_sst2.py b/torchvision/datasets/rendered_sst2.py index 48b0ddfc4fb..7821b2423b6 100644 --- a/torchvision/datasets/rendered_sst2.py +++ b/torchvision/datasets/rendered_sst2.py @@ -59,7 +59,7 @@ def __init__( def __len__(self) -> int: return len(self._samples) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file, label = self._samples[idx] image = PIL.Image.open(image_file).convert("RGB") diff --git a/torchvision/datasets/samplers/clip_sampler.py b/torchvision/datasets/samplers/clip_sampler.py index 026c3d75d3b..eb07f0ce829 100644 --- 
a/torchvision/datasets/samplers/clip_sampler.py +++ b/torchvision/datasets/samplers/clip_sampler.py @@ -1,5 +1,6 @@ import math -from typing import cast, Iterator, List, Optional, Sized, Union +from typing import cast, List, Optional, Union +from collections.abc import Iterator, Sized import torch import torch.distributed as dist @@ -71,7 +72,7 @@ def __iter__(self) -> Iterator[int]: # deterministically shuffle based on epoch g = torch.Generator() g.manual_seed(self.epoch) - indices: Union[torch.Tensor, List[int]] + indices: Union[torch.Tensor, list[int]] if self.shuffle: indices = torch.randperm(len(self.dataset), generator=g).tolist() else: @@ -132,7 +133,7 @@ def __iter__(self) -> Iterator[int]: sampled = torch.linspace(s, s + length - 1, steps=self.num_clips_per_video).floor().to(torch.int64) s += length idxs.append(sampled) - return iter(cast(List[int], torch.cat(idxs).tolist())) + return iter(cast(list[int], torch.cat(idxs).tolist())) def __len__(self) -> int: return sum(self.num_clips_per_video for c in self.video_clips.clips if len(c) > 0) diff --git a/torchvision/datasets/sbd.py b/torchvision/datasets/sbd.py index 4b9ccb75eb9..7b32c648030 100644 --- a/torchvision/datasets/sbd.py +++ b/torchvision/datasets/sbd.py @@ -109,7 +109,7 @@ def _get_boundaries_target(self, filepath: str) -> np.ndarray: axis=0, ) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: img = Image.open(self.images[index]).convert("RGB") target = self._get_target(self.masks[index]) diff --git a/torchvision/datasets/sbu.py b/torchvision/datasets/sbu.py index b5f46101e07..b536a0bbf25 100644 --- a/torchvision/datasets/sbu.py +++ b/torchvision/datasets/sbu.py @@ -58,7 +58,7 @@ def __init__( self.photos.append(photo) self.captions.append(caption) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/semeion.py b/torchvision/datasets/semeion.py index 71485b14dcf..738be7328d8 100644 --- a/torchvision/datasets/semeion.py +++ b/torchvision/datasets/semeion.py @@ -51,7 +51,7 @@ def __init__( self.data = np.reshape(self.data, (-1, 16, 16)) self.labels = np.nonzero(data[:, 256:])[1] - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py index 6264de82eb7..491d0ba9e9e 100644 --- a/torchvision/datasets/stanford_cars.py +++ b/torchvision/datasets/stanford_cars.py @@ -88,7 +88,7 @@ def __init__( def __len__(self) -> int: return len(self._samples) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: """Returns pil_image and class_id for given index""" image_path, target = self._samples[idx] pil_image = Image.open(image_path).convert("RGB") diff --git a/torchvision/datasets/stl10.py b/torchvision/datasets/stl10.py index 39c058e497d..b143ff2dd42 100644 --- a/torchvision/datasets/stl10.py +++ b/torchvision/datasets/stl10.py @@ -100,7 +100,7 @@ def _verify_folds(self, folds: Optional[int]) -> Optional[int]: msg = "Expected type None or int for argument folds, but got type {}." 
raise ValueError(msg.format(type(folds))) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -129,7 +129,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: def __len__(self) -> int: return self.data.shape[0] - def __loadfile(self, data_file: str, labels_file: Optional[str] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]: + def __loadfile(self, data_file: str, labels_file: Optional[str] = None) -> tuple[np.ndarray, Optional[np.ndarray]]: labels = None if labels_file: path_to_labels = os.path.join(self.root, self.base_folder, labels_file) diff --git a/torchvision/datasets/sun397.py b/torchvision/datasets/sun397.py index 4db0a3cf237..052018568f4 100644 --- a/torchvision/datasets/sun397.py +++ b/torchvision/datasets/sun397.py @@ -55,7 +55,7 @@ def __init__( def __len__(self) -> int: return len(self._image_files) - def __getitem__(self, idx: int) -> Tuple[Any, Any]: + def __getitem__(self, idx: int) -> tuple[Any, Any]: image_file, label = self._image_files[idx], self._labels[idx] image = PIL.Image.open(image_file).convert("RGB") diff --git a/torchvision/datasets/svhn.py b/torchvision/datasets/svhn.py index 5d20d7db7e3..62916e4b6bf 100644 --- a/torchvision/datasets/svhn.py +++ b/torchvision/datasets/svhn.py @@ -91,7 +91,7 @@ def __init__( np.place(self.labels, self.labels == 10, 0) self.data = np.transpose(self.data, (3, 2, 0, 1)) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/ucf101.py b/torchvision/datasets/ucf101.py index 935f8ad41c7..a56a90bdd9f 100644 --- a/torchvision/datasets/ucf101.py +++ b/torchvision/datasets/ucf101.py @@ -61,7 +61,7 @@ def __init__( fold: int = 1, train: bool = True, transform: Optional[Callable] = None, - _precomputed_metadata: Optional[Dict[str, Any]] = None, + _precomputed_metadata: Optional[dict[str, Any]] = None, num_workers: int = 1, _video_width: int = 0, _video_height: int = 0, @@ -102,10 +102,10 @@ def __init__( self.transform = transform @property - def metadata(self) -> Dict[str, Any]: + def metadata(self) -> dict[str, Any]: return self.full_video_clips.metadata - def _select_fold(self, video_list: List[str], annotation_path: str, fold: int, train: bool) -> List[int]: + def _select_fold(self, video_list: list[str], annotation_path: str, fold: int, train: bool) -> list[int]: name = "train" if train else "test" name = f"{name}list{fold:02d}.txt" f = os.path.join(annotation_path, name) @@ -121,7 +121,7 @@ def _select_fold(self, video_list: List[str], annotation_path: str, fold: int, t def __len__(self) -> int: return self.video_clips.num_clips() - def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]: + def __getitem__(self, idx: int) -> tuple[Tensor, Tensor, int]: video, audio, info, video_idx = self.video_clips.get_clip(idx) label = self.samples[self.indices[video_idx]][1] diff --git a/torchvision/datasets/usps.py b/torchvision/datasets/usps.py index 9c681e79f6c..f1a9304a7a0 100644 --- a/torchvision/datasets/usps.py +++ b/torchvision/datasets/usps.py @@ -70,7 +70,7 @@ def __init__( self.data = imgs self.targets = targets - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py index 8bf310896c7..63072b4b6e1 100644 --- a/torchvision/datasets/utils.py +++ 
b/torchvision/datasets/utils.py @@ -12,7 +12,8 @@ import urllib.error import urllib.request import zipfile -from typing import Any, Callable, Dict, IO, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import Any, Callable, Dict, IO, List, Optional, Tuple, TypeVar, Union +from collections.abc import Iterable from urllib.parse import urlparse import numpy as np @@ -36,10 +37,7 @@ def calculate_md5(fpath: Union[str, pathlib.Path], chunk_size: int = 1024 * 1024 # Setting the `usedforsecurity` flag does not change anything about the functionality, but indicates that we are # not using the MD5 checksum for cryptography. This enables its usage in restricted environments like FIPS. Without # it torchvision.datasets is unusable in these environments since we perform a MD5 check everywhere. - if sys.version_info >= (3, 9): - md5 = hashlib.md5(usedforsecurity=False) - else: - md5 = hashlib.md5() + md5 = hashlib.md5(usedforsecurity=False) with open(fpath, "rb") as f: while chunk := f.read(chunk_size): md5.update(chunk) @@ -140,7 +138,7 @@ def download_url( raise RuntimeError("File not found or corrupted.") -def list_dir(root: Union[str, pathlib.Path], prefix: bool = False) -> List[str]: +def list_dir(root: Union[str, pathlib.Path], prefix: bool = False) -> list[str]: """List all directories at a given root Args: @@ -155,7 +153,7 @@ def list_dir(root: Union[str, pathlib.Path], prefix: bool = False) -> List[str]: return directories -def list_files(root: Union[str, pathlib.Path], suffix: str, prefix: bool = False) -> List[str]: +def list_files(root: Union[str, pathlib.Path], suffix: str, prefix: bool = False) -> list[str]: """List all files ending with a suffix at a given root Args: @@ -216,7 +214,7 @@ def _extract_tar( tar.extractall(to_path) -_ZIP_COMPRESSION_MAP: Dict[str, int] = { +_ZIP_COMPRESSION_MAP: dict[str, int] = { ".bz2": zipfile.ZIP_BZIP2, ".xz": zipfile.ZIP_LZMA, } @@ -231,23 +229,23 @@ def _extract_zip( zip.extractall(to_path) -_ARCHIVE_EXTRACTORS: Dict[str, Callable[[Union[str, pathlib.Path], Union[str, pathlib.Path], Optional[str]], None]] = { +_ARCHIVE_EXTRACTORS: dict[str, Callable[[Union[str, pathlib.Path], Union[str, pathlib.Path], Optional[str]], None]] = { ".tar": _extract_tar, ".zip": _extract_zip, } -_COMPRESSED_FILE_OPENERS: Dict[str, Callable[..., IO]] = { +_COMPRESSED_FILE_OPENERS: dict[str, Callable[..., IO]] = { ".bz2": bz2.open, ".gz": gzip.open, ".xz": lzma.open, } -_FILE_TYPE_ALIASES: Dict[str, Tuple[Optional[str], Optional[str]]] = { +_FILE_TYPE_ALIASES: dict[str, tuple[Optional[str], Optional[str]]] = { ".tbz": (".tar", ".bz2"), ".tbz2": (".tar", ".bz2"), ".tgz": (".tar", ".gz"), } -def _detect_file_type(file: Union[str, pathlib.Path]) -> Tuple[str, Optional[str], Optional[str]]: +def _detect_file_type(file: Union[str, pathlib.Path]) -> tuple[str, Optional[str], Optional[str]]: """Detect the archive type and/or compression of a file. Args: diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index a412bc5841c..7623b755a3f 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -53,13 +53,13 @@ class _VideoTimestampsDataset: pickled when forking. 
""" - def __init__(self, video_paths: List[str]) -> None: + def __init__(self, video_paths: list[str]) -> None: self.video_paths = video_paths def __len__(self) -> int: return len(self.video_paths) - def __getitem__(self, idx: int) -> Tuple[List[int], Optional[float]]: + def __getitem__(self, idx: int) -> tuple[list[int], Optional[float]]: return read_video_timestamps(self.video_paths[idx]) @@ -99,11 +99,11 @@ class VideoClips: def __init__( self, - video_paths: List[str], + video_paths: list[str], clip_length_in_frames: int = 16, frames_between_clips: int = 1, frame_rate: Optional[float] = None, - _precomputed_metadata: Optional[Dict[str, Any]] = None, + _precomputed_metadata: Optional[dict[str, Any]] = None, num_workers: int = 0, _video_width: int = 0, _video_height: int = 0, @@ -136,7 +136,7 @@ def __init__( def _compute_frame_pts(self) -> None: self.video_pts = [] # len = num_videos. Each entry is a tensor of shape (num_frames_in_video,) - self.video_fps: List[float] = [] # len = num_videos + self.video_fps: list[float] = [] # len = num_videos # strategy: use a DataLoader to parallelize read_video_timestamps # so need to create a dummy dataset first @@ -160,7 +160,7 @@ def _compute_frame_pts(self) -> None: self.video_pts.extend(batch_pts) self.video_fps.extend(batch_fps) - def _init_from_metadata(self, metadata: Dict[str, Any]) -> None: + def _init_from_metadata(self, metadata: dict[str, Any]) -> None: self.video_paths = metadata["video_paths"] assert len(self.video_paths) == len(metadata["video_pts"]) self.video_pts = metadata["video_pts"] @@ -168,7 +168,7 @@ def _init_from_metadata(self, metadata: Dict[str, Any]) -> None: self.video_fps = metadata["video_fps"] @property - def metadata(self) -> Dict[str, Any]: + def metadata(self) -> dict[str, Any]: _metadata = { "video_paths": self.video_paths, "video_pts": self.video_pts, @@ -176,7 +176,7 @@ def metadata(self) -> Dict[str, Any]: } return _metadata - def subset(self, indices: List[int]) -> "VideoClips": + def subset(self, indices: list[int]) -> "VideoClips": video_paths = [self.video_paths[i] for i in indices] video_pts = [self.video_pts[i] for i in indices] video_fps = [self.video_fps[i] for i in indices] @@ -204,7 +204,7 @@ def subset(self, indices: List[int]) -> "VideoClips": @staticmethod def compute_clips_for_video( video_pts: torch.Tensor, num_frames: int, step: int, fps: Optional[float], frame_rate: Optional[float] = None - ) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]: + ) -> tuple[torch.Tensor, Union[list[slice], torch.Tensor]]: if fps is None: # if for some reason the video doesn't have fps (because doesn't have a video stream) # set the fps to 1. The value doesn't matter, because video_pts is empty anyway @@ -220,7 +220,7 @@ def compute_clips_for_video( "There aren't enough frames in the current video to get a clip for the given clip length and " "frames between clips. The video (and potentially others) will be skipped." ) - idxs: Union[List[slice], torch.Tensor] + idxs: Union[list[slice], torch.Tensor] if isinstance(_idxs, slice): idxs = [_idxs] * len(clips) else: @@ -262,7 +262,7 @@ def num_clips(self) -> int: """ return self.cumulative_sizes[-1] - def get_clip_location(self, idx: int) -> Tuple[int, int]: + def get_clip_location(self, idx: int) -> tuple[int, int]: """ Converts a flattened representation of the indices into a video_idx, clip_idx representation. 
@@ -286,7 +286,7 @@ def _resample_video_idx(num_frames: int, original_fps: float, new_fps: float) -> idxs = idxs.floor().to(torch.int64) return idxs - def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any], int]: + def get_clip(self, idx: int) -> tuple[torch.Tensor, torch.Tensor, dict[str, Any], int]: """ Gets a subclip from a list of videos. @@ -374,7 +374,7 @@ def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any] return video, audio, info, video_idx - def __getstate__(self) -> Dict[str, Any]: + def __getstate__(self) -> dict[str, Any]: video_pts_sizes = [len(v) for v in self.video_pts] # To be back-compatible, we convert data to dtype torch.long as needed # because for empty list, in legacy implementation, torch.as_tensor will @@ -402,7 +402,7 @@ def __getstate__(self) -> Dict[str, Any]: d["_version"] = 2 return d - def __setstate__(self, d: Dict[str, Any]) -> None: + def __setstate__(self, d: dict[str, Any]) -> None: # for backwards-compatibility if "_version" not in d: self.__dict__ = d diff --git a/torchvision/datasets/vision.py b/torchvision/datasets/vision.py index e524c67e263..be4bf644b0e 100644 --- a/torchvision/datasets/vision.py +++ b/torchvision/datasets/vision.py @@ -77,7 +77,7 @@ def __repr__(self) -> str: lines = [head] + [" " * self._repr_indent + line for line in body] return "\n".join(lines) - def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + def _format_transform_repr(self, transform: Callable, head: str) -> list[str]: lines = transform.__repr__().splitlines() return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] @@ -90,14 +90,14 @@ def __init__(self, transform: Optional[Callable] = None, target_transform: Optio self.transform = transform self.target_transform = target_transform - def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + def __call__(self, input: Any, target: Any) -> tuple[Any, Any]: if self.transform is not None: input = self.transform(input) if self.target_transform is not None: target = self.target_transform(target) return input, target - def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + def _format_transform_repr(self, transform: Callable, head: str) -> list[str]: lines = transform.__repr__().splitlines() return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] diff --git a/torchvision/datasets/voc.py b/torchvision/datasets/voc.py index 0f0e84c84fa..c3ec935d341 100644 --- a/torchvision/datasets/voc.py +++ b/torchvision/datasets/voc.py @@ -141,10 +141,10 @@ class VOCSegmentation(_VOCBase): _TARGET_FILE_EXT = ".png" @property - def masks(self) -> List[str]: + def masks(self) -> list[str]: return self.targets - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -186,10 +186,10 @@ class VOCDetection(_VOCBase): _TARGET_FILE_EXT = ".xml" @property - def annotations(self) -> List[str]: + def annotations(self) -> list[str]: return self.targets - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index @@ -206,11 +206,11 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: return img, target @staticmethod - def parse_voc_xml(node: ET_Element) -> Dict[str, Any]: - voc_dict: Dict[str, Any] = {} + def parse_voc_xml(node: ET_Element) -> dict[str, Any]: + voc_dict: dict[str, Any] = {} children = 
list(node) if children: - def_dic: Dict[str, Any] = collections.defaultdict(list) + def_dic: dict[str, Any] = collections.defaultdict(list) for dc in map(VOCDetection.parse_voc_xml, children): for ind, v in dc.items(): def_dic[ind].append(v) diff --git a/torchvision/datasets/widerface.py b/torchvision/datasets/widerface.py index b451ebe25b9..9c3b0b3c569 100644 --- a/torchvision/datasets/widerface.py +++ b/torchvision/datasets/widerface.py @@ -75,13 +75,13 @@ def __init__( if not self._check_integrity(): raise RuntimeError("Dataset not found or corrupted. You can use download=True to download and prepare it") - self.img_info: List[Dict[str, Union[str, Dict[str, torch.Tensor]]]] = [] + self.img_info: list[dict[str, Union[str, dict[str, torch.Tensor]]]] = [] if self.split in ("train", "val"): self.parse_train_val_annotations_file() else: self.parse_test_annotations_file() - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> tuple[Any, Any]: """ Args: index (int): Index diff --git a/torchvision/ops/_utils.py b/torchvision/ops/_utils.py index a6ca557a98b..5aac38d8fd6 100644 --- a/torchvision/ops/_utils.py +++ b/torchvision/ops/_utils.py @@ -4,7 +4,7 @@ from torch import nn, Tensor -def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: +def _cat(tensors: list[Tensor], dim: int = 0) -> Tensor: """ Efficient version of torch.cat that avoids a copy if there is only a single element in a list """ @@ -15,7 +15,7 @@ def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: return torch.cat(tensors, dim) -def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: +def convert_boxes_to_roi_format(boxes: list[Tensor]) -> Tensor: concat_boxes = _cat([b for b in boxes], dim=0) temp = [] for i, b in enumerate(boxes): @@ -25,7 +25,7 @@ def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: return rois -def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): +def check_roi_boxes_shape(boxes: Union[Tensor, list[Tensor]]): if isinstance(boxes, (list, tuple)): for _tensor in boxes: torch._assert( @@ -39,8 +39,8 @@ def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): def split_normalization_params( - model: nn.Module, norm_classes: Optional[List[type]] = None -) -> Tuple[List[Tensor], List[Tensor]]: + model: nn.Module, norm_classes: Optional[list[type]] = None +) -> tuple[list[Tensor], list[Tensor]]: # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 if not norm_classes: norm_classes = [ @@ -87,7 +87,7 @@ def _upcast_non_float(t: Tensor) -> Tensor: def _loss_inter_union( boxes1: torch.Tensor, boxes2: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: x1, y1, x2, y2 = boxes1.unbind(dim=-1) x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 96631278d48..9dde1e29b1d 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -138,7 +138,7 @@ def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor: return keep -def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor: +def clip_boxes_to_image(boxes: Tensor, size: tuple[int, int]) -> Tensor: """ Clip boxes so that they lie inside an image of size ``size``. 
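For the `ops/_utils.py` hunk above: `convert_boxes_to_roi_format` collapses a per-image list of `[N_i, 4]` box tensors into the single `[sum(N_i), 5]` tensor consumed by the ROI ops, with the batch index prepended as the first column. A minimal sketch of that pattern (the name `to_roi_format` is invented for illustration):

    import torch
    from torch import Tensor

    def to_roi_format(boxes: list[Tensor]) -> Tensor:
        # Prefix every box with the index of the image it came from,
        # yielding rows of the form (batch_index, x1, y1, x2, y2).
        ids = torch.cat([torch.full_like(b[:, :1], i) for i, b in enumerate(boxes)], dim=0)
        return torch.cat([ids, torch.cat(boxes, dim=0)], dim=1)

    rois = to_roi_format([torch.rand(3, 4), torch.rand(2, 4)])
    assert rois.shape == (5, 5)  # 3 + 2 boxes, 1 index column + 4 coordinates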
@@ -253,7 +253,7 @@ def box_area(boxes: Tensor) -> Tensor: # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py # with slight modifications -def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]: +def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> tuple[Tensor, Tensor]: area1 = box_area(boxes1) area2 = box_area(boxes2) @@ -378,7 +378,7 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso return diou -def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tuple[Tensor, Tensor]: +def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> tuple[Tensor, Tensor]: iou = box_iou(boxes1, boxes2) lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) @@ -391,8 +391,8 @@ def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tuple[Te x_g = (boxes2[:, 0] + boxes2[:, 2]) / 2 y_g = (boxes2[:, 1] + boxes2[:, 3]) / 2 # The distance between boxes' centers squared. - centers_distance_squared = (_upcast((x_p[:, None] - x_g[None, :])) ** 2) + ( - _upcast((y_p[:, None] - y_g[None, :])) ** 2 + centers_distance_squared = (_upcast(x_p[:, None] - x_g[None, :]) ** 2) + ( + _upcast(y_p[:, None] - y_g[None, :]) ** 2 ) # The distance IoU is the IoU penalized by a normalized # distance between boxes' centers squared. diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py index b3cc83332a0..07c947c3f5e 100644 --- a/torchvision/ops/deform_conv.py +++ b/torchvision/ops/deform_conv.py @@ -16,9 +16,9 @@ def deform_conv2d( offset: Tensor, weight: Tensor, bias: Optional[Tensor] = None, - stride: Tuple[int, int] = (1, 1), - padding: Tuple[int, int] = (0, 0), - dilation: Tuple[int, int] = (1, 1), + stride: tuple[int, int] = (1, 1), + padding: tuple[int, int] = (0, 0), + dilation: tuple[int, int] = (1, 1), mask: Optional[Tensor] = None, ) -> Tensor: r""" diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index c64c6673a88..d9961f400c6 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -68,7 +68,7 @@ def _diou_iou_loss( boxes1: torch.Tensor, boxes2: torch.Tensor, eps: float = 1e-7, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: intsct, union = _loss_inter_union(boxes1, boxes2) iou = intsct / (union + eps) diff --git a/torchvision/ops/feature_pyramid_network.py b/torchvision/ops/feature_pyramid_network.py index 2e7aef0e2fa..dedaf0112ec 100644 --- a/torchvision/ops/feature_pyramid_network.py +++ b/torchvision/ops/feature_pyramid_network.py @@ -26,10 +26,10 @@ class ExtraFPNBlock(nn.Module): def forward( self, - results: List[Tensor], - x: List[Tensor], - names: List[str], - ) -> Tuple[List[Tensor], List[str]]: + results: list[Tensor], + x: list[Tensor], + names: list[str], + ) -> tuple[list[Tensor], list[str]]: pass @@ -76,7 +76,7 @@ class FeaturePyramidNetwork(nn.Module): def __init__( self, - in_channels_list: List[int], + in_channels_list: list[int], out_channels: int, extra_blocks: Optional[ExtraFPNBlock] = None, norm_layer: Optional[Callable[..., nn.Module]] = None, @@ -169,7 +169,7 @@ def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: out = module(x) return out - def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: + def forward(self, x: dict[str, Tensor]) -> dict[str, Tensor]: """ Computes the FPN for a set of feature maps. 
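A short usage sketch for the `FeaturePyramidNetwork.forward` signature changed above (`dict[str, Tensor]` in, `dict[str, Tensor]` out); the channel counts and spatial sizes are made up for illustration:

    from collections import OrderedDict

    import torch
    from torchvision.ops import FeaturePyramidNetwork

    # Three feature maps with 10/20/30 channels are each projected to 5 channels;
    # spatial sizes and the key order of the input dict are preserved.
    fpn = FeaturePyramidNetwork(in_channels_list=[10, 20, 30], out_channels=5)
    feats = OrderedDict(
        feat0=torch.rand(1, 10, 64, 64),
        feat1=torch.rand(1, 20, 16, 16),
        feat2=torch.rand(1, 30, 8, 8),
    )
    out = fpn(feats)
    print([(name, tuple(t.shape)) for name, t in out.items()])
    # [('feat0', (1, 5, 64, 64)), ('feat1', (1, 5, 16, 16)), ('feat2', (1, 5, 8, 8))]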
@@ -211,10 +211,10 @@ class LastLevelMaxPool(ExtraFPNBlock): def forward( self, - x: List[Tensor], - y: List[Tensor], - names: List[str], - ) -> Tuple[List[Tensor], List[str]]: + x: list[Tensor], + y: list[Tensor], + names: list[str], + ) -> tuple[list[Tensor], list[str]]: names.append("pool") # Use max pooling to simulate stride 2 subsampling x.append(F.max_pool2d(x[-1], kernel_size=1, stride=2, padding=0)) @@ -237,10 +237,10 @@ def __init__(self, in_channels: int, out_channels: int): def forward( self, - p: List[Tensor], - c: List[Tensor], - names: List[str], - ) -> Tuple[List[Tensor], List[str]]: + p: list[Tensor], + c: list[Tensor], + names: list[str], + ) -> tuple[list[Tensor], list[str]]: p5, c5 = p[-1], c[-1] x = p5 if self.use_P5 else c5 p6 = self.p6(x) diff --git a/torchvision/ops/misc.py b/torchvision/ops/misc.py index 0bbea6bce43..59fe13785fe 100644 --- a/torchvision/ops/misc.py +++ b/torchvision/ops/misc.py @@ -1,5 +1,6 @@ import warnings -from typing import Callable, List, Optional, Sequence, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union +from collections.abc import Sequence import torch from torch import Tensor @@ -38,9 +39,9 @@ def _load_from_state_dict( prefix: str, local_metadata: dict, strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], + missing_keys: list[str], + unexpected_keys: list[str], + error_msgs: list[str], ): num_batches_tracked_key = prefix + "num_batches_tracked" if num_batches_tracked_key in state_dict: @@ -70,13 +71,13 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, ...]] = 3, - stride: Union[int, Tuple[int, ...]] = 1, - padding: Optional[Union[int, Tuple[int, ...], str]] = None, + kernel_size: Union[int, tuple[int, ...]] = 3, + stride: Union[int, tuple[int, ...]] = 1, + padding: Optional[Union[int, tuple[int, ...], str]] = None, groups: int = 1, norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d, activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, - dilation: Union[int, Tuple[int, ...]] = 1, + dilation: Union[int, tuple[int, ...]] = 1, inplace: Optional[bool] = True, bias: Optional[bool] = None, conv_layer: Callable[..., torch.nn.Module] = torch.nn.Conv2d, @@ -145,13 +146,13 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, int]] = 3, - stride: Union[int, Tuple[int, int]] = 1, - padding: Optional[Union[int, Tuple[int, int], str]] = None, + kernel_size: Union[int, tuple[int, int]] = 3, + stride: Union[int, tuple[int, int]] = 1, + padding: Optional[Union[int, tuple[int, int], str]] = None, groups: int = 1, norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d, activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, - dilation: Union[int, Tuple[int, int]] = 1, + dilation: Union[int, tuple[int, int]] = 1, inplace: Optional[bool] = True, bias: Optional[bool] = None, ) -> None: @@ -194,13 +195,13 @@ def __init__( self, in_channels: int, out_channels: int, - kernel_size: Union[int, Tuple[int, int, int]] = 3, - stride: Union[int, Tuple[int, int, int]] = 1, - padding: Optional[Union[int, Tuple[int, int, int], str]] = None, + kernel_size: Union[int, tuple[int, int, int]] = 3, + stride: Union[int, tuple[int, int, int]] = 1, + padding: Optional[Union[int, tuple[int, int, int], str]] = None, groups: int = 1, norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm3d, activation_layer: 
Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, - dilation: Union[int, Tuple[int, int, int]] = 1, + dilation: Union[int, tuple[int, int, int]] = 1, inplace: Optional[bool] = True, bias: Optional[bool] = None, ) -> None: @@ -277,7 +278,7 @@ class MLP(torch.nn.Sequential): def __init__( self, in_channels: int, - hidden_channels: List[int], + hidden_channels: list[int], norm_layer: Optional[Callable[..., torch.nn.Module]] = None, activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, inplace: Optional[bool] = None, @@ -312,7 +313,7 @@ class Permute(torch.nn.Module): dims (List[int]): The desired ordering of dimensions """ - def __init__(self, dims: List[int]): + def __init__(self, dims: list[int]): super().__init__() self.dims = dims diff --git a/torchvision/ops/poolers.py b/torchvision/ops/poolers.py index 9cdd83a598b..10453bd5e1d 100644 --- a/torchvision/ops/poolers.py +++ b/torchvision/ops/poolers.py @@ -15,7 +15,7 @@ # _onnx_merge_levels() is an implementation supported by ONNX # that merges the levels to the right indices @torch.jit.unused -def _onnx_merge_levels(levels: Tensor, unmerged_results: List[Tensor]) -> Tensor: +def _onnx_merge_levels(levels: Tensor, unmerged_results: list[Tensor]) -> Tensor: first_result = unmerged_results[0] dtype, device = first_result.dtype, first_result.device res = torch.zeros( @@ -70,7 +70,7 @@ def __init__( self.lvl0 = canonical_level self.eps = eps - def __call__(self, boxlists: List[Tensor]) -> Tensor: + def __call__(self, boxlists: list[Tensor]) -> Tensor: """ Args: boxlists (list[BoxList]) @@ -84,7 +84,7 @@ def __call__(self, boxlists: List[Tensor]) -> Tensor: return (target_lvls.to(torch.int64) - self.k_min).to(torch.int64) -def _convert_to_roi_format(boxes: List[Tensor]) -> Tensor: +def _convert_to_roi_format(boxes: list[Tensor]) -> Tensor: concat_boxes = torch.cat(boxes, dim=0) device, dtype = concat_boxes.device, concat_boxes.dtype ids = torch.cat( @@ -95,10 +95,10 @@ def _convert_to_roi_format(boxes: List[Tensor]) -> Tensor: return rois -def _infer_scale(feature: Tensor, original_size: List[int]) -> float: +def _infer_scale(feature: Tensor, original_size: list[int]) -> float: # assumption: the scale is of the form 2 ** (-k), with k integer size = feature.shape[-2:] - possible_scales: List[float] = [] + possible_scales: list[float] = [] for s1, s2 in zip(size, original_size): approx_scale = float(s1) / float(s2) scale = 2 ** float(torch.tensor(approx_scale).log2().round()) @@ -108,8 +108,8 @@ def _infer_scale(feature: Tensor, original_size: List[int]) -> float: @torch.fx.wrap def _setup_scales( - features: List[Tensor], image_shapes: List[Tuple[int, int]], canonical_scale: int, canonical_level: int -) -> Tuple[List[float], LevelMapper]: + features: list[Tensor], image_shapes: list[tuple[int, int]], canonical_scale: int, canonical_level: int +) -> tuple[list[float], LevelMapper]: if not image_shapes: raise ValueError("images list should not be empty") max_x = 0 @@ -135,7 +135,7 @@ def _setup_scales( @torch.fx.wrap -def _filter_input(x: Dict[str, Tensor], featmap_names: List[str]) -> List[Tensor]: +def _filter_input(x: dict[str, Tensor], featmap_names: list[str]) -> list[Tensor]: x_filtered = [] for k, v in x.items(): if k in featmap_names: @@ -145,11 +145,11 @@ def _filter_input(x: Dict[str, Tensor], featmap_names: List[str]) -> List[Tensor @torch.fx.wrap def _multiscale_roi_align( - x_filtered: List[Tensor], - boxes: List[Tensor], - output_size: List[int], + x_filtered: list[Tensor], + boxes: list[Tensor], + 
output_size: list[int], sampling_ratio: int, - scales: Optional[List[float]], + scales: Optional[list[float]], mapper: Optional[LevelMapper], ) -> Tensor: """ @@ -263,12 +263,12 @@ class MultiScaleRoIAlign(nn.Module): """ - __annotations__ = {"scales": Optional[List[float]], "map_levels": Optional[LevelMapper]} + __annotations__ = {"scales": Optional[list[float]], "map_levels": Optional[LevelMapper]} def __init__( self, - featmap_names: List[str], - output_size: Union[int, Tuple[int], List[int]], + featmap_names: list[str], + output_size: Union[int, tuple[int], list[int]], sampling_ratio: int, *, canonical_scale: int = 224, @@ -288,9 +288,9 @@ def __init__( def forward( self, - x: Dict[str, Tensor], - boxes: List[Tensor], - image_shapes: List[Tuple[int, int]], + x: dict[str, Tensor], + boxes: list[Tensor], + image_shapes: list[tuple[int, int]], ) -> Tensor: """ Args: diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py index 8b616ca9161..da774b3704a 100644 --- a/torchvision/ops/roi_align.py +++ b/torchvision/ops/roi_align.py @@ -203,7 +203,7 @@ def from_K(t): @torch.fx.wrap def roi_align( input: Tensor, - boxes: Union[Tensor, List[Tensor]], + boxes: Union[Tensor, list[Tensor]], output_size: BroadcastingList2[int], spatial_scale: float = 1.0, sampling_ratio: int = -1, @@ -278,7 +278,7 @@ def __init__( self.sampling_ratio = sampling_ratio self.aligned = aligned - def forward(self, input: Tensor, rois: Union[Tensor, List[Tensor]]) -> Tensor: + def forward(self, input: Tensor, rois: Union[Tensor, list[Tensor]]) -> Tensor: return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) def __repr__(self) -> str: diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py index 96282418f07..6d0ac1733da 100644 --- a/torchvision/ops/roi_pool.py +++ b/torchvision/ops/roi_pool.py @@ -14,7 +14,7 @@ @torch.fx.wrap def roi_pool( input: Tensor, - boxes: Union[Tensor, List[Tensor]], + boxes: Union[Tensor, list[Tensor]], output_size: BroadcastingList2[int], spatial_scale: float = 1.0, ) -> Tensor: @@ -64,7 +64,7 @@ def __init__(self, output_size: BroadcastingList2[int], spatial_scale: float): self.output_size = output_size self.spatial_scale = spatial_scale - def forward(self, input: Tensor, rois: Union[Tensor, List[Tensor]]) -> Tensor: + def forward(self, input: Tensor, rois: Union[Tensor, list[Tensor]]) -> Tensor: return roi_pool(input, rois, self.output_size, self.spatial_scale) def __repr__(self) -> str: diff --git a/torchvision/transforms/_functional_pil.py b/torchvision/transforms/_functional_pil.py index bd943b6ee10..92176cc7f49 100644 --- a/torchvision/transforms/_functional_pil.py +++ b/torchvision/transforms/_functional_pil.py @@ -1,5 +1,6 @@ import numbers -from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from collections.abc import Sequence import numpy as np import torch @@ -20,7 +21,7 @@ def _is_pil_image(img: Any) -> bool: @torch.jit.unused -def get_dimensions(img: Any) -> List[int]: +def get_dimensions(img: Any) -> list[int]: if _is_pil_image(img): if hasattr(img, "getbands"): channels = len(img.getbands()) @@ -32,7 +33,7 @@ def get_dimensions(img: Any) -> List[int]: @torch.jit.unused -def get_image_size(img: Any) -> List[int]: +def get_image_size(img: Any) -> list[int]: if _is_pil_image(img): return list(img.size) raise TypeError(f"Unexpected type {type(img)}") @@ -143,8 +144,8 @@ def adjust_gamma( @torch.jit.unused def 
pad( img: Image.Image, - padding: Union[int, List[int], Tuple[int, ...]], - fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, + padding: Union[int, list[int], tuple[int, ...]], + fill: Optional[Union[float, list[float], tuple[float, ...]]] = 0, padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant", ) -> Image.Image: @@ -238,7 +239,7 @@ def crop( @torch.jit.unused def resize( img: Image.Image, - size: Union[List[int], int], + size: Union[list[int], int], interpolation: int = Image.BILINEAR, ) -> Image.Image: @@ -252,10 +253,10 @@ def resize( @torch.jit.unused def _parse_fill( - fill: Optional[Union[float, List[float], Tuple[float, ...]]], + fill: Optional[Union[float, list[float], tuple[float, ...]]], img: Image.Image, name: str = "fillcolor", -) -> Dict[str, Optional[Union[float, List[float], Tuple[float, ...]]]]: +) -> dict[str, Optional[Union[float, list[float], tuple[float, ...]]]]: # Process fill color for affine transforms num_channels = get_image_num_channels(img) @@ -284,7 +285,7 @@ def _parse_fill( @torch.jit.unused def affine( img: Image.Image, - matrix: List[float], + matrix: list[float], interpolation: int = Image.NEAREST, fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image.Image: @@ -303,7 +304,7 @@ def rotate( angle: float, interpolation: int = Image.NEAREST, expand: bool = False, - center: Optional[Tuple[int, int]] = None, + center: Optional[tuple[int, int]] = None, fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image.Image: @@ -317,7 +318,7 @@ def rotate( @torch.jit.unused def perspective( img: Image.Image, - perspective_coeffs: List[float], + perspective_coeffs: list[float], interpolation: int = Image.BICUBIC, fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image.Image: diff --git a/torchvision/transforms/_functional_tensor.py b/torchvision/transforms/_functional_tensor.py index 618bbfbab7c..d25282ef5bb 100644 --- a/torchvision/transforms/_functional_tensor.py +++ b/torchvision/transforms/_functional_tensor.py @@ -15,14 +15,14 @@ def _assert_image_tensor(img: Tensor) -> None: raise TypeError("Tensor is not a torch image.") -def get_dimensions(img: Tensor) -> List[int]: +def get_dimensions(img: Tensor) -> list[int]: _assert_image_tensor(img) channels = 1 if img.ndim == 2 else img.shape[-3] height, width = img.shape[-2:] return [channels, height, width] -def get_image_size(img: Tensor) -> List[int]: +def get_image_size(img: Tensor) -> list[int]: # Returns (w, h) of tensor image _assert_image_tensor(img) return [img.shape[-1], img.shape[-2]] @@ -57,7 +57,7 @@ def _max_value(dtype: torch.dtype) -> int: return 1 -def _assert_channels(img: Tensor, permitted: List[int]) -> None: +def _assert_channels(img: Tensor, permitted: list[int]) -> None: c = get_dimensions(img)[0] if c not in permitted: raise TypeError(f"Input image tensor permitted channel values are {permitted}, but found {c}") @@ -321,7 +321,7 @@ def _hsv2rgb(img: Tensor) -> Tensor: return torch.einsum("...ijk, ...xijk -> ...xjk", mask.to(dtype=img.dtype), a4) -def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: +def _pad_symmetric(img: Tensor, padding: list[int]) -> Tensor: # padding is left, right, top, bottom # crop if needed @@ -352,7 +352,7 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") -def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: +def 
_parse_pad_padding(padding: Union[int, list[int]]) -> list[int]: if isinstance(padding, int): if torch.jit.is_scripting(): # This maybe unreachable @@ -373,7 +373,7 @@ def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: def pad( - img: Tensor, padding: Union[int, List[int]], fill: Optional[Union[int, float]] = 0, padding_mode: str = "constant" + img: Tensor, padding: Union[int, list[int]], fill: Optional[Union[int, float]] = 0, padding_mode: str = "constant" ) -> Tensor: _assert_image_tensor(img) @@ -440,7 +440,7 @@ def pad( def resize( img: Tensor, - size: List[int], + size: list[int], interpolation: str = "bilinear", antialias: Optional[bool] = True, ) -> Tensor: @@ -476,11 +476,11 @@ def resize( def _assert_grid_transform_inputs( img: Tensor, - matrix: Optional[List[float]], + matrix: Optional[list[float]], interpolation: str, - fill: Optional[Union[int, float, List[float]]], - supported_interpolation_modes: List[str], - coeffs: Optional[List[float]] = None, + fill: Optional[Union[int, float, list[float]]], + supported_interpolation_modes: list[str], + coeffs: Optional[list[float]] = None, ) -> None: if not (isinstance(img, torch.Tensor)): @@ -513,7 +513,7 @@ def _assert_grid_transform_inputs( raise ValueError(f"Interpolation mode '{interpolation}' is unsupported with Tensor input") -def _cast_squeeze_in(img: Tensor, req_dtypes: List[torch.dtype]) -> Tuple[Tensor, bool, bool, torch.dtype]: +def _cast_squeeze_in(img: Tensor, req_dtypes: list[torch.dtype]) -> tuple[Tensor, bool, bool, torch.dtype]: need_squeeze = False # make image NCHW if img.ndim < 4: @@ -543,7 +543,7 @@ def _cast_squeeze_out(img: Tensor, need_cast: bool, need_squeeze: bool, out_dtyp def _apply_grid_transform( - img: Tensor, grid: Tensor, mode: str, fill: Optional[Union[int, float, List[float]]] + img: Tensor, grid: Tensor, mode: str, fill: Optional[Union[int, float, list[float]]] ) -> Tensor: img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [grid.dtype]) @@ -604,9 +604,9 @@ def _gen_affine_grid( def affine( img: Tensor, - matrix: List[float], + matrix: list[float], interpolation: str = "nearest", - fill: Optional[Union[int, float, List[float]]] = None, + fill: Optional[Union[int, float, list[float]]] = None, ) -> Tensor: _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) @@ -618,7 +618,7 @@ def affine( return _apply_grid_transform(img, grid, interpolation, fill=fill) -def _compute_affine_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]: +def _compute_affine_output_size(matrix: list[float], w: int, h: int) -> tuple[int, int]: # Inspired of PIL implementation: # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054 @@ -653,10 +653,10 @@ def _compute_affine_output_size(matrix: List[float], w: int, h: int) -> Tuple[in def rotate( img: Tensor, - matrix: List[float], + matrix: list[float], interpolation: str = "nearest", expand: bool = False, - fill: Optional[Union[int, float, List[float]]] = None, + fill: Optional[Union[int, float, list[float]]] = None, ) -> Tensor: _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) w, h = img.shape[-1], img.shape[-2] @@ -669,7 +669,7 @@ def rotate( return _apply_grid_transform(img, grid, interpolation, fill=fill) -def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, device: torch.device) -> Tensor: +def _perspective_grid(coeffs: list[float], ow: int, oh: int, dtype: 
torch.dtype, device: torch.device) -> Tensor: # https://github.com/python-pillow/Pillow/blob/4634eafe3c695a014267eefdce830b4a825beed7/ # src/libImaging/Geometry.c#L394 @@ -700,9 +700,9 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, def perspective( img: Tensor, - perspective_coeffs: List[float], + perspective_coeffs: list[float], interpolation: str = "bilinear", - fill: Optional[Union[int, float, List[float]]] = None, + fill: Optional[Union[int, float, list[float]]] = None, ) -> Tensor: if not (isinstance(img, torch.Tensor)): raise TypeError("Input img should be Tensor.") @@ -735,7 +735,7 @@ def _get_gaussian_kernel1d(kernel_size: int, sigma: float, dtype: torch.dtype, d def _get_gaussian_kernel2d( - kernel_size: List[int], sigma: List[float], dtype: torch.dtype, device: torch.device + kernel_size: list[int], sigma: list[float], dtype: torch.dtype, device: torch.device ) -> Tensor: kernel1d_x = _get_gaussian_kernel1d(kernel_size[0], sigma[0], dtype, device) kernel1d_y = _get_gaussian_kernel1d(kernel_size[1], sigma[1], dtype, device) @@ -743,7 +743,7 @@ def _get_gaussian_kernel2d( return kernel2d -def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Tensor: +def gaussian_blur(img: Tensor, kernel_size: list[int], sigma: list[float]) -> Tensor: if not (isinstance(img, torch.Tensor)): raise TypeError(f"img should be Tensor. Got {type(img)}") @@ -902,7 +902,7 @@ def equalize(img: Tensor) -> Tensor: return torch.stack([_equalize_single_image(x) for x in img]) -def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor: +def normalize(tensor: Tensor, mean: list[float], std: list[float], inplace: bool = False) -> Tensor: _assert_image_tensor(tensor) if not tensor.is_floating_point(): @@ -938,7 +938,7 @@ def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool return img -def _create_identity_grid(size: List[int]) -> Tensor: +def _create_identity_grid(size: list[int]) -> Tensor: hw_space = [torch.linspace((-s + 1) / s, (s - 1) / s, s) for s in size] grid_y, grid_x = torch.meshgrid(hw_space, indexing="ij") return torch.stack([grid_x, grid_y], -1).unsqueeze(0) # 1 x H x W x 2 @@ -948,7 +948,7 @@ def elastic_transform( img: Tensor, displacement: Tensor, interpolation: str = "bilinear", - fill: Optional[Union[int, float, List[float]]] = None, + fill: Optional[Union[int, float, list[float]]] = None, ) -> Tensor: if not (isinstance(img, torch.Tensor)): diff --git a/torchvision/transforms/_presets.py b/torchvision/transforms/_presets.py index fb6f4ad5ca5..c12ce72b9e4 100644 --- a/torchvision/transforms/_presets.py +++ b/torchvision/transforms/_presets.py @@ -41,8 +41,8 @@ def __init__( *, crop_size: int, resize_size: int = 256, - mean: Tuple[float, ...] = (0.485, 0.456, 0.406), - std: Tuple[float, ...] = (0.229, 0.224, 0.225), + mean: tuple[float, ...] = (0.485, 0.456, 0.406), + std: tuple[float, ...] = (0.229, 0.224, 0.225), interpolation: InterpolationMode = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> None: @@ -86,10 +86,10 @@ class VideoClassification(nn.Module): def __init__( self, *, - crop_size: Tuple[int, int], - resize_size: Union[Tuple[int], Tuple[int, int]], - mean: Tuple[float, ...] = (0.43216, 0.394666, 0.37645), - std: Tuple[float, ...] = (0.22803, 0.22145, 0.216989), + crop_size: tuple[int, int], + resize_size: Union[tuple[int], tuple[int, int]], + mean: tuple[float, ...] = (0.43216, 0.394666, 0.37645), + std: tuple[float, ...] 
= (0.22803, 0.22145, 0.216989), interpolation: InterpolationMode = InterpolationMode.BILINEAR, ) -> None: super().__init__() @@ -148,8 +148,8 @@ def __init__( self, *, resize_size: Optional[int], - mean: Tuple[float, ...] = (0.485, 0.456, 0.406), - std: Tuple[float, ...] = (0.229, 0.224, 0.225), + mean: tuple[float, ...] = (0.485, 0.456, 0.406), + std: tuple[float, ...] = (0.229, 0.224, 0.225), interpolation: InterpolationMode = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> None: @@ -188,7 +188,7 @@ def describe(self) -> str: class OpticalFlow(nn.Module): - def forward(self, img1: Tensor, img2: Tensor) -> Tuple[Tensor, Tensor]: + def forward(self, img1: Tensor, img2: Tensor) -> tuple[Tensor, Tensor]: if not isinstance(img1, Tensor): img1 = F.pil_to_tensor(img1) if not isinstance(img2, Tensor): diff --git a/torchvision/transforms/autoaugment.py b/torchvision/transforms/autoaugment.py index 9dbbe91e741..52df49cd0f7 100644 --- a/torchvision/transforms/autoaugment.py +++ b/torchvision/transforms/autoaugment.py @@ -11,7 +11,7 @@ def _apply_op( - img: Tensor, op_name: str, magnitude: float, interpolation: InterpolationMode, fill: Optional[List[float]] + img: Tensor, op_name: str, magnitude: float, interpolation: InterpolationMode, fill: Optional[list[float]] ): if op_name == "ShearX": # magnitude should be arctan(magnitude) @@ -122,7 +122,7 @@ def __init__( self, policy: AutoAugmentPolicy = AutoAugmentPolicy.IMAGENET, interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Optional[list[float]] = None, ) -> None: super().__init__() self.policy = policy @@ -132,7 +132,7 @@ def __init__( def _get_policies( self, policy: AutoAugmentPolicy - ) -> List[Tuple[Tuple[str, float, Optional[int]], Tuple[str, float, Optional[int]]]]: + ) -> list[tuple[tuple[str, float, Optional[int]], tuple[str, float, Optional[int]]]]: if policy == AutoAugmentPolicy.IMAGENET: return [ (("Posterize", 0.4, 8), ("Rotate", 0.6, 9)), @@ -220,7 +220,7 @@ def _get_policies( else: raise ValueError(f"The provided policy {policy} is not recognized.") - def _augmentation_space(self, num_bins: int, image_size: Tuple[int, int]) -> Dict[str, Tuple[Tensor, bool]]: + def _augmentation_space(self, num_bins: int, image_size: tuple[int, int]) -> dict[str, tuple[Tensor, bool]]: return { # op_name: (magnitudes, signed) "ShearX": (torch.linspace(0.0, 0.3, num_bins), True), @@ -240,7 +240,7 @@ def _augmentation_space(self, num_bins: int, image_size: Tuple[int, int]) -> Dic } @staticmethod - def get_params(transform_num: int) -> Tuple[int, Tensor, Tensor]: + def get_params(transform_num: int) -> tuple[int, Tensor, Tensor]: """Get parameters for autoaugment transformation Returns: @@ -309,7 +309,7 @@ def __init__( magnitude: int = 9, num_magnitude_bins: int = 31, interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Optional[list[float]] = None, ) -> None: super().__init__() self.num_ops = num_ops @@ -318,7 +318,7 @@ def __init__( self.interpolation = interpolation self.fill = fill - def _augmentation_space(self, num_bins: int, image_size: Tuple[int, int]) -> Dict[str, Tuple[Tensor, bool]]: + def _augmentation_space(self, num_bins: int, image_size: tuple[int, int]) -> dict[str, tuple[Tensor, bool]]: return { # op_name: (magnitudes, signed) "Identity": (torch.tensor(0.0), False), @@ -397,14 +397,14 @@ def __init__( self, num_magnitude_bins: int = 31, interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: 
Optional[List[float]] = None, + fill: Optional[list[float]] = None, ) -> None: super().__init__() self.num_magnitude_bins = num_magnitude_bins self.interpolation = interpolation self.fill = fill - def _augmentation_space(self, num_bins: int) -> Dict[str, Tuple[Tensor, bool]]: + def _augmentation_space(self, num_bins: int) -> dict[str, tuple[Tensor, bool]]: return { # op_name: (magnitudes, signed) "Identity": (torch.tensor(0.0), False), @@ -492,7 +492,7 @@ def __init__( alpha: float = 1.0, all_ops: bool = True, interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[list[float]] = None, ) -> None: super().__init__() self._PARAMETER_MAX = 10 @@ -506,7 +506,7 @@ def __init__( self.interpolation = interpolation self.fill = fill - def _augmentation_space(self, num_bins: int, image_size: Tuple[int, int]) -> Dict[str, Tuple[Tensor, bool]]: + def _augmentation_space(self, num_bins: int, image_size: tuple[int, int]) -> dict[str, tuple[Tensor, bool]]: s = { # op_name: (magnitudes, signed) "ShearX": (torch.linspace(0.0, 0.3, num_bins), True), diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 8efe2a8878a..f7d265c5ab5 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -63,7 +63,7 @@ def _interpolation_modes_from_int(i: int) -> InterpolationMode: _is_pil_image = F_pil._is_pil_image -def get_dimensions(img: Tensor) -> List[int]: +def get_dimensions(img: Tensor) -> list[int]: """Returns the dimensions of an image as [channels, height, width]. Args: @@ -80,7 +80,7 @@ def get_dimensions(img: Tensor) -> List[int]: return F_pil.get_dimensions(img) -def get_image_size(img: Tensor) -> List[int]: +def get_image_size(img: Tensor) -> list[int]: """Returns the size of an image as [width, height]. Args: @@ -324,7 +324,7 @@ def to_pil_image(pic, mode=None): return Image.fromarray(npimg, mode=mode) -def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor: +def normalize(tensor: Tensor, mean: list[float], std: list[float], inplace: bool = False) -> Tensor: """Normalize a float tensor image with mean and standard deviation. This transform does not support PIL Image. @@ -351,11 +351,11 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool def _compute_resized_output_size( - image_size: Tuple[int, int], - size: Optional[List[int]], + image_size: tuple[int, int], + size: Optional[list[int]], max_size: Optional[int] = None, allow_size_none: bool = False, # only True in v2 -) -> List[int]: +) -> list[int]: h, w = image_size short, long = (w, h) if w <= h else (h, w) if size is None: @@ -386,7 +386,7 @@ def _compute_resized_output_size( def resize( img: Tensor, - size: List[int], + size: list[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -479,7 +479,7 @@ def resize( return F_t.resize(img, size=output_size, interpolation=interpolation.value, antialias=antialias) -def pad(img: Tensor, padding: List[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor: +def pad(img: Tensor, padding: list[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor: r"""Pad the given image on all sides with the given "pad" value. If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... 
means at most 2 leading dimensions for mode reflect and symmetric, @@ -553,7 +553,7 @@ def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: return F_t.crop(img, top, left, height, width) -def center_crop(img: Tensor, output_size: List[int]) -> Tensor: +def center_crop(img: Tensor, output_size: list[int]) -> Tensor: """Crops the given image at the center. If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. @@ -600,7 +600,7 @@ def resized_crop( left: int, height: int, width: int, - size: List[int], + size: list[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> Tensor: @@ -671,7 +671,7 @@ def hflip(img: Tensor) -> Tensor: return F_t.hflip(img) -def _get_perspective_coeffs(startpoints: List[List[int]], endpoints: List[List[int]]) -> List[float]: +def _get_perspective_coeffs(startpoints: list[list[int]], endpoints: list[list[int]]) -> list[float]: """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms. In Perspective Transform each pixel (x, y) in the original image gets transformed as, @@ -700,16 +700,16 @@ def _get_perspective_coeffs(startpoints: List[List[int]], endpoints: List[List[i # do least squares in double precision to prevent numerical issues res = torch.linalg.lstsq(a_matrix, b_matrix, driver="gels").solution.to(torch.float32) - output: List[float] = res.tolist() + output: list[float] = res.tolist() return output def perspective( img: Tensor, - startpoints: List[List[int]], - endpoints: List[List[int]], + startpoints: list[list[int]], + endpoints: list[list[int]], interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[list[float]] = None, ) -> Tensor: """Perform perspective transform of the given image. If the image is torch Tensor, it is expected @@ -774,7 +774,7 @@ def vflip(img: Tensor) -> Tensor: return F_t.vflip(img) -def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: +def five_crop(img: Tensor, size: list[int]) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Crop the given image into four corners and the central crop. If the image is torch Tensor, it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -820,8 +820,8 @@ def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Ten def ten_crop( - img: Tensor, size: List[int], vertical_flip: bool = False -) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + img: Tensor, size: list[int], vertical_flip: bool = False +) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: """Generate ten cropped images from the given image. Crop the given image into four corners and the central crop plus the flipped version of these (horizontal flipping is used by default). 
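The `five_crop`/`ten_crop` annotations above are easier to read next to a sketch: the five crops are the four corners plus the center, and `ten_crop` applies the same five crops to the image and to its flipped copy (horizontal by default, per the `vertical_flip` flag). A simplified tensor-only version, not the real implementation (which goes through `crop`/`center_crop` and also accepts PIL images):

    import torch
    from torch import Tensor

    def five_crop_sketch(img: Tensor, size: tuple[int, int]) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
        # img is [..., H, W]; returns (top-left, top-right, bottom-left, bottom-right, center).
        ch, cw = size
        h, w = img.shape[-2:]
        tl = img[..., :ch, :cw]
        tr = img[..., :ch, w - cw:]
        bl = img[..., h - ch:, :cw]
        br = img[..., h - ch:, w - cw:]
        top, left = (h - ch) // 2, (w - cw) // 2
        center = img[..., top:top + ch, left:left + cw]
        return tl, tr, bl, br, center

    crops = five_crop_sketch(torch.rand(3, 224, 224), (112, 112))
    assert all(c.shape == (3, 112, 112) for c in crops)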
@@ -1004,8 +1004,8 @@ def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: def _get_inverse_affine_matrix( - center: List[float], angle: float, translate: List[float], scale: float, shear: List[float], inverted: bool = True -) -> List[float]: + center: list[float], angle: float, translate: list[float], scale: float, shear: list[float], inverted: bool = True +) -> list[float]: # Helper method to compute inverse matrix for affine transformation # Pillow requires inverse affine transformation matrix: @@ -1068,8 +1068,8 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - center: Optional[List[int]] = None, - fill: Optional[List[float]] = None, + center: Optional[list[int]] = None, + fill: Optional[list[float]] = None, ) -> Tensor: """Rotate the image by angle. If the image is torch Tensor, it is expected @@ -1135,12 +1135,12 @@ def rotate( def affine( img: Tensor, angle: float, - translate: List[int], + translate: list[int], scale: float, - shear: List[float], + shear: list[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, - center: Optional[List[int]] = None, + fill: Optional[list[float]] = None, + center: Optional[list[int]] = None, ) -> Tensor: """Apply affine transformation on the image keeping image center invariant. If the image is torch Tensor, it is expected @@ -1315,7 +1315,7 @@ def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool return F_t.erase(img, i, j, h, w, v, inplace=inplace) -def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor: +def gaussian_blur(img: Tensor, kernel_size: list[int], sigma: Optional[list[float]] = None) -> Tensor: """Performs Gaussian blurring on the image by given kernel The convolution will be using reflection padding corresponding to the kernel size, to maintain the input shape. @@ -1519,7 +1519,7 @@ def elastic_transform( img: Tensor, displacement: Tensor, interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[list[float]] = None, ) -> Tensor: """Transform a tensor image with elastic transformations. Given alpha and sigma, it will generate displacement diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 07932390efe..a029b152fd9 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -623,7 +623,7 @@ class RandomCrop(torch.nn.Module): """ @staticmethod - def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]: + def get_params(img: Tensor, output_size: tuple[int, int]) -> tuple[int, int, int, int]: """Get parameters for ``crop`` for a random crop. Args: @@ -806,7 +806,7 @@ def forward(self, img): return img @staticmethod - def get_params(width: int, height: int, distortion_scale: float) -> Tuple[List[List[int]], List[List[int]]]: + def get_params(width: int, height: int, distortion_scale: float) -> tuple[list[list[int]], list[list[int]]]: """Get parameters for ``perspective`` for a random perspective transform. Args: @@ -918,7 +918,7 @@ def __init__( self.ratio = ratio @staticmethod - def get_params(img: Tensor, scale: List[float], ratio: List[float]) -> Tuple[int, int, int, int]: + def get_params(img: Tensor, scale: list[float], ratio: list[float]) -> tuple[int, int, int, int]: """Get parameters for ``crop`` for a random sized crop. 
Args: @@ -1190,10 +1190,10 @@ class ColorJitter(torch.nn.Module): def __init__( self, - brightness: Union[float, Tuple[float, float]] = 0, - contrast: Union[float, Tuple[float, float]] = 0, - saturation: Union[float, Tuple[float, float]] = 0, - hue: Union[float, Tuple[float, float]] = 0, + brightness: Union[float, tuple[float, float]] = 0, + contrast: Union[float, tuple[float, float]] = 0, + saturation: Union[float, tuple[float, float]] = 0, + hue: Union[float, tuple[float, float]] = 0, ) -> None: super().__init__() _log_api_usage_once(self) @@ -1227,11 +1227,11 @@ def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_firs @staticmethod def get_params( - brightness: Optional[List[float]], - contrast: Optional[List[float]], - saturation: Optional[List[float]], - hue: Optional[List[float]], - ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + brightness: Optional[list[float]], + contrast: Optional[list[float]], + saturation: Optional[list[float]], + hue: Optional[list[float]], + ) -> tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: """Get the parameters for the randomized transform to be applied on image. Args: @@ -1343,7 +1343,7 @@ def __init__(self, degrees, interpolation=InterpolationMode.NEAREST, expand=Fals self.fill = fill @staticmethod - def get_params(degrees: List[float]) -> float: + def get_params(degrees: list[float]) -> float: """Get parameters for ``rotate`` for a random rotation. Returns: @@ -1471,12 +1471,12 @@ def __init__( @staticmethod def get_params( - degrees: List[float], - translate: Optional[List[float]], - scale_ranges: Optional[List[float]], - shears: Optional[List[float]], - img_size: List[int], - ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + degrees: list[float], + translate: Optional[list[float]], + scale_ranges: Optional[list[float]], + shears: Optional[list[float]], + img_size: list[int], + ) -> tuple[float, tuple[int, int], float, tuple[float, float]]: """Get parameters for affine transformation Returns: @@ -1668,8 +1668,8 @@ def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace @staticmethod def get_params( - img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[List[float]] = None - ) -> Tuple[int, int, int, int, Tensor]: + img: Tensor, scale: tuple[float, float], ratio: tuple[float, float], value: Optional[list[float]] = None + ) -> tuple[int, int, int, int, Tensor]: """Get parameters for ``erase`` for a random erasing. 
Args: @@ -1728,7 +1728,7 @@ def forward(self, img): else: value = self.value - if value is not None and not (len(value) in (1, img.shape[-3])): + if value is not None and len(value) not in (1, img.shape[-3]): raise ValueError( "If value is a sequence, it should have either a single value or " f"{img.shape[-3]} (number of input channels)" @@ -2112,7 +2112,7 @@ def __init__(self, alpha=50.0, sigma=5.0, interpolation=InterpolationMode.BILINE self.fill = fill @staticmethod - def get_params(alpha: List[float], sigma: List[float], size: List[int]) -> Tensor: + def get_params(alpha: list[float], sigma: list[float], size: list[int]) -> Tensor: dx = torch.rand([1, 1] + size) * 2 - 1 if sigma[0] > 0.0: kx = int(8 * sigma[0] + 1) diff --git a/torchvision/transforms/v2/_augment.py b/torchvision/transforms/v2/_augment.py index 93d4ba45d65..d0746dead9d 100644 --- a/torchvision/transforms/v2/_augment.py +++ b/torchvision/transforms/v2/_augment.py @@ -1,7 +1,8 @@ import math import numbers import warnings -from typing import Any, Callable, Dict, List, Optional, Sequence, Union +from typing import Any, Callable, Dict, List, Optional, Union +from collections.abc import Sequence import PIL.Image import torch @@ -47,7 +48,7 @@ class RandomErasing(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomErasing - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: return dict( super()._extract_params_for_v1_transform(), value="random" if self.value is None else self.value, @@ -96,10 +97,10 @@ def _call_kernel(self, functional: Callable, inpt: Any, *args: Any, **kwargs: An ) return super()._call_kernel(functional, inpt, *args, **kwargs) - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: img_c, img_h, img_w = query_chw(flat_inputs) - if self.value is not None and not (len(self.value) in (1, img_c)): + if self.value is not None and len(self.value) not in (1, img_c): raise ValueError( f"If value is a sequence, it should have either a single value or {img_c} (number of inpt channels)" ) @@ -134,7 +135,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(i=i, j=j, h=h, w=w, v=v) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if params["v"] is not None: inpt = self._call_kernel(F.erase, inpt, **params, inplace=self.inplace) @@ -243,10 +244,10 @@ class MixUp(_BaseMixUpCutMix): It can also be a callable that takes the same input as the transform, and returns the labels. """ - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: return dict(lam=float(self._dist.sample(()))) # type: ignore[arg-type] - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: lam = params["lam"] if inpt is params["labels"]: @@ -292,7 +293,7 @@ class CutMix(_BaseMixUpCutMix): It can also be a callable that takes the same input as the transform, and returns the labels. 
""" - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: lam = float(self._dist.sample(())) # type: ignore[arg-type] H, W = query_size(flat_inputs) @@ -314,7 +315,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(box=box, lam_adjusted=lam_adjusted) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if inpt is params["labels"]: return self._mixup_label(inpt, lam=params["lam_adjusted"]) elif isinstance(inpt, (tv_tensors.Image, tv_tensors.Video)) or is_pure_tensor(inpt): @@ -361,9 +362,9 @@ def __init__(self, quality: Union[int, Sequence[int]]): self.quality = quality - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: quality = torch.randint(self.quality[0], self.quality[1] + 1, ()).item() return dict(quality=quality) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.jpeg, inpt, quality=params["quality"]) diff --git a/torchvision/transforms/v2/_auto_augment.py b/torchvision/transforms/v2/_auto_augment.py index 4dd7ba343aa..e4cfceb0ca6 100644 --- a/torchvision/transforms/v2/_auto_augment.py +++ b/torchvision/transforms/v2/_auto_augment.py @@ -23,14 +23,14 @@ def __init__( self, *, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = None, ) -> None: super().__init__() self.interpolation = _check_interpolation(interpolation) self.fill = fill self._fill = _setup_fill_arg(fill) - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: params = super()._extract_params_for_v1_transform() if isinstance(params["fill"], dict): @@ -38,7 +38,7 @@ def _extract_params_for_v1_transform(self) -> Dict[str, Any]: return params - def _get_random_item(self, dct: Dict[str, Tuple[Callable, bool]]) -> Tuple[str, Tuple[Callable, bool]]: + def _get_random_item(self, dct: dict[str, tuple[Callable, bool]]) -> tuple[str, tuple[Callable, bool]]: keys = tuple(dct.keys()) key = keys[int(torch.randint(len(keys), ()))] return key, dct[key] @@ -46,8 +46,8 @@ def _get_random_item(self, dct: Dict[str, Tuple[Callable, bool]]) -> Tuple[str, def _flatten_and_extract_image_or_video( self, inputs: Any, - unsupported_types: Tuple[Type, ...] = (tv_tensors.BoundingBoxes, tv_tensors.Mask), - ) -> Tuple[Tuple[List[Any], TreeSpec, int], ImageOrVideo]: + unsupported_types: tuple[type, ...] 
= (tv_tensors.BoundingBoxes, tv_tensors.Mask), + ) -> tuple[tuple[list[Any], TreeSpec, int], ImageOrVideo]: flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0]) needs_transform_list = self._needs_transform_list(flat_inputs) @@ -79,7 +79,7 @@ def _flatten_and_extract_image_or_video( def _unflatten_and_insert_image_or_video( self, - flat_inputs_with_spec: Tuple[List[Any], TreeSpec, int], + flat_inputs_with_spec: tuple[list[Any], TreeSpec, int], image_or_video: ImageOrVideo, ) -> Any: flat_inputs, spec, idx = flat_inputs_with_spec @@ -92,7 +92,7 @@ def _apply_image_or_video_transform( transform_id: str, magnitude: float, interpolation: Union[InterpolationMode, int], - fill: Dict[Union[Type, str], _FillTypeJIT], + fill: dict[Union[type, str], _FillTypeJIT], ) -> ImageOrVideo: # Note: this cast is wrong and is only here to make mypy happy (it disagrees with torchscript) image = cast(torch.Tensor, image) @@ -226,7 +226,7 @@ def __init__( self, policy: AutoAugmentPolicy = AutoAugmentPolicy.IMAGENET, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = None, ) -> None: super().__init__(interpolation=interpolation, fill=fill) self.policy = policy @@ -234,7 +234,7 @@ def __init__( def _get_policies( self, policy: AutoAugmentPolicy - ) -> List[Tuple[Tuple[str, float, Optional[int]], Tuple[str, float, Optional[int]]]]: + ) -> list[tuple[tuple[str, float, Optional[int]], tuple[str, float, Optional[int]]]]: if policy == AutoAugmentPolicy.IMAGENET: return [ (("Posterize", 0.4, 8), ("Rotate", 0.6, 9)), @@ -404,7 +404,7 @@ def __init__( magnitude: int = 9, num_magnitude_bins: int = 31, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = None, ) -> None: super().__init__(interpolation=interpolation, fill=fill) self.num_ops = num_ops @@ -475,7 +475,7 @@ def __init__( self, num_magnitude_bins: int = 31, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = None, ): super().__init__(interpolation=interpolation, fill=fill) self.num_magnitude_bins = num_magnitude_bins @@ -540,7 +540,7 @@ class AugMix(_AutoAugmentBase): "AutoContrast": (lambda num_bins, height, width: None, False), "Equalize": (lambda num_bins, height, width: None, False), } - _AUGMENTATION_SPACE: Dict[str, Tuple[Callable[[int, int, int], Optional[torch.Tensor]], bool]] = { + _AUGMENTATION_SPACE: dict[str, tuple[Callable[[int, int, int], Optional[torch.Tensor]], bool]] = { **_PARTIAL_AUGMENTATION_SPACE, "Brightness": (lambda num_bins, height, width: torch.linspace(0.0, 0.9, num_bins), True), "Color": (lambda num_bins, height, width: torch.linspace(0.0, 0.9, num_bins), True), @@ -556,7 +556,7 @@ def __init__( alpha: float = 1.0, all_ops: bool = True, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = None, ) -> None: super().__init__(interpolation=interpolation, fill=fill) self._PARAMETER_MAX = 10 diff --git a/torchvision/transforms/v2/_color.py b/torchvision/transforms/v2/_color.py index 7a471e7c1f6..e00a4b5e29e 100644 --- 
a/torchvision/transforms/v2/_color.py +++ b/torchvision/transforms/v2/_color.py @@ -1,5 +1,6 @@ import collections.abc -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union +from collections.abc import Sequence import torch from torchvision import transforms as _transforms @@ -25,7 +26,7 @@ def __init__(self, num_output_channels: int = 1): super().__init__() self.num_output_channels = num_output_channels - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.rgb_to_grayscale, inpt, num_output_channels=self.num_output_channels) @@ -46,11 +47,11 @@ class RandomGrayscale(_RandomApplyTransform): def __init__(self, p: float = 0.1) -> None: super().__init__(p=p) - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: num_input_channels, *_ = query_chw(flat_inputs) return dict(num_input_channels=num_input_channels) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.rgb_to_grayscale, inpt, num_output_channels=params["num_input_channels"]) @@ -64,7 +65,7 @@ class RGB(Transform): def __init__(self): super().__init__() - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.grayscale_to_rgb, inpt) @@ -95,7 +96,7 @@ class ColorJitter(Transform): _v1_transform_cls = _transforms.ColorJitter - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: return {attr: value or 0 for attr, value in super()._extract_params_for_v1_transform().items()} def __init__( @@ -116,9 +117,9 @@ def _check_input( value: Optional[Union[float, Sequence[float]]], name: str, center: float = 1.0, - bound: Tuple[float, float] = (0, float("inf")), + bound: tuple[float, float] = (0, float("inf")), clip_first_on_zero: bool = True, - ) -> Optional[Tuple[float, float]]: + ) -> Optional[tuple[float, float]]: if value is None: return None @@ -142,7 +143,7 @@ def _check_input( def _generate_value(left: float, right: float) -> float: return torch.empty(1).uniform_(left, right).item() - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: fn_idx = torch.randperm(4) b = None if self.brightness is None else self._generate_value(self.brightness[0], self.brightness[1]) @@ -152,7 +153,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(fn_idx=fn_idx, brightness_factor=b, contrast_factor=c, saturation_factor=s, hue_factor=h) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: output = inpt brightness_factor = params["brightness_factor"] contrast_factor = params["contrast_factor"] @@ -173,11 +174,11 @@ def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomChannelPermutation(Transform): """Randomly permute the channels of an image or video""" - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: num_channels, *_ = query_chw(flat_inputs) return dict(permutation=torch.randperm(num_channels)) - def transform(self, inpt: Any, params: Dict[str, 
Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.permute_channels, inpt, params["permutation"]) @@ -207,10 +208,10 @@ class RandomPhotometricDistort(Transform): def __init__( self, - brightness: Tuple[float, float] = (0.875, 1.125), - contrast: Tuple[float, float] = (0.5, 1.5), - saturation: Tuple[float, float] = (0.5, 1.5), - hue: Tuple[float, float] = (-0.05, 0.05), + brightness: tuple[float, float] = (0.875, 1.125), + contrast: tuple[float, float] = (0.5, 1.5), + saturation: tuple[float, float] = (0.5, 1.5), + hue: tuple[float, float] = (-0.05, 0.05), p: float = 0.5, ): super().__init__() @@ -220,9 +221,9 @@ def __init__( self.saturation = saturation self.p = p - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: num_channels, *_ = query_chw(flat_inputs) - params: Dict[str, Any] = { + params: dict[str, Any] = { key: ColorJitter._generate_value(range[0], range[1]) if torch.rand(1) < self.p else None for key, range in [ ("brightness_factor", self.brightness), @@ -235,7 +236,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: params["channel_permutation"] = torch.randperm(num_channels) if torch.rand(1) < self.p else None return params - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if params["brightness_factor"] is not None: inpt = self._call_kernel(F.adjust_brightness, inpt, brightness_factor=params["brightness_factor"]) if params["contrast_factor"] is not None and params["contrast_before"]: @@ -264,7 +265,7 @@ class RandomEqualize(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomEqualize - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.equalize, inpt) @@ -281,7 +282,7 @@ class RandomInvert(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomInvert - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.invert, inpt) @@ -304,7 +305,7 @@ def __init__(self, bits: int, p: float = 0.5) -> None: super().__init__(p=p) self.bits = bits - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.posterize, inpt, bits=self.bits) @@ -323,7 +324,7 @@ class RandomSolarize(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomSolarize - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: params = super()._extract_params_for_v1_transform() params["threshold"] = float(params["threshold"]) return params @@ -332,7 +333,7 @@ def __init__(self, threshold: float, p: float = 0.5) -> None: super().__init__(p=p) self.threshold = threshold - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.solarize, inpt, threshold=self.threshold) @@ -349,7 +350,7 @@ class RandomAutocontrast(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomAutocontrast - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.autocontrast, inpt) @@ -372,5 +373,5 @@ def __init__(self, 
sharpness_factor: float, p: float = 0.5) -> None: super().__init__(p=p) self.sharpness_factor = sharpness_factor - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.adjust_sharpness, inpt, sharpness_factor=self.sharpness_factor) diff --git a/torchvision/transforms/v2/_container.py b/torchvision/transforms/v2/_container.py index 54de601c696..222a46b00f6 100644 --- a/torchvision/transforms/v2/_container.py +++ b/torchvision/transforms/v2/_container.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Dict, List, Optional, Sequence, Union +from typing import Any, Callable, Dict, List, Optional, Union +from collections.abc import Sequence import torch @@ -92,7 +93,7 @@ def __init__(self, transforms: Union[Sequence[Callable], nn.ModuleList], p: floa raise ValueError("`p` should be a floating point value in the interval [0.0, 1.0].") self.p = p - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: return {"transforms": self.transforms, "p": self.p} def forward(self, *inputs: Any) -> Any: @@ -128,7 +129,7 @@ class RandomChoice(Transform): def __init__( self, transforms: Sequence[Callable], - p: Optional[List[float]] = None, + p: Optional[list[float]] = None, ) -> None: if not isinstance(transforms, Sequence): raise TypeError("Argument transforms should be a sequence of callables") diff --git a/torchvision/transforms/v2/_deprecated.py b/torchvision/transforms/v2/_deprecated.py index 51a4f076e49..2f59867598f 100644 --- a/torchvision/transforms/v2/_deprecated.py +++ b/torchvision/transforms/v2/_deprecated.py @@ -46,5 +46,5 @@ def __init__(self) -> None: ) super().__init__() - def transform(self, inpt: Union[PIL.Image.Image, np.ndarray], params: Dict[str, Any]) -> torch.Tensor: + def transform(self, inpt: Union[PIL.Image.Image, np.ndarray], params: dict[str, Any]) -> torch.Tensor: return _F.to_tensor(inpt) diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index c2461418a42..0808d88fed0 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -1,7 +1,8 @@ import math import numbers import warnings -from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Type, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, Union +from collections.abc import Sequence import PIL.Image import torch @@ -44,7 +45,7 @@ class RandomHorizontalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomHorizontalFlip - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.horizontal_flip, inpt) @@ -62,7 +63,7 @@ class RandomVerticalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomVerticalFlip - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.vertical_flip, inpt) @@ -156,7 +157,7 @@ def __init__( self.max_size = max_size self.antialias = antialias - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel( F.resize, inpt, @@ -189,7 +190,7 @@ def __init__(self, size: Union[int, Sequence[int]]): super().__init__() self.size = _setup_size(size, error_msg="Please provide only two 
dimensions (h, w) for size.") - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.center_crop, inpt, output_size=self.size) @@ -246,8 +247,8 @@ class RandomResizedCrop(Transform): def __init__( self, size: Union[int, Sequence[int]], - scale: Tuple[float, float] = (0.08, 1.0), - ratio: Tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0), + scale: tuple[float, float] = (0.08, 1.0), + ratio: tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0), interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> None: @@ -268,7 +269,7 @@ def __init__( self._log_ratio = torch.log(torch.tensor(self.ratio)) - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: height, width = query_size(flat_inputs) area = height * width @@ -306,7 +307,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(top=i, left=j, height=h, width=w) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel( F.resized_crop, inpt, **params, size=self.size, interpolation=self.interpolation, antialias=self.antialias ) @@ -363,10 +364,10 @@ def _call_kernel(self, functional: Callable, inpt: Any, *args: Any, **kwargs: An ) return super()._call_kernel(functional, inpt, *args, **kwargs) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.five_crop, inpt, self.size) - def check_inputs(self, flat_inputs: List[Any]) -> None: + def check_inputs(self, flat_inputs: list[Any]) -> None: if has_any(flat_inputs, tv_tensors.BoundingBoxes, tv_tensors.Mask): raise TypeError(f"BoundingBoxes'es and Mask's are not supported by {type(self).__name__}()") @@ -408,11 +409,11 @@ def _call_kernel(self, functional: Callable, inpt: Any, *args: Any, **kwargs: An ) return super()._call_kernel(functional, inpt, *args, **kwargs) - def check_inputs(self, flat_inputs: List[Any]) -> None: + def check_inputs(self, flat_inputs: list[Any]) -> None: if has_any(flat_inputs, tv_tensors.BoundingBoxes, tv_tensors.Mask): raise TypeError(f"BoundingBoxes'es and Mask's are not supported by {type(self).__name__}()") - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.ten_crop, inpt, self.size, vertical_flip=self.vertical_flip) @@ -456,7 +457,7 @@ class Pad(Transform): _v1_transform_cls = _transforms.Pad - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: params = super()._extract_params_for_v1_transform() if not (params["fill"] is None or isinstance(params["fill"], (int, float))): @@ -467,7 +468,7 @@ def _extract_params_for_v1_transform(self) -> Dict[str, Any]: def __init__( self, padding: Union[int, Sequence[int]], - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant", ) -> None: super().__init__() @@ -483,7 +484,7 @@ def __init__( self._fill = _setup_fill_arg(fill) self.padding_mode = padding_mode - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: 
dict[str, Any]) -> Any: fill = _get_fill(self._fill, type(inpt)) return self._call_kernel(F.pad, inpt, padding=self.padding, fill=fill, padding_mode=self.padding_mode) # type: ignore[arg-type] @@ -520,7 +521,7 @@ class RandomZoomOut(_RandomApplyTransform): def __init__( self, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, side_range: Sequence[float] = (1.0, 4.0), p: float = 0.5, ) -> None: @@ -535,7 +536,7 @@ def __init__( if side_range[0] < 1.0 or side_range[0] > side_range[1]: raise ValueError(f"Invalid side range provided {side_range}.") - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: orig_h, orig_w = query_size(flat_inputs) r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) @@ -551,7 +552,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(padding=padding) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: fill = _get_fill(self._fill, type(inpt)) return self._call_kernel(F.pad, inpt, **params, fill=fill) @@ -602,8 +603,8 @@ def __init__( degrees: Union[numbers.Number, Sequence], interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, expand: bool = False, - center: Optional[List[float]] = None, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, + center: Optional[list[float]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, ) -> None: super().__init__() self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) @@ -618,11 +619,11 @@ def __init__( self.center = center - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: angle = torch.empty(1).uniform_(self.degrees[0], self.degrees[1]).item() return dict(angle=angle) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: fill = _get_fill(self._fill, type(inpt)) return self._call_kernel( F.rotate, @@ -684,8 +685,8 @@ def __init__( scale: Optional[Sequence[float]] = None, shear: Optional[Union[int, float, Sequence[float]]] = None, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, - center: Optional[List[float]] = None, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, + center: Optional[list[float]] = None, ) -> None: super().__init__() self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) @@ -716,7 +717,7 @@ def __init__( self.center = center - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: height, width = query_size(flat_inputs) angle = torch.empty(1).uniform_(self.degrees[0], self.degrees[1]).item() @@ -743,7 +744,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: shear = (shear_x, shear_y) return dict(angle=angle, translate=translate, scale=scale, shear=shear) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: fill = _get_fill(self._fill, type(inpt)) return self._call_kernel( F.affine, @@ -802,7 +803,7 @@ class RandomCrop(Transform): _v1_transform_cls = _transforms.RandomCrop - def 
_extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: params = super()._extract_params_for_v1_transform() if not (params["fill"] is None or isinstance(params["fill"], (int, float))): @@ -821,7 +822,7 @@ def __init__( size: Union[int, Sequence[int]], padding: Optional[Union[int, Sequence[int]]] = None, pad_if_needed: bool = False, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant", ) -> None: super().__init__() @@ -839,7 +840,7 @@ def __init__( self._fill = _setup_fill_arg(fill) self.padding_mode = padding_mode - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: padded_height, padded_width = query_size(flat_inputs) if self.padding is not None: @@ -897,7 +898,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: padding=padding, ) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if params["needs_pad"]: fill = _get_fill(self._fill, type(inpt)) inpt = self._call_kernel(F.pad, inpt, padding=params["padding"], fill=fill, padding_mode=self.padding_mode) @@ -940,7 +941,7 @@ def __init__( distortion_scale: float = 0.5, p: float = 0.5, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, ) -> None: super().__init__(p=p) @@ -952,7 +953,7 @@ def __init__( self.fill = fill self._fill = _setup_fill_arg(fill) - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: height, width = query_size(flat_inputs) distortion_scale = self.distortion_scale @@ -982,7 +983,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints) return dict(coefficients=perspective_coeffs) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: fill = _get_fill(self._fill, type(inpt)) return self._call_kernel( F.perspective, @@ -1041,7 +1042,7 @@ def __init__( alpha: Union[float, Sequence[float]] = 50.0, sigma: Union[float, Sequence[float]] = 5.0, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, - fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, + fill: Union[_FillType, dict[Union[type, str], _FillType]] = 0, ) -> None: super().__init__() self.alpha = _setup_number_or_seq(alpha, "alpha") @@ -1051,7 +1052,7 @@ def __init__( self.fill = fill self._fill = _setup_fill_arg(fill) - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: size = list(query_size(flat_inputs)) dx = torch.rand([1, 1] + size) * 2 - 1 @@ -1074,7 +1075,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: displacement = torch.concat([dx, dy], 1).permute([0, 2, 3, 1]) # 1 x H x W x 2 return dict(displacement=displacement) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: fill = _get_fill(self._fill, type(inpt)) return self._call_kernel( F.elastic, @@ -1118,7 +1119,7 @@ def __init__( 
max_scale: float = 1.0, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2.0, - sampler_options: Optional[List[float]] = None, + sampler_options: Optional[list[float]] = None, trials: int = 40, ): super().__init__() @@ -1132,7 +1133,7 @@ def __init__( self.options = sampler_options self.trials = trials - def check_inputs(self, flat_inputs: List[Any]) -> None: + def check_inputs(self, flat_inputs: list[Any]) -> None: if not ( has_all(flat_inputs, tv_tensors.BoundingBoxes) and has_any(flat_inputs, PIL.Image.Image, tv_tensors.Image, is_pure_tensor) @@ -1142,7 +1143,7 @@ def check_inputs(self, flat_inputs: List[Any]) -> None: "and bounding boxes. Sample can also contain masks." ) - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: orig_h, orig_w = query_size(flat_inputs) bboxes = get_bounding_boxes(flat_inputs) @@ -1194,7 +1195,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(top=top, left=left, height=new_h, width=new_w, is_within_crop_area=is_within_crop_area) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if len(params) < 1: return inpt @@ -1251,8 +1252,8 @@ class ScaleJitter(Transform): def __init__( self, - target_size: Tuple[int, int], - scale_range: Tuple[float, float] = (0.1, 2.0), + target_size: tuple[int, int], + scale_range: tuple[float, float] = (0.1, 2.0), interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ): @@ -1262,7 +1263,7 @@ def __init__( self.interpolation = interpolation self.antialias = antialias - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: orig_height, orig_width = query_size(flat_inputs) scale = self.scale_range[0] + torch.rand(1) * (self.scale_range[1] - self.scale_range[0]) @@ -1272,7 +1273,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(size=(new_height, new_width)) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel( F.resize, inpt, size=params["size"], interpolation=self.interpolation, antialias=self.antialias ) @@ -1316,7 +1317,7 @@ class RandomShortestSize(Transform): def __init__( self, - min_size: Union[List[int], Tuple[int], int], + min_size: Union[list[int], tuple[int], int], max_size: Optional[int] = None, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, @@ -1327,7 +1328,7 @@ def __init__( self.interpolation = interpolation self.antialias = antialias - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: orig_height, orig_width = query_size(flat_inputs) min_size = self.min_size[int(torch.randint(len(self.min_size), ()))] @@ -1340,7 +1341,7 @@ def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: return dict(size=(new_height, new_width)) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel( F.resize, inpt, size=params["size"], interpolation=self.interpolation, antialias=self.antialias ) @@ -1406,11 +1407,11 @@ def __init__( self.interpolation = interpolation self.antialias = antialias - def make_params(self, flat_inputs: List[Any]) 
-> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: size = int(torch.randint(self.min_size, self.max_size, ())) return dict(size=[size]) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel( F.resize, inpt, params["size"], interpolation=self.interpolation, antialias=self.antialias ) diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 1890b43115a..bc094e81b13 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -19,7 +19,7 @@ def __init__(self, format: Union[str, tv_tensors.BoundingBoxFormat]) -> None: super().__init__() self.format = format - def transform(self, inpt: tv_tensors.BoundingBoxes, params: Dict[str, Any]) -> tv_tensors.BoundingBoxes: + def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes: return F.convert_bounding_box_format(inpt, new_format=self.format) # type: ignore[return-value, arg-type] @@ -32,5 +32,5 @@ class ClampBoundingBoxes(Transform): _transformed_types = (tv_tensors.BoundingBoxes,) - def transform(self, inpt: tv_tensors.BoundingBoxes, params: Dict[str, Any]) -> tv_tensors.BoundingBoxes: + def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes: return F.clamp_bounding_boxes(inpt) # type: ignore[return-value] diff --git a/torchvision/transforms/v2/_misc.py b/torchvision/transforms/v2/_misc.py index d38a6ad8767..8e7bd862c37 100644 --- a/torchvision/transforms/v2/_misc.py +++ b/torchvision/transforms/v2/_misc.py @@ -1,5 +1,6 @@ import warnings -from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union +from typing import Any, Callable, Dict, List, Optional, Type, Union +from collections.abc import Sequence import PIL.Image @@ -14,7 +15,7 @@ # TODO: do we want/need to expose this? 
class Identity(Transform): - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return inpt @@ -29,12 +30,12 @@ class Lambda(Transform): _transformed_types = (object,) - def __init__(self, lambd: Callable[[Any], Any], *types: Type): + def __init__(self, lambd: Callable[[Any], Any], *types: type): super().__init__() self.lambd = lambd self.types = types or self._transformed_types - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if isinstance(inpt, self.types): return self.lambd(inpt) else: @@ -103,7 +104,7 @@ def check_inputs(self, sample: Any) -> Any: if has_any(sample, PIL.Image.Image): raise TypeError(f"{type(self).__name__}() does not support PIL images.") - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: shape = inpt.shape n = shape[-3] * shape[-2] * shape[-1] if n != self.transformation_matrix.shape[0]: @@ -161,7 +162,7 @@ def check_inputs(self, sample: Any) -> Any: if has_any(sample, PIL.Image.Image): raise TypeError(f"{type(self).__name__}() does not support PIL images.") - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.normalize, inpt, mean=self.mean, std=self.std, inplace=self.inplace) @@ -197,11 +198,11 @@ def __init__( if not 0.0 < self.sigma[0] <= self.sigma[1]: raise ValueError(f"sigma values should be positive and of the form (min, max). Got {self.sigma}") - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: sigma = torch.empty(1).uniform_(self.sigma[0], self.sigma[1]).item() return dict(sigma=[sigma, sigma]) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.gaussian_blur, inpt, self.kernel_size, **params) @@ -228,7 +229,7 @@ def __init__(self, mean: float = 0.0, sigma: float = 0.1, clip=True) -> None: self.sigma = sigma self.clip = clip - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.gaussian_noise, inpt, mean=self.mean, sigma=self.sigma, clip=self.clip) @@ -252,7 +253,7 @@ class ToDtype(Transform): _transformed_types = (torch.Tensor,) def __init__( - self, dtype: Union[torch.dtype, Dict[Union[Type, str], Optional[torch.dtype]]], scale: bool = False + self, dtype: Union[torch.dtype, dict[Union[type, str], Optional[torch.dtype]]], scale: bool = False ) -> None: super().__init__() @@ -272,7 +273,7 @@ def __init__( self.dtype = dtype self.scale = scale - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: if isinstance(self.dtype, torch.dtype): # For consistency / BC with ConvertImageDtype, we only care about images or videos when dtype # is a simple torch.dtype @@ -335,7 +336,7 @@ def __init__(self, dtype: torch.dtype = torch.float32) -> None: super().__init__() self.dtype = dtype - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.to_dtype, inpt, dtype=self.dtype, scale=True) @@ -436,7 +437,7 @@ def forward(self, *inputs: Any) -> Any: return 
tree_unflatten(flat_outputs, spec) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: is_label = params["labels"] is not None and any(inpt is label for label in params["labels"]) is_bounding_boxes_or_mask = isinstance(inpt, (tv_tensors.BoundingBoxes, tv_tensors.Mask)) diff --git a/torchvision/transforms/v2/_temporal.py b/torchvision/transforms/v2/_temporal.py index 687b50188a8..4cc2ee7c3af 100644 --- a/torchvision/transforms/v2/_temporal.py +++ b/torchvision/transforms/v2/_temporal.py @@ -22,5 +22,5 @@ def __init__(self, num_samples: int): super().__init__() self.num_samples = num_samples - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.uniform_temporal_subsample, inpt, self.num_samples) diff --git a/torchvision/transforms/v2/_transform.py b/torchvision/transforms/v2/_transform.py index 5f274589709..983d9b0f426 100644 --- a/torchvision/transforms/v2/_transform.py +++ b/torchvision/transforms/v2/_transform.py @@ -23,13 +23,13 @@ class Transform(nn.Module): # Class attribute defining transformed types. Other types are passed-through without any transformation # We support both Types and callables that are able to do further checks on the type of the input. - _transformed_types: Tuple[Union[Type, Callable[[Any], bool]], ...] = (torch.Tensor, PIL.Image.Image) + _transformed_types: tuple[type | Callable[[Any], bool], ...] = (torch.Tensor, PIL.Image.Image) def __init__(self) -> None: super().__init__() _log_api_usage_once(self) - def check_inputs(self, flat_inputs: List[Any]) -> None: + def check_inputs(self, flat_inputs: list[Any]) -> None: pass # When v2 was introduced, this method was private and called @@ -38,7 +38,7 @@ def check_inputs(self, flat_inputs: List[Any]) -> None: # methods for v2 transforms: it's the v1's `get_params()` that we have to # keep in order to guarantee 100% BC with v1. (It's defined in # __init_subclass__ below). - def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: + def make_params(self, flat_inputs: list[Any]) -> dict[str, Any]: """Method to override for custom transforms. See :ref:`sphx_glr_auto_examples_transforms_plot_custom_transforms.py`""" @@ -48,7 +48,7 @@ def _call_kernel(self, functional: Callable, inpt: Any, *args: Any, **kwargs: An kernel = _get_kernel(functional, type(inpt), allow_passthrough=True) return kernel(inpt, *args, **kwargs) - def transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: """Method to override for custom transforms. See :ref:`sphx_glr_auto_examples_transforms_plot_custom_transforms.py`""" @@ -72,7 +72,7 @@ def forward(self, *inputs: Any) -> Any: return tree_unflatten(flat_outputs, spec) - def _needs_transform_list(self, flat_inputs: List[Any]) -> List[bool]: + def _needs_transform_list(self, flat_inputs: list[Any]) -> list[bool]: # Below is a heuristic on how to deal with pure tensor inputs: # 1. Pure tensors, i.e. tensors that are not a tv_tensor, are passed through if there is an explicit image # (`tv_tensors.Image` or `PIL.Image.Image`) or video (`tv_tensors.Video`) in the sample. @@ -122,7 +122,7 @@ def extra_repr(self) -> str: # the v2 transform. See `__init_subclass__` for details. # 2. The v2 transform will be JIT scriptable. See `_extract_params_for_v1_transform` and `__prepare_scriptable__` # for details. 
- _v1_transform_cls: Optional[Type[nn.Module]] = None + _v1_transform_cls: type[nn.Module] | None = None def __init_subclass__(cls) -> None: # Since `get_params` is a `@staticmethod`, we have to bind it to the class itself rather than to an instance. @@ -130,7 +130,7 @@ def __init_subclass__(cls) -> None: if cls._v1_transform_cls is not None and hasattr(cls._v1_transform_cls, "get_params"): cls.get_params = staticmethod(cls._v1_transform_cls.get_params) # type: ignore[attr-defined] - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + def _extract_params_for_v1_transform(self) -> dict[str, Any]: # This method is called by `__prepare_scriptable__` to instantiate the equivalent v1 transform from the current # v2 transform instance. It extracts all available public attributes that are specific to that transform and # not `nn.Module` in general. diff --git a/torchvision/transforms/v2/_type_conversion.py b/torchvision/transforms/v2/_type_conversion.py index bf9f7185239..ee81d77f723 100644 --- a/torchvision/transforms/v2/_type_conversion.py +++ b/torchvision/transforms/v2/_type_conversion.py @@ -20,7 +20,7 @@ class PILToTensor(Transform): _transformed_types = (PIL.Image.Image,) - def transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Tensor: + def transform(self, inpt: PIL.Image.Image, params: dict[str, Any]) -> torch.Tensor: return F.pil_to_tensor(inpt) @@ -34,7 +34,7 @@ class ToImage(Transform): _transformed_types = (is_pure_tensor, PIL.Image.Image, np.ndarray) def transform( - self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: Dict[str, Any] + self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: dict[str, Any] ) -> tv_tensors.Image: return F.to_image(inpt) @@ -67,7 +67,7 @@ def __init__(self, mode: Optional[str] = None) -> None: self.mode = mode def transform( - self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: Dict[str, Any] + self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: dict[str, Any] ) -> PIL.Image.Image: return F.to_pil_image(inpt, mode=self.mode) @@ -80,5 +80,5 @@ class ToPureTensor(Transform): _transformed_types = (tv_tensors.TVTensor,) - def transform(self, inpt: Any, params: Dict[str, Any]) -> torch.Tensor: + def transform(self, inpt: Any, params: dict[str, Any]) -> torch.Tensor: return inpt.as_subclass(torch.Tensor) diff --git a/torchvision/transforms/v2/_utils.py b/torchvision/transforms/v2/_utils.py index dd65ca4d9c9..228f09ff2ac 100644 --- a/torchvision/transforms/v2/_utils.py +++ b/torchvision/transforms/v2/_utils.py @@ -4,7 +4,8 @@ import numbers from contextlib import suppress -from typing import Any, Callable, Dict, List, Literal, Sequence, Tuple, Type, Union +from typing import Any, Callable, Dict, List, Literal, Tuple, Type, Union +from collections.abc import Sequence import PIL.Image import torch @@ -18,7 +19,7 @@ from torchvision.transforms.v2.functional._utils import _FillType, _FillTypeJIT -def _setup_number_or_seq(arg: Union[int, float, Sequence[Union[int, float]]], name: str) -> Sequence[float]: +def _setup_number_or_seq(arg: int | float | Sequence[int | float], name: str) -> Sequence[float]: if not isinstance(arg, (int, float, Sequence)): raise TypeError(f"{name} should be a number or a sequence of numbers. 
Got {type(arg)}") if isinstance(arg, Sequence) and len(arg) not in (1, 2): @@ -38,7 +39,7 @@ def _setup_number_or_seq(arg: Union[int, float, Sequence[Union[int, float]]], na return arg -def _check_fill_arg(fill: Union[_FillType, Dict[Union[Type, str], _FillType]]) -> None: +def _check_fill_arg(fill: _FillType | dict[type | str, _FillType]) -> None: if isinstance(fill, dict): for value in fill.values(): _check_fill_arg(value) @@ -60,7 +61,7 @@ def _convert_fill_arg(fill: _FillType) -> _FillTypeJIT: return fill # type: ignore[return-value] -def _setup_fill_arg(fill: Union[_FillType, Dict[Union[Type, str], _FillType]]) -> Dict[Union[Type, str], _FillTypeJIT]: +def _setup_fill_arg(fill: _FillType | dict[type | str, _FillType]) -> dict[type | str, _FillTypeJIT]: _check_fill_arg(fill) if isinstance(fill, dict): @@ -80,7 +81,7 @@ def _get_fill(fill_dict, inpt_type): RuntimeError("This should never happen, please open an issue on the torchvision repo if you hit this.") -def _check_padding_arg(padding: Union[int, Sequence[int]]) -> None: +def _check_padding_arg(padding: int | Sequence[int]) -> None: if not isinstance(padding, (numbers.Number, tuple, list)): raise TypeError("Got inappropriate padding arg") @@ -139,7 +140,7 @@ def _find_labels_default_heuristic(inputs: Any) -> torch.Tensor: return inputs[candidate_key] -def _parse_labels_getter(labels_getter: Union[str, Callable[[Any], Any], None]) -> Callable[[Any], Any]: +def _parse_labels_getter(labels_getter: str | Callable[[Any], Any] | None) -> Callable[[Any], Any]: if labels_getter == "default": return _find_labels_default_heuristic elif callable(labels_getter): @@ -150,7 +151,7 @@ def _parse_labels_getter(labels_getter: Union[str, Callable[[Any], Any], None]) raise ValueError(f"labels_getter should either be 'default', a callable, or None, but got {labels_getter}.") -def get_bounding_boxes(flat_inputs: List[Any]) -> tv_tensors.BoundingBoxes: +def get_bounding_boxes(flat_inputs: list[Any]) -> tv_tensors.BoundingBoxes: """Return the Bounding Boxes in the input. Assumes only one ``BoundingBoxes`` object is present. 
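A minimal, standalone sketch of the annotation style adopted in the `_utils.py` hunks above (the helper name `_setup_range` is hypothetical, and the `from __future__ import annotations` line is an assumption not shown in those hunks): with postponed evaluation, PEP 604 unions such as `int | float` are legal in annotations on Python 3.9, while PEP 585 builtin generics such as `tuple[float, float]` already evaluate at runtime there.

# Illustrative sketch only -- not part of the torchvision patch.
from __future__ import annotations  # PEP 563: annotations stay unevaluated (assumption)

from collections.abc import Sequence  # typing.Sequence is deprecated since Python 3.9


def _setup_range(arg: int | float | Sequence[float]) -> tuple[float, float]:
    # Hypothetical helper mirroring the `_setup_number_or_seq` annotation style above.
    if isinstance(arg, (int, float)):
        return float(arg), float(arg)
    lo, hi = arg
    return float(lo), float(hi)


print(_setup_range(3))           # (3.0, 3.0)
print(_setup_range((0.1, 2.0)))  # (0.1, 2.0)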
@@ -162,7 +163,7 @@ def get_bounding_boxes(flat_inputs: List[Any]) -> tv_tensors.BoundingBoxes: raise ValueError("No bounding boxes were found in the sample") -def query_chw(flat_inputs: List[Any]) -> Tuple[int, int, int]: +def query_chw(flat_inputs: list[Any]) -> tuple[int, int, int]: """Return Channel, Height, and Width.""" chws = { tuple(get_dimensions(inpt)) @@ -177,7 +178,7 @@ def query_chw(flat_inputs: List[Any]) -> Tuple[int, int, int]: return c, h, w -def query_size(flat_inputs: List[Any]) -> Tuple[int, int]: +def query_size(flat_inputs: list[Any]) -> tuple[int, int]: """Return Height and Width.""" sizes = { tuple(get_size(inpt)) @@ -202,21 +203,21 @@ def query_size(flat_inputs: List[Any]) -> Tuple[int, int]: return h, w -def check_type(obj: Any, types_or_checks: Tuple[Union[Type, Callable[[Any], bool]], ...]) -> bool: +def check_type(obj: Any, types_or_checks: tuple[type | Callable[[Any], bool], ...]) -> bool: for type_or_check in types_or_checks: if isinstance(obj, type_or_check) if isinstance(type_or_check, type) else type_or_check(obj): return True return False -def has_any(flat_inputs: List[Any], *types_or_checks: Union[Type, Callable[[Any], bool]]) -> bool: +def has_any(flat_inputs: list[Any], *types_or_checks: type | Callable[[Any], bool]) -> bool: for inpt in flat_inputs: if check_type(inpt, types_or_checks): return True return False -def has_all(flat_inputs: List[Any], *types_or_checks: Union[Type, Callable[[Any], bool]]) -> bool: +def has_all(flat_inputs: list[Any], *types_or_checks: type | Callable[[Any], bool]) -> bool: for type_or_check in types_or_checks: for inpt in flat_inputs: if isinstance(inpt, type_or_check) if isinstance(type_or_check, type) else type_or_check(inpt): diff --git a/torchvision/transforms/v2/functional/_color.py b/torchvision/transforms/v2/functional/_color.py index eb75f58cb7a..64b5d578801 100644 --- a/torchvision/transforms/v2/functional/_color.py +++ b/torchvision/transforms/v2/functional/_color.py @@ -679,7 +679,7 @@ def invert_video(video: torch.Tensor) -> torch.Tensor: return invert_image(video) -def permute_channels(inpt: torch.Tensor, permutation: List[int]) -> torch.Tensor: +def permute_channels(inpt: torch.Tensor, permutation: list[int]) -> torch.Tensor: """Permute the channels of the input according to the given permutation. 
This function supports plain :class:`~torch.Tensor`'s, :class:`PIL.Image.Image`'s, and @@ -712,7 +712,7 @@ def permute_channels(inpt: torch.Tensor, permutation: List[int]) -> torch.Tensor @_register_kernel_internal(permute_channels, torch.Tensor) @_register_kernel_internal(permute_channels, tv_tensors.Image) -def permute_channels_image(image: torch.Tensor, permutation: List[int]) -> torch.Tensor: +def permute_channels_image(image: torch.Tensor, permutation: list[int]) -> torch.Tensor: shape = image.shape num_channels, height, width = shape[-3:] @@ -730,10 +730,10 @@ def permute_channels_image(image: torch.Tensor, permutation: List[int]) -> torch @_register_kernel_internal(permute_channels, PIL.Image.Image) -def _permute_channels_image_pil(image: PIL.Image.Image, permutation: List[int]) -> PIL.Image.Image: +def _permute_channels_image_pil(image: PIL.Image.Image, permutation: list[int]) -> PIL.Image.Image: return to_pil_image(permute_channels_image(pil_to_tensor(image), permutation=permutation)) @_register_kernel_internal(permute_channels, tv_tensors.Video) -def permute_channels_video(video: torch.Tensor, permutation: List[int]) -> torch.Tensor: +def permute_channels_video(video: torch.Tensor, permutation: list[int]) -> torch.Tensor: return permute_channels_image(video, permutation=permutation) diff --git a/torchvision/transforms/v2/functional/_deprecated.py b/torchvision/transforms/v2/functional/_deprecated.py index 116ea31587a..19545c246ac 100644 --- a/torchvision/transforms/v2/functional/_deprecated.py +++ b/torchvision/transforms/v2/functional/_deprecated.py @@ -16,7 +16,7 @@ def to_tensor(inpt: Any) -> torch.Tensor: return _F.to_tensor(inpt) -def get_image_size(inpt: torch.Tensor) -> List[int]: +def get_image_size(inpt: torch.Tensor) -> list[int]: warnings.warn( "The function `get_image_size(...)` is deprecated and will be removed in a future release. " "Instead, please use `get_size(...)` which returns `[h, w]` instead of `[w, h]`." 
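The `_geometry.py` kernels below repeat the same mechanical rewrite of `List`/`Tuple` annotations to builtin generics already seen in `_color.py` and `_deprecated.py`. A short standalone sketch (function names are hypothetical, not torchvision APIs) of why the rewrite is transparent to runtime introspection: both spellings report the same origin and argument types through `typing.get_origin`/`get_args`.

# Illustrative sketch only -- not part of the torchvision patch.
from typing import List, get_args, get_origin


def old_style(permutation: List[int]) -> None: ...


def new_style(permutation: list[int]) -> None: ...


for fn in (old_style, new_style):
    ann = fn.__annotations__["permutation"]
    # Both annotation spellings expose the same origin and argument types,
    # so annotation-driven tooling behaves identically after the rewrite.
    print(fn.__name__, get_origin(ann), get_args(ann))
# old_style <class 'list'> (<class 'int'>,)
# new_style <class 'list'> (<class 'int'>,)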
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index da080e437c9..8d8d850760c 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -1,7 +1,8 @@ import math import numbers import warnings -from typing import Any, List, Optional, Sequence, Tuple, Union +from typing import Any, List, Optional, Tuple, Union +from collections.abc import Sequence import PIL.Image import torch @@ -66,7 +67,7 @@ def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor: def horizontal_flip_bounding_boxes( - bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, canvas_size: Tuple[int, int] + bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, canvas_size: tuple[int, int] ) -> torch.Tensor: shape = bounding_boxes.shape @@ -123,7 +124,7 @@ def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor: def vertical_flip_bounding_boxes( - bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, canvas_size: Tuple[int, int] + bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, canvas_size: tuple[int, int] ) -> torch.Tensor: shape = bounding_boxes.shape @@ -159,8 +160,8 @@ def vertical_flip_video(video: torch.Tensor) -> torch.Tensor: def _compute_resized_output_size( - canvas_size: Tuple[int, int], size: Optional[List[int]], max_size: Optional[int] = None -) -> List[int]: + canvas_size: tuple[int, int], size: Optional[list[int]], max_size: Optional[int] = None +) -> list[int]: if isinstance(size, int): size = [size] elif max_size is not None and size is not None and len(size) != 1: @@ -173,7 +174,7 @@ def _compute_resized_output_size( def resize( inpt: torch.Tensor, - size: Optional[List[int]], + size: Optional[list[int]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -206,7 +207,7 @@ def _do_native_uint8_resize_on_cpu(interpolation: InterpolationMode) -> bool: @_register_kernel_internal(resize, tv_tensors.Image) def resize_image( image: torch.Tensor, - size: Optional[List[int]], + size: Optional[list[int]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -310,7 +311,7 @@ def __resize_image_pil_dispatch( return _resize_image_pil(image, size=size, interpolation=interpolation, max_size=max_size) -def resize_mask(mask: torch.Tensor, size: Optional[List[int]], max_size: Optional[int] = None) -> torch.Tensor: +def resize_mask(mask: torch.Tensor, size: Optional[list[int]], max_size: Optional[int] = None) -> torch.Tensor: if mask.ndim < 3: mask = mask.unsqueeze(0) needs_squeeze = True @@ -327,7 +328,7 @@ def resize_mask(mask: torch.Tensor, size: Optional[List[int]], max_size: Optiona @_register_kernel_internal(resize, tv_tensors.Mask, tv_tensor_wrapper=False) def _resize_mask_dispatch( - inpt: tv_tensors.Mask, size: List[int], max_size: Optional[int] = None, **kwargs: Any + inpt: tv_tensors.Mask, size: list[int], max_size: Optional[int] = None, **kwargs: Any ) -> tv_tensors.Mask: output = resize_mask(inpt.as_subclass(torch.Tensor), size, max_size=max_size) return tv_tensors.wrap(output, like=inpt) @@ -335,10 +336,10 @@ def _resize_mask_dispatch( def resize_bounding_boxes( bounding_boxes: torch.Tensor, - canvas_size: Tuple[int, int], - size: Optional[List[int]], + canvas_size: tuple[int, int], + size: Optional[list[int]], max_size: Optional[int] = 
None, -) -> Tuple[torch.Tensor, Tuple[int, int]]: +) -> tuple[torch.Tensor, tuple[int, int]]: old_height, old_width = canvas_size new_height, new_width = _compute_resized_output_size(canvas_size, size=size, max_size=max_size) @@ -356,7 +357,7 @@ def resize_bounding_boxes( @_register_kernel_internal(resize, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _resize_bounding_boxes_dispatch( - inpt: tv_tensors.BoundingBoxes, size: Optional[List[int]], max_size: Optional[int] = None, **kwargs: Any + inpt: tv_tensors.BoundingBoxes, size: Optional[list[int]], max_size: Optional[int] = None, **kwargs: Any ) -> tv_tensors.BoundingBoxes: output, canvas_size = resize_bounding_boxes( inpt.as_subclass(torch.Tensor), inpt.canvas_size, size, max_size=max_size @@ -367,7 +368,7 @@ def _resize_bounding_boxes_dispatch( @_register_kernel_internal(resize, tv_tensors.Video) def resize_video( video: torch.Tensor, - size: Optional[List[int]], + size: Optional[list[int]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -378,12 +379,12 @@ def resize_video( def affine( inpt: torch.Tensor, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, fill: _FillTypeJIT = None, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, ) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.RandomAffine` for details.""" if torch.jit.is_scripting(): @@ -415,12 +416,12 @@ def affine( def _affine_parse_args( angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - center: Optional[List[float]] = None, -) -> Tuple[float, List[float], List[float], Optional[List[float]]]: + center: Optional[list[float]] = None, +) -> tuple[float, list[float], list[float], Optional[list[float]]]: if not isinstance(angle, (int, float)): raise TypeError("Argument angle should be int or float") @@ -467,8 +468,8 @@ def _affine_parse_args( def _get_inverse_affine_matrix( - center: List[float], angle: float, translate: List[float], scale: float, shear: List[float], inverted: bool = True -) -> List[float]: + center: list[float], angle: float, translate: list[float], scale: float, shear: list[float], inverted: bool = True +) -> list[float]: # Helper method to compute inverse matrix for affine transformation # Pillow requires inverse affine transformation matrix: @@ -527,14 +528,14 @@ def _get_inverse_affine_matrix( return matrix -def _compute_affine_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]: +def _compute_affine_output_size(matrix: list[float], w: int, h: int) -> tuple[int, int]: if torch.compiler.is_compiling() and not torch.jit.is_scripting(): return _compute_affine_output_size_python(matrix, w, h) else: return _compute_affine_output_size_tensor(matrix, w, h) -def _compute_affine_output_size_tensor(matrix: List[float], w: int, h: int) -> Tuple[int, int]: +def _compute_affine_output_size_tensor(matrix: list[float], w: int, h: int) -> tuple[int, int]: # Inspired of PIL implementation: # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054 @@ -569,7 +570,7 @@ def _compute_affine_output_size_tensor(matrix: List[float], w: int, h: int) -> T return 
int(size[0]), int(size[1]) # w, h -def _compute_affine_output_size_python(matrix: List[float], w: int, h: int) -> Tuple[int, int]: +def _compute_affine_output_size_python(matrix: list[float], w: int, h: int) -> tuple[int, int]: # Mostly copied from PIL implementation: # The only difference is with transformed points as input matrix has zero translation part here and # PIL has a centered translation part. @@ -641,11 +642,11 @@ def _apply_grid_transform(img: torch.Tensor, grid: torch.Tensor, mode: str, fill def _assert_grid_transform_inputs( image: torch.Tensor, - matrix: Optional[List[float]], + matrix: Optional[list[float]], interpolation: str, fill: _FillTypeJIT, - supported_interpolation_modes: List[str], - coeffs: Optional[List[float]] = None, + supported_interpolation_modes: list[str], + coeffs: Optional[list[float]] = None, ) -> None: if matrix is not None: if not isinstance(matrix, list): @@ -704,12 +705,12 @@ def _affine_grid( def affine_image( image: torch.Tensor, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, fill: _FillTypeJIT = None, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, ) -> torch.Tensor: interpolation = _check_interpolation(interpolation) @@ -737,12 +738,12 @@ def affine_image( def _affine_image_pil( image: PIL.Image.Image, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, fill: _FillTypeJIT = None, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, ) -> PIL.Image.Image: interpolation = _check_interpolation(interpolation) angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center) @@ -761,14 +762,14 @@ def _affine_image_pil( def _affine_bounding_boxes_with_expand( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], + canvas_size: tuple[int, int], angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], - center: Optional[List[float]] = None, + shear: list[float], + center: Optional[list[float]] = None, expand: bool = False, -) -> Tuple[torch.Tensor, Tuple[int, int]]: +) -> tuple[torch.Tensor, tuple[int, int]]: if bounding_boxes.numel() == 0: return bounding_boxes, canvas_size @@ -850,12 +851,12 @@ def _affine_bounding_boxes_with_expand( def affine_bounding_boxes( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], + canvas_size: tuple[int, int], angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], - center: Optional[List[float]] = None, + shear: list[float], + center: Optional[list[float]] = None, ) -> torch.Tensor: out_box, _ = _affine_bounding_boxes_with_expand( bounding_boxes, @@ -875,10 +876,10 @@ def affine_bounding_boxes( def _affine_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], - center: Optional[List[float]] = None, + shear: list[float], + center: Optional[list[float]] = None, **kwargs, ) -> tv_tensors.BoundingBoxes: output = affine_bounding_boxes( @@ -897,11 +898,11 @@ def 
_affine_bounding_boxes_dispatch( def affine_mask( mask: torch.Tensor, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], fill: _FillTypeJIT = None, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, ) -> torch.Tensor: if mask.ndim < 3: mask = mask.unsqueeze(0) @@ -930,11 +931,11 @@ def affine_mask( def _affine_mask_dispatch( inpt: tv_tensors.Mask, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], fill: _FillTypeJIT = None, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, **kwargs, ) -> tv_tensors.Mask: output = affine_mask( @@ -953,12 +954,12 @@ def _affine_mask_dispatch( def affine_video( video: torch.Tensor, angle: Union[int, float], - translate: List[float], + translate: list[float], scale: float, - shear: List[float], + shear: list[float], interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, fill: _FillTypeJIT = None, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, ) -> torch.Tensor: return affine_image( video, @@ -977,7 +978,7 @@ def rotate( angle: float, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, expand: bool = False, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, fill: _FillTypeJIT = None, ) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.RandomRotation` for details.""" @@ -997,7 +998,7 @@ def rotate_image( angle: float, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, expand: bool = False, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, fill: _FillTypeJIT = None, ) -> torch.Tensor: angle = angle % 360 # shift angle to [0, 360) range @@ -1045,7 +1046,7 @@ def _rotate_image_pil( angle: float, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, expand: bool = False, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, fill: _FillTypeJIT = None, ) -> PIL.Image.Image: interpolation = _check_interpolation(interpolation) @@ -1058,11 +1059,11 @@ def _rotate_image_pil( def rotate_bounding_boxes( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], + canvas_size: tuple[int, int], angle: float, expand: bool = False, - center: Optional[List[float]] = None, -) -> Tuple[torch.Tensor, Tuple[int, int]]: + center: Optional[list[float]] = None, +) -> tuple[torch.Tensor, tuple[int, int]]: return _affine_bounding_boxes_with_expand( bounding_boxes, format=format, @@ -1078,7 +1079,7 @@ def rotate_bounding_boxes( @_register_kernel_internal(rotate, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _rotate_bounding_boxes_dispatch( - inpt: tv_tensors.BoundingBoxes, angle: float, expand: bool = False, center: Optional[List[float]] = None, **kwargs + inpt: tv_tensors.BoundingBoxes, angle: float, expand: bool = False, center: Optional[list[float]] = None, **kwargs ) -> tv_tensors.BoundingBoxes: output, canvas_size = rotate_bounding_boxes( inpt.as_subclass(torch.Tensor), @@ -1095,7 +1096,7 @@ def rotate_mask( mask: torch.Tensor, angle: float, expand: bool = False, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, fill: _FillTypeJIT = None, ) -> torch.Tensor: if mask.ndim < 3: @@ -1124,7 +1125,7 @@ def _rotate_mask_dispatch( inpt: tv_tensors.Mask, angle: float, expand: bool = 
False, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, fill: _FillTypeJIT = None, **kwargs, ) -> tv_tensors.Mask: @@ -1138,7 +1139,7 @@ def rotate_video( angle: float, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, expand: bool = False, - center: Optional[List[float]] = None, + center: Optional[list[float]] = None, fill: _FillTypeJIT = None, ) -> torch.Tensor: return rotate_image(video, angle, interpolation=interpolation, expand=expand, fill=fill, center=center) @@ -1146,8 +1147,8 @@ def rotate_video( def pad( inpt: torch.Tensor, - padding: List[int], - fill: Optional[Union[int, float, List[float]]] = None, + padding: list[int], + fill: Optional[Union[int, float, list[float]]] = None, padding_mode: str = "constant", ) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.Pad` for details.""" @@ -1160,7 +1161,7 @@ def pad( return kernel(inpt, padding=padding, fill=fill, padding_mode=padding_mode) -def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: +def _parse_pad_padding(padding: Union[int, list[int]]) -> list[int]: if isinstance(padding, int): pad_left = pad_right = pad_top = pad_bottom = padding elif isinstance(padding, (tuple, list)): @@ -1188,8 +1189,8 @@ def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: @_register_kernel_internal(pad, tv_tensors.Image) def pad_image( image: torch.Tensor, - padding: List[int], - fill: Optional[Union[int, float, List[float]]] = None, + padding: list[int], + fill: Optional[Union[int, float, list[float]]] = None, padding_mode: str = "constant", ) -> torch.Tensor: # Be aware that while `padding` has order `[left, top, right, bottom]`, `torch_padding` uses @@ -1216,7 +1217,7 @@ def pad_image( def _pad_with_scalar_fill( image: torch.Tensor, - torch_padding: List[int], + torch_padding: list[int], fill: Union[int, float], padding_mode: str, ) -> torch.Tensor: @@ -1262,8 +1263,8 @@ def _pad_with_scalar_fill( # TODO: This should be removed once torch_pad supports non-scalar padding values def _pad_with_vector_fill( image: torch.Tensor, - torch_padding: List[int], - fill: List[float], + torch_padding: list[int], + fill: list[float], padding_mode: str, ) -> torch.Tensor: if padding_mode != "constant": @@ -1294,8 +1295,8 @@ def _pad_with_vector_fill( @_register_kernel_internal(pad, tv_tensors.Mask) def pad_mask( mask: torch.Tensor, - padding: List[int], - fill: Optional[Union[int, float, List[float]]] = None, + padding: list[int], + fill: Optional[Union[int, float, list[float]]] = None, padding_mode: str = "constant", ) -> torch.Tensor: if fill is None: @@ -1321,10 +1322,10 @@ def pad_mask( def pad_bounding_boxes( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], - padding: List[int], + canvas_size: tuple[int, int], + padding: list[int], padding_mode: str = "constant", -) -> Tuple[torch.Tensor, Tuple[int, int]]: +) -> tuple[torch.Tensor, tuple[int, int]]: if padding_mode not in ["constant"]: # TODO: add support of other padding modes raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes") @@ -1347,7 +1348,7 @@ def pad_bounding_boxes( @_register_kernel_internal(pad, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _pad_bounding_boxes_dispatch( - inpt: tv_tensors.BoundingBoxes, padding: List[int], padding_mode: str = "constant", **kwargs + inpt: tv_tensors.BoundingBoxes, padding: list[int], padding_mode: str = "constant", **kwargs ) -> tv_tensors.BoundingBoxes: output, canvas_size = 
pad_bounding_boxes( inpt.as_subclass(torch.Tensor), @@ -1362,8 +1363,8 @@ def _pad_bounding_boxes_dispatch( @_register_kernel_internal(pad, tv_tensors.Video) def pad_video( video: torch.Tensor, - padding: List[int], - fill: Optional[Union[int, float, List[float]]] = None, + padding: list[int], + fill: Optional[Union[int, float, list[float]]] = None, padding_mode: str = "constant", ) -> torch.Tensor: return pad_image(video, padding, fill=fill, padding_mode=padding_mode) @@ -1411,7 +1412,7 @@ def crop_bounding_boxes( left: int, height: int, width: int, -) -> Tuple[torch.Tensor, Tuple[int, int]]: +) -> tuple[torch.Tensor, tuple[int, int]]: # Crop or implicit pad if left and/or top have negative values: if format == tv_tensors.BoundingBoxFormat.XYXY: @@ -1458,11 +1459,11 @@ def crop_video(video: torch.Tensor, top: int, left: int, height: int, width: int def perspective( inpt: torch.Tensor, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, - coefficients: Optional[List[float]] = None, + coefficients: Optional[list[float]] = None, ) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.RandomPerspective` for details.""" if torch.jit.is_scripting(): @@ -1488,7 +1489,7 @@ def perspective( ) -def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, device: torch.device) -> torch.Tensor: +def _perspective_grid(coeffs: list[float], ow: int, oh: int, dtype: torch.dtype, device: torch.device) -> torch.Tensor: # https://github.com/python-pillow/Pillow/blob/4634eafe3c695a014267eefdce830b4a825beed7/ # src/libImaging/Geometry.c#L394 @@ -1519,10 +1520,10 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, def _perspective_coefficients( - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], - coefficients: Optional[List[float]], -) -> List[float]: + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], + coefficients: Optional[list[float]], +) -> list[float]: if coefficients is not None: if startpoints is not None and endpoints is not None: raise ValueError("The startpoints/endpoints and the coefficients shouldn't be defined concurrently.") @@ -1539,11 +1540,11 @@ def _perspective_coefficients( @_register_kernel_internal(perspective, tv_tensors.Image) def perspective_image( image: torch.Tensor, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, - coefficients: Optional[List[float]] = None, + coefficients: Optional[list[float]] = None, ) -> torch.Tensor: perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients) interpolation = _check_interpolation(interpolation) @@ -1566,11 +1567,11 @@ def perspective_image( @_register_kernel_internal(perspective, PIL.Image.Image) def _perspective_image_pil( image: PIL.Image.Image, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, - coefficients: Optional[List[float]] = None, + coefficients: 
Optional[list[float]] = None, ) -> PIL.Image.Image: perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients) interpolation = _check_interpolation(interpolation) @@ -1580,10 +1581,10 @@ def _perspective_image_pil( def perspective_bounding_boxes( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], - coefficients: Optional[List[float]] = None, + canvas_size: tuple[int, int], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], + coefficients: Optional[list[float]] = None, ) -> torch.Tensor: if bounding_boxes.numel() == 0: return bounding_boxes @@ -1674,9 +1675,9 @@ def perspective_bounding_boxes( @_register_kernel_internal(perspective, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _perspective_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], - coefficients: Optional[List[float]] = None, + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], + coefficients: Optional[list[float]] = None, **kwargs, ) -> tv_tensors.BoundingBoxes: output = perspective_bounding_boxes( @@ -1692,10 +1693,10 @@ def _perspective_bounding_boxes_dispatch( def perspective_mask( mask: torch.Tensor, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], fill: _FillTypeJIT = None, - coefficients: Optional[List[float]] = None, + coefficients: Optional[list[float]] = None, ) -> torch.Tensor: if mask.ndim < 3: mask = mask.unsqueeze(0) @@ -1716,10 +1717,10 @@ def perspective_mask( @_register_kernel_internal(perspective, tv_tensors.Mask, tv_tensor_wrapper=False) def _perspective_mask_dispatch( inpt: tv_tensors.Mask, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], fill: _FillTypeJIT = None, - coefficients: Optional[List[float]] = None, + coefficients: Optional[list[float]] = None, **kwargs, ) -> tv_tensors.Mask: output = perspective_mask( @@ -1735,11 +1736,11 @@ def _perspective_mask_dispatch( @_register_kernel_internal(perspective, tv_tensors.Video) def perspective_video( video: torch.Tensor, - startpoints: Optional[List[List[int]]], - endpoints: Optional[List[List[int]]], + startpoints: Optional[list[list[int]]], + endpoints: Optional[list[list[int]]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, - coefficients: Optional[List[float]] = None, + coefficients: Optional[list[float]] = None, ) -> torch.Tensor: return perspective_image( video, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients @@ -1819,7 +1820,7 @@ def _elastic_image_pil( return to_pil_image(output, mode=image.mode) -def _create_identity_grid(size: Tuple[int, int], device: torch.device, dtype: torch.dtype) -> torch.Tensor: +def _create_identity_grid(size: tuple[int, int], device: torch.device, dtype: torch.dtype) -> torch.Tensor: sy, sx = size base_grid = torch.empty(1, sy, sx, 2, device=device, dtype=dtype) x_grid = torch.linspace((-sx + 1) / sx, (sx - 1) / sx, sx, device=device, dtype=dtype) @@ -1834,7 +1835,7 @@ def _create_identity_grid(size: Tuple[int, int], device: torch.device, dtype: to def elastic_bounding_boxes( bounding_boxes: 
torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], + canvas_size: tuple[int, int], displacement: torch.Tensor, ) -> torch.Tensor: expected_shape = (1, canvas_size[0], canvas_size[1], 2) @@ -1935,7 +1936,7 @@ def elastic_video( return elastic_image(video, displacement, interpolation=interpolation, fill=fill) -def center_crop(inpt: torch.Tensor, output_size: List[int]) -> torch.Tensor: +def center_crop(inpt: torch.Tensor, output_size: list[int]) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.RandomCrop` for details.""" if torch.jit.is_scripting(): return center_crop_image(inpt, output_size=output_size) @@ -1946,7 +1947,7 @@ def center_crop(inpt: torch.Tensor, output_size: List[int]) -> torch.Tensor: return kernel(inpt, output_size=output_size) -def _center_crop_parse_output_size(output_size: List[int]) -> List[int]: +def _center_crop_parse_output_size(output_size: list[int]) -> list[int]: if isinstance(output_size, numbers.Number): s = int(output_size) return [s, s] @@ -1956,7 +1957,7 @@ def _center_crop_parse_output_size(output_size: List[int]) -> List[int]: return list(output_size) -def _center_crop_compute_padding(crop_height: int, crop_width: int, image_height: int, image_width: int) -> List[int]: +def _center_crop_compute_padding(crop_height: int, crop_width: int, image_height: int, image_width: int) -> list[int]: return [ (crop_width - image_width) // 2 if crop_width > image_width else 0, (crop_height - image_height) // 2 if crop_height > image_height else 0, @@ -1967,7 +1968,7 @@ def _center_crop_compute_padding(crop_height: int, crop_width: int, image_height def _center_crop_compute_crop_anchor( crop_height: int, crop_width: int, image_height: int, image_width: int -) -> Tuple[int, int]: +) -> tuple[int, int]: crop_top = int(round((image_height - crop_height) / 2.0)) crop_left = int(round((image_width - crop_width) / 2.0)) return crop_top, crop_left @@ -1975,7 +1976,7 @@ def _center_crop_compute_crop_anchor( @_register_kernel_internal(center_crop, torch.Tensor) @_register_kernel_internal(center_crop, tv_tensors.Image) -def center_crop_image(image: torch.Tensor, output_size: List[int]) -> torch.Tensor: +def center_crop_image(image: torch.Tensor, output_size: list[int]) -> torch.Tensor: crop_height, crop_width = _center_crop_parse_output_size(output_size) shape = image.shape if image.numel() == 0: @@ -1995,7 +1996,7 @@ def center_crop_image(image: torch.Tensor, output_size: List[int]) -> torch.Tens @_register_kernel_internal(center_crop, PIL.Image.Image) -def _center_crop_image_pil(image: PIL.Image.Image, output_size: List[int]) -> PIL.Image.Image: +def _center_crop_image_pil(image: PIL.Image.Image, output_size: list[int]) -> PIL.Image.Image: crop_height, crop_width = _center_crop_parse_output_size(output_size) image_height, image_width = _get_size_image_pil(image) @@ -2014,9 +2015,9 @@ def _center_crop_image_pil(image: PIL.Image.Image, output_size: List[int]) -> PI def center_crop_bounding_boxes( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], - output_size: List[int], -) -> Tuple[torch.Tensor, Tuple[int, int]]: + canvas_size: tuple[int, int], + output_size: list[int], +) -> tuple[torch.Tensor, tuple[int, int]]: crop_height, crop_width = _center_crop_parse_output_size(output_size) crop_top, crop_left = _center_crop_compute_crop_anchor(crop_height, crop_width, *canvas_size) return crop_bounding_boxes( @@ -2026,7 +2027,7 @@ def center_crop_bounding_boxes( 
@_register_kernel_internal(center_crop, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _center_crop_bounding_boxes_dispatch( - inpt: tv_tensors.BoundingBoxes, output_size: List[int] + inpt: tv_tensors.BoundingBoxes, output_size: list[int] ) -> tv_tensors.BoundingBoxes: output, canvas_size = center_crop_bounding_boxes( inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, output_size=output_size @@ -2035,7 +2036,7 @@ def _center_crop_bounding_boxes_dispatch( @_register_kernel_internal(center_crop, tv_tensors.Mask) -def center_crop_mask(mask: torch.Tensor, output_size: List[int]) -> torch.Tensor: +def center_crop_mask(mask: torch.Tensor, output_size: list[int]) -> torch.Tensor: if mask.ndim < 3: mask = mask.unsqueeze(0) needs_squeeze = True @@ -2051,7 +2052,7 @@ def center_crop_mask(mask: torch.Tensor, output_size: List[int]) -> torch.Tensor @_register_kernel_internal(center_crop, tv_tensors.Video) -def center_crop_video(video: torch.Tensor, output_size: List[int]) -> torch.Tensor: +def center_crop_video(video: torch.Tensor, output_size: list[int]) -> torch.Tensor: return center_crop_image(video, output_size) @@ -2061,7 +2062,7 @@ def resized_crop( left: int, height: int, width: int, - size: List[int], + size: list[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> torch.Tensor: @@ -2101,7 +2102,7 @@ def resized_crop_image( left: int, height: int, width: int, - size: List[int], + size: list[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> torch.Tensor: @@ -2115,7 +2116,7 @@ def _resized_crop_image_pil( left: int, height: int, width: int, - size: List[int], + size: list[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, ) -> PIL.Image.Image: image = _crop_image_pil(image, top, left, height, width) @@ -2129,7 +2130,7 @@ def _resized_crop_image_pil_dispatch( left: int, height: int, width: int, - size: List[int], + size: list[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> PIL.Image.Image: @@ -2153,15 +2154,15 @@ def resized_crop_bounding_boxes( left: int, height: int, width: int, - size: List[int], -) -> Tuple[torch.Tensor, Tuple[int, int]]: + size: list[int], +) -> tuple[torch.Tensor, tuple[int, int]]: bounding_boxes, canvas_size = crop_bounding_boxes(bounding_boxes, format, top, left, height, width) return resize_bounding_boxes(bounding_boxes, canvas_size=canvas_size, size=size) @_register_kernel_internal(resized_crop, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _resized_crop_bounding_boxes_dispatch( - inpt: tv_tensors.BoundingBoxes, top: int, left: int, height: int, width: int, size: List[int], **kwargs + inpt: tv_tensors.BoundingBoxes, top: int, left: int, height: int, width: int, size: list[int], **kwargs ) -> tv_tensors.BoundingBoxes: output, canvas_size = resized_crop_bounding_boxes( inpt.as_subclass(torch.Tensor), format=inpt.format, top=top, left=left, height=height, width=width, size=size @@ -2175,7 +2176,7 @@ def resized_crop_mask( left: int, height: int, width: int, - size: List[int], + size: list[int], ) -> torch.Tensor: mask = crop_mask(mask, top, left, height, width) return resize_mask(mask, size) @@ -2183,7 +2184,7 @@ def resized_crop_mask( @_register_kernel_internal(resized_crop, tv_tensors.Mask, tv_tensor_wrapper=False) def _resized_crop_mask_dispatch( - inpt: tv_tensors.Mask, top: int, left: 
int, height: int, width: int, size: List[int], **kwargs + inpt: tv_tensors.Mask, top: int, left: int, height: int, width: int, size: list[int], **kwargs ) -> tv_tensors.Mask: output = resized_crop_mask( inpt.as_subclass(torch.Tensor), top=top, left=left, height=height, width=width, size=size @@ -2198,7 +2199,7 @@ def resized_crop_video( left: int, height: int, width: int, - size: List[int], + size: list[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[bool] = True, ) -> torch.Tensor: @@ -2208,8 +2209,8 @@ def resized_crop_video( def five_crop( - inpt: torch.Tensor, size: List[int] -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + inpt: torch.Tensor, size: list[int] +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """See :class:`~torchvision.transforms.v2.FiveCrop` for details.""" if torch.jit.is_scripting(): return five_crop_image(inpt, size=size) @@ -2220,7 +2221,7 @@ def five_crop( return kernel(inpt, size=size) -def _parse_five_crop_size(size: List[int]) -> List[int]: +def _parse_five_crop_size(size: list[int]) -> list[int]: if isinstance(size, numbers.Number): s = int(size) size = [s, s] @@ -2237,8 +2238,8 @@ def _parse_five_crop_size(size: List[int]) -> List[int]: @_register_five_ten_crop_kernel_internal(five_crop, torch.Tensor) @_register_five_ten_crop_kernel_internal(five_crop, tv_tensors.Image) def five_crop_image( - image: torch.Tensor, size: List[int] -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + image: torch.Tensor, size: list[int] +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: crop_height, crop_width = _parse_five_crop_size(size) image_height, image_width = image.shape[-2:] @@ -2256,8 +2257,8 @@ def five_crop_image( @_register_five_ten_crop_kernel_internal(five_crop, PIL.Image.Image) def _five_crop_image_pil( - image: PIL.Image.Image, size: List[int] -) -> Tuple[PIL.Image.Image, PIL.Image.Image, PIL.Image.Image, PIL.Image.Image, PIL.Image.Image]: + image: PIL.Image.Image, size: list[int] +) -> tuple[PIL.Image.Image, PIL.Image.Image, PIL.Image.Image, PIL.Image.Image, PIL.Image.Image]: crop_height, crop_width = _parse_five_crop_size(size) image_height, image_width = _get_size_image_pil(image) @@ -2275,14 +2276,14 @@ def _five_crop_image_pil( @_register_five_ten_crop_kernel_internal(five_crop, tv_tensors.Video) def five_crop_video( - video: torch.Tensor, size: List[int] -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + video: torch.Tensor, size: list[int] +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: return five_crop_image(video, size) def ten_crop( - inpt: torch.Tensor, size: List[int], vertical_flip: bool = False -) -> Tuple[ + inpt: torch.Tensor, size: list[int], vertical_flip: bool = False +) -> tuple[ torch.Tensor, torch.Tensor, torch.Tensor, @@ -2307,8 +2308,8 @@ def ten_crop( @_register_five_ten_crop_kernel_internal(ten_crop, torch.Tensor) @_register_five_ten_crop_kernel_internal(ten_crop, tv_tensors.Image) def ten_crop_image( - image: torch.Tensor, size: List[int], vertical_flip: bool = False -) -> Tuple[ + image: torch.Tensor, size: list[int], vertical_flip: bool = False +) -> tuple[ torch.Tensor, torch.Tensor, torch.Tensor, @@ -2334,8 +2335,8 @@ def ten_crop_image( @_register_five_ten_crop_kernel_internal(ten_crop, PIL.Image.Image) def _ten_crop_image_pil( - image: PIL.Image.Image, size: List[int], 
vertical_flip: bool = False -) -> Tuple[ + image: PIL.Image.Image, size: list[int], vertical_flip: bool = False +) -> tuple[ PIL.Image.Image, PIL.Image.Image, PIL.Image.Image, @@ -2361,8 +2362,8 @@ def _ten_crop_image_pil( @_register_five_ten_crop_kernel_internal(ten_crop, tv_tensors.Video) def ten_crop_video( - video: torch.Tensor, size: List[int], vertical_flip: bool = False -) -> Tuple[ + video: torch.Tensor, size: list[int], vertical_flip: bool = False +) -> tuple[ torch.Tensor, torch.Tensor, torch.Tensor, diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index b90e5fb7b5b..5de93d4302e 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -11,7 +11,7 @@ from ._utils import _get_kernel, _register_kernel_internal, is_pure_tensor -def get_dimensions(inpt: torch.Tensor) -> List[int]: +def get_dimensions(inpt: torch.Tensor) -> list[int]: if torch.jit.is_scripting(): return get_dimensions_image(inpt) @@ -23,7 +23,7 @@ def get_dimensions(inpt: torch.Tensor) -> List[int]: @_register_kernel_internal(get_dimensions, torch.Tensor) @_register_kernel_internal(get_dimensions, tv_tensors.Image, tv_tensor_wrapper=False) -def get_dimensions_image(image: torch.Tensor) -> List[int]: +def get_dimensions_image(image: torch.Tensor) -> list[int]: chw = list(image.shape[-3:]) ndims = len(chw) if ndims == 3: @@ -39,7 +39,7 @@ def get_dimensions_image(image: torch.Tensor) -> List[int]: @_register_kernel_internal(get_dimensions, tv_tensors.Video, tv_tensor_wrapper=False) -def get_dimensions_video(video: torch.Tensor) -> List[int]: +def get_dimensions_video(video: torch.Tensor) -> list[int]: return get_dimensions_image(video) @@ -79,7 +79,7 @@ def get_num_channels_video(video: torch.Tensor) -> int: get_image_num_channels = get_num_channels -def get_size(inpt: torch.Tensor) -> List[int]: +def get_size(inpt: torch.Tensor) -> list[int]: if torch.jit.is_scripting(): return get_size_image(inpt) @@ -91,7 +91,7 @@ def get_size(inpt: torch.Tensor) -> List[int]: @_register_kernel_internal(get_size, torch.Tensor) @_register_kernel_internal(get_size, tv_tensors.Image, tv_tensor_wrapper=False) -def get_size_image(image: torch.Tensor) -> List[int]: +def get_size_image(image: torch.Tensor) -> list[int]: hw = list(image.shape[-2:]) ndims = len(hw) if ndims == 2: @@ -101,23 +101,23 @@ def get_size_image(image: torch.Tensor) -> List[int]: @_register_kernel_internal(get_size, PIL.Image.Image) -def _get_size_image_pil(image: PIL.Image.Image) -> List[int]: +def _get_size_image_pil(image: PIL.Image.Image) -> list[int]: width, height = _FP.get_image_size(image) return [height, width] @_register_kernel_internal(get_size, tv_tensors.Video, tv_tensor_wrapper=False) -def get_size_video(video: torch.Tensor) -> List[int]: +def get_size_video(video: torch.Tensor) -> list[int]: return get_size_image(video) @_register_kernel_internal(get_size, tv_tensors.Mask, tv_tensor_wrapper=False) -def get_size_mask(mask: torch.Tensor) -> List[int]: +def get_size_mask(mask: torch.Tensor) -> list[int]: return get_size_image(mask) @_register_kernel_internal(get_size, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) -def get_size_bounding_boxes(bounding_box: tv_tensors.BoundingBoxes) -> List[int]: +def get_size_bounding_boxes(bounding_box: tv_tensors.BoundingBoxes) -> list[int]: return list(bounding_box.canvas_size) @@ -237,7 +237,7 @@ def convert_bounding_box_format( def _clamp_bounding_boxes( - bounding_boxes: torch.Tensor, format: 
BoundingBoxFormat, canvas_size: Tuple[int, int] + bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int] ) -> torch.Tensor: # TODO: Investigate if it makes sense from a performance perspective to have an implementation for every # BoundingBoxFormat instead of converting back and forth @@ -257,7 +257,7 @@ def _clamp_bounding_boxes( def clamp_bounding_boxes( inpt: torch.Tensor, format: Optional[BoundingBoxFormat] = None, - canvas_size: Optional[Tuple[int, int]] = None, + canvas_size: Optional[tuple[int, int]] = None, ) -> torch.Tensor: """See :func:`~torchvision.transforms.v2.ClampBoundingBoxes` for details.""" if not torch.jit.is_scripting(): diff --git a/torchvision/transforms/v2/functional/_misc.py b/torchvision/transforms/v2/functional/_misc.py index f40bf117753..b3a4762b532 100644 --- a/torchvision/transforms/v2/functional/_misc.py +++ b/torchvision/transforms/v2/functional/_misc.py @@ -18,8 +18,8 @@ def normalize( inpt: torch.Tensor, - mean: List[float], - std: List[float], + mean: list[float], + std: list[float], inplace: bool = False, ) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.Normalize` for details.""" @@ -34,7 +34,7 @@ def normalize( @_register_kernel_internal(normalize, torch.Tensor) @_register_kernel_internal(normalize, tv_tensors.Image) -def normalize_image(image: torch.Tensor, mean: List[float], std: List[float], inplace: bool = False) -> torch.Tensor: +def normalize_image(image: torch.Tensor, mean: list[float], std: list[float], inplace: bool = False) -> torch.Tensor: if not image.is_floating_point(): raise TypeError(f"Input tensor should be a float tensor. Got {image.dtype}.") @@ -68,11 +68,11 @@ def normalize_image(image: torch.Tensor, mean: List[float], std: List[float], in @_register_kernel_internal(normalize, tv_tensors.Video) -def normalize_video(video: torch.Tensor, mean: List[float], std: List[float], inplace: bool = False) -> torch.Tensor: +def normalize_video(video: torch.Tensor, mean: list[float], std: list[float], inplace: bool = False) -> torch.Tensor: return normalize_image(video, mean, std, inplace=inplace) -def gaussian_blur(inpt: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> torch.Tensor: +def gaussian_blur(inpt: torch.Tensor, kernel_size: list[int], sigma: Optional[list[float]] = None) -> torch.Tensor: """See :class:`~torchvision.transforms.v2.GaussianBlur` for details.""" if torch.jit.is_scripting(): return gaussian_blur_image(inpt, kernel_size=kernel_size, sigma=sigma) @@ -91,7 +91,7 @@ def _get_gaussian_kernel1d(kernel_size: int, sigma: float, dtype: torch.dtype, d def _get_gaussian_kernel2d( - kernel_size: List[int], sigma: List[float], dtype: torch.dtype, device: torch.device + kernel_size: list[int], sigma: list[float], dtype: torch.dtype, device: torch.device ) -> torch.Tensor: kernel1d_x = _get_gaussian_kernel1d(kernel_size[0], sigma[0], dtype, device) kernel1d_y = _get_gaussian_kernel1d(kernel_size[1], sigma[1], dtype, device) @@ -102,7 +102,7 @@ def _get_gaussian_kernel2d( @_register_kernel_internal(gaussian_blur, torch.Tensor) @_register_kernel_internal(gaussian_blur, tv_tensors.Image) def gaussian_blur_image( - image: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None + image: torch.Tensor, kernel_size: list[int], sigma: Optional[list[float]] = None ) -> torch.Tensor: # TODO: consider deprecating integers from sigma on the future if isinstance(kernel_size, int): @@ -167,7 +167,7 @@ def gaussian_blur_image( 
@_register_kernel_internal(gaussian_blur, PIL.Image.Image) def _gaussian_blur_image_pil( - image: PIL.Image.Image, kernel_size: List[int], sigma: Optional[List[float]] = None + image: PIL.Image.Image, kernel_size: list[int], sigma: Optional[list[float]] = None ) -> PIL.Image.Image: t_img = pil_to_tensor(image) output = gaussian_blur_image(t_img, kernel_size=kernel_size, sigma=sigma) @@ -176,7 +176,7 @@ def _gaussian_blur_image_pil( @_register_kernel_internal(gaussian_blur, tv_tensors.Video) def gaussian_blur_video( - video: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None + video: torch.Tensor, kernel_size: list[int], sigma: Optional[list[float]] = None ) -> torch.Tensor: return gaussian_blur_image(video, kernel_size, sigma) @@ -330,10 +330,10 @@ def _to_dtype_tensor_dispatch(inpt: torch.Tensor, dtype: torch.dtype, scale: boo def sanitize_bounding_boxes( bounding_boxes: torch.Tensor, format: Optional[tv_tensors.BoundingBoxFormat] = None, - canvas_size: Optional[Tuple[int, int]] = None, + canvas_size: Optional[tuple[int, int]] = None, min_size: float = 1.0, min_area: float = 1.0, -) -> Tuple[torch.Tensor, torch.Tensor]: +) -> tuple[torch.Tensor, torch.Tensor]: """Remove degenerate/invalid bounding boxes and return the corresponding indexing mask. This removes bounding boxes that: @@ -400,7 +400,7 @@ def sanitize_bounding_boxes( def _get_sanitize_bounding_boxes_mask( bounding_boxes: torch.Tensor, format: tv_tensors.BoundingBoxFormat, - canvas_size: Tuple[int, int], + canvas_size: tuple[int, int], min_size: float = 1.0, min_area: float = 1.0, ) -> torch.Tensor: diff --git a/torchvision/transforms/v2/functional/_utils.py b/torchvision/transforms/v2/functional/_utils.py index fe0faeddc1b..bfe3c5e7555 100644 --- a/torchvision/transforms/v2/functional/_utils.py +++ b/torchvision/transforms/v2/functional/_utils.py @@ -1,11 +1,12 @@ import functools -from typing import Any, Callable, Dict, List, Optional, Sequence, Type, Union +from typing import Any, Callable, Dict, List, Optional, Type, Union +from collections.abc import Sequence import torch from torchvision import tv_tensors _FillType = Union[int, float, Sequence[int], Sequence[float], None] -_FillTypeJIT = Optional[List[float]] +_FillTypeJIT = Optional[list[float]] def is_pure_tensor(inpt: Any) -> bool: @@ -13,7 +14,7 @@ def is_pure_tensor(inpt: Any) -> bool: # {functional: {input_type: type_specific_kernel}} -_KERNEL_REGISTRY: Dict[Callable, Dict[Type, Callable]] = {} +_KERNEL_REGISTRY: dict[Callable, dict[type, Callable]] = {} def _kernel_tv_tensor_wrapper(kernel):
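
The hunks above apply one mechanical rule enabled by Python 3.9 (PEP 585): container annotations move from typing.List/Tuple/Dict/Type to the built-in list/tuple/dict/type, while Optional and Union stay in typing and Sequence moves to collections.abc. A minimal sketch of the resulting style, using a hypothetical kernel named blur (not a torchvision function), follows; only the annotation style is taken from the patch.

    from collections.abc import Sequence  # replaces typing.Sequence under the new style
    from typing import Optional, Union

    import torch

    # Same alias as in _utils.py above; Sequence[int] is subscriptable at runtime on 3.9+.
    _FillType = Union[int, float, Sequence[int], Sequence[float], None]

    def blur(image: torch.Tensor, kernel_size: list[int],
             sigma: Optional[list[float]] = None, fill: _FillType = None) -> tuple[int, int]:
        # Hypothetical kernel used only to show the annotation style; it simply
        # returns the spatial size of the input as a (height, width) tuple.
        return int(image.shape[-2]), int(image.shape[-1])

    print(blur(torch.zeros(3, 32, 48), kernel_size=[3, 3]))  # (32, 48)
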
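The pad hunks depend on the ordering noted in the comment inside pad_image: the user-facing padding argument is [left, top, right, bottom], while torch.nn.functional.pad expects [left, right, top, bottom] for the last two dimensions. A self-contained sketch mirroring the _parse_pad_padding logic shown above (independent of torchvision, name chosen here for illustration):

    from typing import Union

    def parse_pad_padding(padding: Union[int, list[int]]) -> list[int]:
        # Expand the user-facing padding ([left, top, right, bottom], or the 1- and
        # 2-element shorthands) into the [left, right, top, bottom] order that
        # torch.nn.functional.pad expects for the last two dimensions.
        if isinstance(padding, int):
            left = right = top = bottom = padding
        elif len(padding) == 1:
            left = right = top = bottom = padding[0]
        elif len(padding) == 2:
            left = right = padding[0]
            top = bottom = padding[1]
        elif len(padding) == 4:
            left, top, right, bottom = padding
        else:
            raise ValueError("padding must be an int or a list of 1, 2, or 4 ints")
        return [left, right, top, bottom]

    print(parse_pad_padding([1, 2, 3, 4]))  # [1, 3, 2, 4]
    print(parse_pad_padding(2))             # [2, 2, 2, 2]
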
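The _utils.py hunk re-annotates the kernel registry as dict[Callable, dict[type, Callable]]. Below is a simplified, self-contained sketch of the dispatch pattern behind the @_register_kernel_internal decorators seen throughout the patch; the names register_kernel and hflip are illustrative only, and the real registry additionally handles tv_tensor wrapping, PIL images, and torch.jit scripting.

    from typing import Any, Callable

    # {functional: {input_type: type_specific_kernel}}, in the new built-in-generic style.
    _KERNEL_REGISTRY: dict[Callable, dict[type, Callable]] = {}

    def register_kernel(functional: Callable, input_type: type) -> Callable:
        # Decorator factory: records `kernel` as the implementation of `functional`
        # for inputs of `input_type`.
        def decorator(kernel: Callable) -> Callable:
            _KERNEL_REGISTRY.setdefault(functional, {})[input_type] = kernel
            return kernel
        return decorator

    def hflip(inpt: Any) -> Any:
        # Look up the kernel registered for the concrete input type and delegate to it.
        kernel = _KERNEL_REGISTRY[hflip][type(inpt)]
        return kernel(inpt)

    @register_kernel(hflip, list)
    def _hflip_list(inpt: list) -> list:
        return inpt[::-1]

    print(hflip([1, 2, 3]))  # [3, 2, 1]
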