From f8b25744163d54612ae12203160e03623ddc9c09 Mon Sep 17 00:00:00 2001
From: Matt
Date: Wed, 24 May 2023 13:52:52 +0100
Subject: [PATCH] Better TF docstring types (#23477)

* Rework TF type hints to use | None instead of Optional[] for tf.Tensor

* Rework TF type hints to use | None instead of Optional[] for tf.Tensor

* Don't forget the imports

* Add the imports to tests too

* make fixup

* Refactor tests that depended on get_type_hints

* Better test refactor

* Fix an old hidden bug in the test_keras_fit input creation code

* Fix for the Deit tests
---
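Commentary (after the `---` separator, where notes to reviewers conventionally go): every hunk below applies the same mechanical change: `Optional[X]` becomes `X | None`, and each touched module gains `from __future__ import annotations`. A minimal sketch of why that import is required; the stand-in `Tensor` class is an assumption for illustration, not tf.Tensor:

    from __future__ import annotations  # PEP 563: annotations are stored as strings, never evaluated

    import inspect


    class Tensor:  # hypothetical stand-in for tf.Tensor, so the sketch runs without TensorFlow
        pass


    # With the __future__ import this definition works on Python 3.7+. Without it,
    # Python < 3.10 raises TypeError at definition time, because the PEP 604
    # "X | Y" union operator on classes only exists from 3.10 onward.
    def call(attention_mask: Tensor | None = None) -> Tensor | None:
        return attention_mask


    # This is also why tests that relied on typing.get_type_hints() were refactored:
    # get_type_hints() evaluates the stored annotation strings, so "Tensor | None"
    # fails the same way on Python < 3.10. Reading the raw string sidesteps the eval:
    print(inspect.signature(call).parameters["attention_mask"].annotation)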
 src/transformers/modeling_tf_outputs.py       | 190 ++++++++---------
 src/transformers/modeling_tf_utils.py         |   4 +-
 .../models/albert/modeling_tf_albert.py       | 119 ++++++-----
 .../models/bart/modeling_tf_bart.py           | 136 ++++++------
 .../models/bert/modeling_tf_bert.py           | 169 +++++++--------
 .../blenderbot/modeling_tf_blenderbot.py      |  72 ++++---
 .../modeling_tf_blenderbot_small.py           |  74 +++----
 .../models/blip/modeling_tf_blip.py           |  92 ++++----
 .../models/blip/modeling_tf_blip_text.py      |  13 +-
 .../models/camembert/modeling_tf_camembert.py | 137 ++++++------
 .../models/clip/modeling_tf_clip.py           |  52 ++---
 .../models/convbert/modeling_tf_convbert.py   |  78 +++----
 .../models/convnext/modeling_tf_convnext.py   |  10 +-
 .../models/ctrl/modeling_tf_ctrl.py           |  55 ++---
 .../models/cvt/modeling_tf_cvt.py             |  12 +-
 .../data2vec/modeling_tf_data2vec_vision.py   |  35 +--
 .../models/deberta/modeling_tf_deberta.py     |  72 ++++---
 .../deberta_v2/modeling_tf_deberta_v2.py      |  72 ++++---
 .../models/deit/modeling_tf_deit.py           |  36 ++--
 .../distilbert/modeling_tf_distilbert.py      |  63 +++---
 .../models/dpr/modeling_tf_dpr.py             |  54 ++---
 .../models/electra/modeling_tf_electra.py     | 135 ++++++------
 .../modeling_tf_encoder_decoder.py            |  21 +-
 .../models/esm/modeling_tf_esm.py             |  81 +++----
 .../models/flaubert/modeling_tf_flaubert.py   | 129 ++++++------
 .../models/funnel/modeling_tf_funnel.py       |  83 ++++----
 .../models/gpt2/modeling_tf_gpt2.py           |  86 ++++----
 .../models/gptj/modeling_tf_gptj.py           |  76 +++----
 .../models/groupvit/modeling_tf_groupvit.py   |  56 ++---
 .../models/hubert/modeling_tf_hubert.py       |  59 +++---
 .../models/layoutlm/modeling_tf_layoutlm.py   | 117 +++++-----
 .../layoutlmv3/modeling_tf_layoutlmv3.py      | 133 ++++++------
 .../models/led/modeling_tf_led.py             |  84 ++++----
 .../longformer/modeling_tf_longformer.py      | 149 ++++++-------
 .../models/lxmert/modeling_tf_lxmert.py       |  51 ++---
 .../models/marian/modeling_tf_marian.py       | 122 +++++------
 .../models/mbart/modeling_tf_mbart.py         | 110 +++++-----
 .../mobilebert/modeling_tf_mobilebert.py      | 123 +++++------
 .../models/mobilevit/modeling_tf_mobilevit.py |  14 +-
 .../models/mpnet/modeling_tf_mpnet.py         |  56 ++---
 .../models/openai/modeling_tf_openai.py       |  72 ++++---
 .../models/opt/modeling_tf_opt.py             |  52 ++---
 .../models/pegasus/modeling_tf_pegasus.py     | 112 +++++-----
 .../models/rag/modeling_tf_rag.py             | 121 +++++------
 .../models/rembert/modeling_tf_rembert.py     | 130 ++++++------
 .../models/roberta/modeling_tf_roberta.py     | 137 ++++++------
 .../modeling_tf_roberta_prelayernorm.py       | 137 ++++++------
 .../models/roformer/modeling_tf_roformer.py   |  96 ++++-----
 .../models/sam/modeling_tf_sam.py             |  45 ++--
 .../models/segformer/modeling_tf_segformer.py |   9 +-
 .../modeling_tf_speech_to_text.py             |  62 +++---
 .../models/swin/modeling_tf_swin.py           |  72 ++++---
 src/transformers/models/t5/modeling_tf_t5.py  |  49 +++--
 .../models/tapas/modeling_tf_tapas.py         |  99 ++++-----
 .../transfo_xl/modeling_tf_transfo_xl.py      |  54 ++---
 .../modeling_tf_vision_encoder_decoder.py     |  12 +-
 .../modeling_tf_vision_text_dual_encoder.py   |  12 +-
 .../models/vit/modeling_tf_vit.py             |  16 +-
 .../models/vit_mae/modeling_tf_vit_mae.py     |  31 +--
 .../models/wav2vec2/modeling_tf_wav2vec2.py   |  63 +++---
 .../models/whisper/modeling_tf_whisper.py     |  54 ++---
 .../models/xglm/modeling_tf_xglm.py           |  80 +++----
 .../models/xlm/modeling_tf_xlm.py             |  97 ++++-----
 .../xlm_roberta/modeling_tf_xlm_roberta.py    | 137 ++++++------
 .../models/xlnet/modeling_tf_xlnet.py         | 199 +++++++++---------
 ...tf_{{cookiecutter.lowercase_modelname}}.py | 156 +++++++-------
 tests/generation/test_tf_logits_process.py    |   2 +
 tests/generation/test_tf_utils.py             |   2 +
 .../models/albert/test_modeling_tf_albert.py  |   2 +
 tests/models/auto/test_modeling_tf_auto.py    |   2 +
 tests/models/auto/test_modeling_tf_pytorch.py |   2 +
 tests/models/bart/test_modeling_tf_bart.py    |   2 +
 tests/models/bert/test_modeling_tf_bert.py    |   2 +
 .../blenderbot/test_modeling_tf_blenderbot.py |   2 +
 .../test_modeling_tf_blenderbot_small.py      |   2 +
 tests/models/blip/test_modeling_tf_blip.py    |   2 +
 .../models/blip/test_modeling_tf_blip_text.py |   2 +
 tests/models/bort/test_modeling_tf_bort.py    |   2 +
 .../camembert/test_modeling_tf_camembert.py   |   2 +
 tests/models/clip/test_modeling_tf_clip.py    |   2 +
 .../convbert/test_modeling_tf_convbert.py     |   2 +
 .../convnext/test_modeling_tf_convnext.py     |   2 +
 tests/models/ctrl/test_modeling_tf_ctrl.py    |   2 +
 tests/models/cvt/test_modeling_tf_cvt.py      |   2 +
 .../test_modeling_tf_data2vec_vision.py       |   2 +
 .../deberta/test_modeling_tf_deberta.py       |   2 +
 .../deberta_v2/test_modeling_tf_deberta_v2.py |   2 +
 tests/models/deit/test_modeling_tf_deit.py    |   4 +-
 .../distilbert/test_modeling_tf_distilbert.py |   2 +
 tests/models/dpr/test_modeling_tf_dpr.py      |   2 +
 .../electra/test_modeling_tf_electra.py       |   2 +
 .../test_modeling_tf_encoder_decoder.py       |   2 +
 tests/models/esm/test_modeling_tf_esm.py      |   2 +
 .../flaubert/test_modeling_tf_flaubert.py     |   2 +
 .../models/funnel/test_modeling_tf_funnel.py  |   2 +
 tests/models/gpt2/test_modeling_tf_gpt2.py    |   2 +
 tests/models/gptj/test_modeling_tf_gptj.py    |   2 +
 .../groupvit/test_modeling_tf_groupvit.py     |   2 +
 .../models/hubert/test_modeling_tf_hubert.py  |   2 +
 .../layoutlm/test_modeling_tf_layoutlm.py     |   2 +
 .../layoutlmv3/test_modeling_tf_layoutlmv3.py |   2 +
 tests/models/led/test_modeling_tf_led.py      |   2 +
 .../longformer/test_modeling_tf_longformer.py |   2 +
 .../models/lxmert/test_modeling_tf_lxmert.py  |   2 +
 .../models/marian/test_modeling_tf_marian.py  |   2 +
 tests/models/mbart/test_modeling_tf_mbart.py  |   2 +
 .../mobilebert/test_modeling_tf_mobilebert.py |   2 +
 .../mobilevit/test_modeling_tf_mobilevit.py   |   2 +
 tests/models/mpnet/test_modeling_tf_mpnet.py  |   2 +
 tests/models/mt5/test_modeling_tf_mt5.py      |   2 +
 .../models/openai/test_modeling_tf_openai.py  |   2 +
 tests/models/opt/test_modeling_tf_opt.py      |   2 +
 .../pegasus/test_modeling_tf_pegasus.py       |   2 +
 tests/models/rag/test_modeling_tf_rag.py      |   2 +
 .../models/regnet/test_modeling_tf_regnet.py  |   2 +
 .../rembert/test_modeling_tf_rembert.py       |   2 +
 .../models/resnet/test_modeling_tf_resnet.py  |   2 +
 .../roberta/test_modeling_tf_roberta.py       |   2 +
 .../test_modeling_tf_roberta_prelayernorm.py  |   2 +
 .../roformer/test_modeling_tf_roformer.py     |   2 +
 tests/models/sam/test_modeling_tf_sam.py      |   2 +
 .../segformer/test_modeling_tf_segformer.py   |   2 +
 .../test_modeling_tf_speech_to_text.py        |   2 +
 tests/models/swin/test_modeling_tf_swin.py    |   2 +
 tests/models/t5/test_modeling_tf_t5.py        |   2 +
 tests/models/tapas/test_modeling_tf_tapas.py  |   2 +
 .../transfo_xl/test_modeling_tf_transfo_xl.py |   2 +
 ...test_modeling_tf_vision_encoder_decoder.py |   2 +
 ...st_modeling_tf_vision_text_dual_encoder.py |   2 +
 tests/models/vit/test_modeling_tf_vit.py      |   2 +
 .../vit_mae/test_modeling_tf_vit_mae.py       |   2 +
 .../wav2vec2/test_modeling_tf_wav2vec2.py     |   2 +
 .../whisper/test_modeling_tf_whisper.py       |   2 +
 tests/models/xglm/test_modeling_tf_xglm.py    |   2 +
 tests/models/xlm/test_modeling_tf_xlm.py      |   2 +
 .../test_modeling_tf_xlm_roberta.py           |   2 +
 tests/models/xlnet/test_modeling_tf_xlnet.py  |   2 +
 tests/test_modeling_tf_common.py              |  48 ++---
 tests/utils/test_modeling_tf_core.py          |   2 +
 139 files changed, 2907 insertions(+), 2621 deletions(-)

diff --git a/src/transformers/modeling_tf_outputs.py b/src/transformers/modeling_tf_outputs.py
index f8148b169543..357c34bc1f25 100644
--- a/src/transformers/modeling_tf_outputs.py
+++ b/src/transformers/modeling_tf_outputs.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import warnings
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@@ -43,8 +45,8 @@ class TFBaseModelOutput(ModelOutput):
     """
 
     last_hidden_state: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -96,8 +98,8 @@ class TFBaseModelOutputWithPooling(ModelOutput):
 
     last_hidden_state: tf.Tensor = None
     pooler_output: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -164,10 +166,10 @@ class TFBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
 
     last_hidden_state: tf.Tensor = None
     pooler_output: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
-    cross_attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
+    cross_attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -201,9 +203,9 @@ class TFBaseModelOutputWithPast(ModelOutput):
     """
 
     last_hidden_state: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -234,9 +236,9 @@ class TFBaseModelOutputWithCrossAttentions(ModelOutput):
     """
 
     last_hidden_state: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
-    cross_attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
+    cross_attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -276,10 +278,10 @@ class TFBaseModelOutputWithPastAndCrossAttentions(ModelOutput):
     """
 
     last_hidden_state: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
-    cross_attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
+    cross_attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -333,13 +335,13 @@ class TFSeq2SeqModelOutput(ModelOutput):
     """
 
     last_hidden_state: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    decoder_attentions: Optional[Tuple[tf.Tensor]] = None
-    cross_attentions: Optional[Tuple[tf.Tensor]] = None
-    encoder_last_hidden_state: Optional[tf.Tensor] = None
-    encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    encoder_attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    decoder_hidden_states: Tuple[tf.Tensor] | None = None
+    decoder_attentions: Tuple[tf.Tensor] | None = None
+    cross_attentions: Tuple[tf.Tensor] | None = None
+    encoder_last_hidden_state: tf.Tensor | None = None
+    encoder_hidden_states: Tuple[tf.Tensor] | None = None
+    encoder_attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -365,10 +367,10 @@ class TFCausalLMOutput(ModelOutput):
             heads.
     """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -400,11 +402,11 @@ class TFCausalLMOutputWithPast(ModelOutput):
            heads.
    """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -442,12 +444,12 @@ class TFCausalLMOutputWithCrossAttentions(ModelOutput):
            `past_key_values` input) to speed up sequential decoding.
    """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
-    cross_attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
+    cross_attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -473,10 +475,10 @@ class TFMaskedLMOutput(ModelOutput):
            heads.
    """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -527,15 +529,15 @@ class TFSeq2SeqLMOutput(ModelOutput):
            self-attention heads.
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -562,10 +564,10 @@ class TFNextSentencePredictorOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -591,10 +593,10 @@ class TFSequenceClassifierOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -642,15 +644,15 @@ class TFSeq2SeqSequenceClassifierOutput(ModelOutput): self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -684,10 +686,10 @@ class TFSemanticSegmenterOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -716,9 +718,9 @@ class TFSemanticSegmenterOutputWithNoAttention(ModelOutput): Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None @dataclass @@ -742,10 +744,10 @@ class TFImageClassifierOutput(ModelOutput): heads. 
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -773,10 +775,10 @@ class TFMultipleChoiceModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -802,10 +804,10 @@ class TFTokenClassifierOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -833,11 +835,11 @@ class TFQuestionAnsweringModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -884,15 +886,15 @@ class TFSeq2SeqQuestionAnsweringModelOutput(ModelOutput): self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -924,11 +926,11 @@ class TFSequenceClassifierOutputWithPast(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -947,7 +949,7 @@ class TFImageClassifierOutputWithNoAttention(ModelOutput): feature maps) of the model at the output of each stage. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None hidden_states: Optional[Tuple[tf.Tensor, ...]] = None @@ -974,10 +976,10 @@ class TFMaskedImageModelingOutput(ModelOutput): heads. 
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None reconstruction: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @property def logits(self): diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index c58926f476c1..c2b0485b5f4c 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -15,6 +15,8 @@ # limitations under the License. """TF general model utils.""" +from __future__ import annotations + import functools import gc import inspect @@ -1154,7 +1156,7 @@ def _from_config(cls, config, **kwargs): """ return cls(config, **kwargs) - def get_head_mask(self, head_mask: Optional[tf.Tensor], num_hidden_layers: int) -> tf.Tensor: + def get_head_mask(self, head_mask: tf.Tensor | None, num_hidden_layers: int) -> tf.Tensor: """ Prepare the head mask if needed. diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index c7f76b175b0b..57e2414e720d 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 ALBERT model.""" + +from __future__ import annotations + import math from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -561,12 +564,12 @@ class PreTrainedModel @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -676,8 +679,8 @@ class TFAlbertForPreTrainingOutput(ModelOutput): loss: tf.Tensor = None prediction_logits: tf.Tensor = None sop_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None ALBERT_START_DOCSTRING = r""" @@ -797,12 +800,12 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = 
diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index c58926f476c1..c2b0485b5f4c 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 """TF general model utils."""
 
+from __future__ import annotations
+
 import functools
 import gc
 import inspect
@@ -1154,7 +1156,7 @@ def _from_config(cls, config, **kwargs):
         """
         return cls(config, **kwargs)
 
-    def get_head_mask(self, head_mask: Optional[tf.Tensor], num_hidden_layers: int) -> tf.Tensor:
+    def get_head_mask(self, head_mask: tf.Tensor | None, num_hidden_layers: int) -> tf.Tensor:
         """
         Prepare the head mask if needed.
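`get_head_mask` shows why `tf.Tensor | None` is the honest signature here: when no mask is passed, the model falls back to one `None` per layer. A simplified sketch of that contract (an approximation, not the exact library implementation):

    from __future__ import annotations

    from typing import List


    class Tensor:  # hypothetical stand-in for tf.Tensor
        pass


    def get_head_mask_sketch(head_mask: Tensor | None, num_hidden_layers: int) -> List[Tensor | None]:
        if head_mask is None:
            # No mask supplied: hand every layer a None so attention runs unmasked.
            return [None] * num_hidden_layers
        # The real method instead broadcasts the mask to a 5-D shape per layer.
        return [head_mask] * num_hidden_layers


    print(get_head_mask_sketch(None, 3))  # [None, None, None]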
diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py
index c7f76b175b0b..57e2414e720d 100644
--- a/src/transformers/models/albert/modeling_tf_albert.py
+++ b/src/transformers/models/albert/modeling_tf_albert.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 """ TF 2.0 ALBERT model."""
 
+
+from __future__ import annotations
+
 import math
 from dataclasses import dataclass
 from typing import Dict, Optional, Tuple, Union
@@ -561,12 +564,12 @@ class PreTrainedModel
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -676,8 +679,8 @@ class TFAlbertForPreTrainingOutput(ModelOutput):
     loss: tf.Tensor = None
     prediction_logits: tf.Tensor = None
     sop_logits: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 ALBERT_START_DOCSTRING = r"""
@@ -797,12 +800,12 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -863,17 +866,17 @@ def get_lm_head(self) -> tf.keras.layers.Layer:
     @replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        sentence_order_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
+        sentence_order_label: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFAlbertForPreTrainingOutput, Tuple[tf.Tensor]]:
         r"""
@@ -979,16 +982,16 @@ def get_lm_head(self) -> tf.keras.layers.Layer:
     @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1097,16 +1100,16 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1189,16 +1192,16 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1276,17 +1279,17 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1386,16 +1389,16 @@ def dummy_inputs(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py
index 39537b88bfce..5690e022adaa 100644
--- a/src/transformers/models/bart/modeling_tf_bart.py
+++ b/src/transformers/models/bart/modeling_tf_bart.py
@@ -15,6 +15,8 @@
 """ TF 2.0 Bart model."""
 
+from __future__ import annotations
+
 import random
 from typing import Optional, Tuple, Union
@@ -131,7 +133,7 @@ def call(
         self,
         input_shape: Optional[tf.TensorShape] = None,
         past_key_values_length: int = 0,
-        position_ids: Optional[tf.Tensor] = None,
+        position_ids: tf.Tensor | None = None,
     ):
         """Input is expected to be of size [bsz x seqlen]."""
         if position_ids is None:
@@ -180,12 +182,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int):
     def call(
         self,
         hidden_states: tf.Tensor,
-        key_value_states: Optional[tf.Tensor] = None,
-        past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
+        key_value_states: tf.Tensor | None = None,
+        past_key_value: Tuple[Tuple[tf.Tensor]] | None = None,
+        attention_mask: tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
         training: Optional[bool] = False,
-    ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]:
+    ) -> Tuple[tf.Tensor, tf.Tensor | None]:
         """Input shape: Batch x Time x Channel"""
 
         # if key_value_states are provided this layer is used as a cross-attention layer
@@ -314,8 +316,8 @@ def __init__(self, config: BartConfig, **kwargs):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]],
-        layer_head_mask: Optional[tf.Tensor],
+        attention_mask: np.ndarray | tf.Tensor | None,
+        layer_head_mask: tf.Tensor | None,
         training: Optional[bool] = False,
     ) -> tf.Tensor:
         """
@@ -383,11 +385,11 @@ def __init__(self, config: BartConfig, **kwargs):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_layer_head_mask: Optional[tf.Tensor] = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
+        cross_attn_layer_head_mask: tf.Tensor | None = None,
         past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         training: Optional[bool] = False,
     ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
@@ -700,10 +702,10 @@ def __init__(self, config: BartConfig, embed_tokens: Optional[tf.keras.layers.Em
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -851,14 +853,14 @@ def __init__(self, config: BartConfig, embed_tokens: Optional[tf.keras.layers.Em
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -1073,18 +1075,18 @@ def set_input_embeddings(self, new_embeddings):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_head_mask: np.ndarray | tf.Tensor | None = None,
+        cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
         encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1187,18 +1189,18 @@ def get_decoder(self):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_head_mask: np.ndarray | tf.Tensor | None = None,
+        cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
         encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1311,23 +1313,23 @@ def set_bias(self, value):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_head_mask: np.ndarray | tf.Tensor | None = None,
+        cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
         encoder_outputs: Optional[TFBaseModelOutput] = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1481,23 +1483,23 @@ def __init__(self, config: BartConfig, load_weight_prefix=None, *inputs, **kwarg
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_head_mask: np.ndarray | tf.Tensor | None = None,
+        cross_attn_head_mask: np.ndarray | tf.Tensor | None = None,
         encoder_outputs: Optional[TFBaseModelOutput] = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSeq2SeqSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
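The `np.ndarray | tf.Tensor | None` unions in these `call` signatures reflect that the models accept NumPy arrays as well as tensors. A hedged sketch of that convention (`normalize_input` is a hypothetical helper, not part of the patch; requires numpy and tensorflow installed):

    from __future__ import annotations

    import numpy as np
    import tensorflow as tf


    def normalize_input(attention_mask: np.ndarray | tf.Tensor | None) -> tf.Tensor | None:
        # None propagates unchanged; arrays and tensors are unified to a tensor.
        if attention_mask is None:
            return None
        return tf.convert_to_tensor(attention_mask)


    print(normalize_input(np.ones((1, 4))))  # tf.Tensor of ones with shape (1, 4)
    print(normalize_input(None))             # None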
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None prediction_logits: tf.Tensor = None seq_relationship_logits: tf.Tensor = None hidden_states: Optional[Union[Tuple[tf.Tensor], tf.Tensor]] = None @@ -1067,14 +1070,14 @@ def __init__(self, config: BertConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1175,17 +1178,17 @@ def get_prefix_bias_name(self) -> str: @replace_return_docstrings(output_type=TFBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, - next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, + next_sentence_label: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFBertForPreTrainingOutput, Tuple[tf.Tensor]]: r""" @@ -1304,16 +1307,16 @@ def get_prefix_bias_name(self) -> str: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, 
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1401,20 +1404,20 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attenti
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
         **kwargs,
     ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]:
@@ -1513,16 +1516,16 @@ def __init__(self, config: BertConfig, *inputs, **kwargs):
     @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        next_sentence_label: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFNextSentencePredictorOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1621,16 +1624,16 @@ def __init__(self, config: BertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1714,16 +1717,16 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1848,16 +1851,16 @@ def __init__(self, config: BertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1941,17 +1944,17 @@ def __init__(self, config: BertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
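In the Blenderbot hunks below, the learned positional embedding takes `position_ids: tf.Tensor | None` and derives the ids itself when given None. A hedged sketch of that defaulting pattern (simplified free function, not the library code; requires tensorflow):

    from __future__ import annotations

    import tensorflow as tf


    def default_position_ids(
        seq_len: int, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None
    ) -> tf.Tensor:
        if position_ids is None:
            # Positions continue from the cached length, which incremental decoding needs.
            position_ids = tf.range(past_key_values_length, seq_len + past_key_values_length)
        return position_ids


    print(default_position_ids(4, past_key_values_length=2))  # tf.Tensor([2 3 4 5])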
= None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1327,23 +1329,23 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P @add_end_docstrings(BLENDERBOT_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: r""" diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index e170085e91c5..541024470d10 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -15,6 +15,8 @@ """ TF 2.0 BlenderbotSmall model.""" +from __future__ import annotations + import random from typing import List, Optional, Tuple, Union @@ -126,7 +128,7 @@ def __init__(self, num_embeddings: int, embedding_dim: int, **kwargs): super().__init__(num_embeddings, embedding_dim, **kwargs) def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -175,12 +177,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: 
Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -310,8 +312,8 @@ def __init__(self, config: BlenderbotSmallConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]], - layer_head_mask: Optional[tf.Tensor], + attention_mask: np.ndarray | tf.Tensor | None, + layer_head_mask: tf.Tensor | None, training: Optional[bool] = False, ) -> tf.Tensor: """ @@ -380,11 +382,11 @@ def __init__(self, config: BlenderbotSmallConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: @@ -1175,18 +1177,18 @@ def get_decoder(self): ) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1303,23 +1305,23 @@ def set_bias(self, value): @add_end_docstrings(BLENDERBOT_SMALL_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: 
Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: r""" diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index ae32c2f32a52..95269e4351d9 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -14,6 +14,8 @@ # limitations under the License. """ TensorFlow BLIP model.""" +from __future__ import annotations + from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union @@ -102,12 +104,12 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput): heads.` """ - loss: Optional[Tuple[tf.Tensor]] = None - decoder_logits: Optional[Tuple[tf.Tensor]] = None - image_embeds: Optional[tf.Tensor] = None + loss: Tuple[tf.Tensor] | None = None + decoder_logits: Tuple[tf.Tensor] | None = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -136,11 +138,11 @@ class TFBlipTextVisionModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None - image_embeds: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -176,14 +178,14 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput): The question embeddings obtained by the text projection layer. """ - itm_score: Optional[tf.Tensor] = None - loss: Optional[tf.Tensor] = None - image_embeds: Optional[tf.Tensor] = None + itm_score: tf.Tensor | None = None + loss: tf.Tensor | None = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_pooler_output: Optional[tf.Tensor] = None - attentions: Optional[Tuple[tf.Tensor]] = None - question_embeds: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + vision_pooler_output: tf.Tensor | None = None + attentions: Tuple[tf.Tensor] | None = None + question_embeds: Tuple[tf.Tensor] | None = None @dataclass @@ -208,7 +210,7 @@ class TFBlipOutput(ModelOutput): The output of the [`BlipVisionModel`]. 
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits_per_image: tf.Tensor = None logits_per_text: tf.Tensor = None text_embeds: tf.Tensor = None @@ -359,10 +361,10 @@ def __init__(self, config, **kwargs): def call( self, hidden_states: tf.Tensor, - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: Optional[bool] = None, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[Tuple[tf.Tensor]]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None, Tuple[tf.Tensor] | None]: """Input shape: Batch x Time x Channel""" bsz, tgt_len, embed_dim = shape_list(hidden_states) @@ -573,7 +575,7 @@ def __init__(self, config: BlipConfig, **kwargs): def call( self, inputs_embeds, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -694,7 +696,7 @@ def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOut @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=BlipVisionConfig) def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -799,10 +801,10 @@ def build(self, input_shape): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -930,10 +932,10 @@ def serving_output(self, output: TFBlipOutput) -> TFBlipOutput: @replace_return_docstrings(output_type=TFBlipOutput, config_class=BlipConfig) def call( self, - input_ids: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -980,9 +982,9 @@ def call( @add_start_docstrings_to_model_forward(BLIP_TEXT_INPUTS_DOCSTRING) def get_text_features( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_dict: Optional[bool] = None, ) -> tf.Tensor: r""" @@ -1018,7 +1020,7 @@ def get_text_features( @add_start_docstrings_to_model_forward(BLIP_VISION_INPUTS_DOCSTRING) def get_image_features( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, return_dict: Optional[bool] = None, ) -> tf.Tensor: r""" @@ -1128,11 +1130,11 @@ def serving_output( def call( self, pixel_values: tf.Tensor, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = 
None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: Optional[bool] = None, ) -> Union[Tuple, TFBlipForConditionalGenerationModelOutput]: @@ -1197,8 +1199,8 @@ def call( def generate( self, pixel_values: tf.Tensor, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, **generate_kwargs, ) -> tf.Tensor: r""" @@ -1342,13 +1344,13 @@ def call( self, input_ids: tf.Tensor, pixel_values: tf.Tensor, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: Optional[bool] = None, ) -> Union[Tuple, TFBlipTextVisionModelOutput]: @@ -1451,7 +1453,7 @@ def generate( self, input_ids: tf.Tensor, pixel_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, **generate_kwargs, ) -> tf.Tensor: r""" @@ -1624,9 +1626,9 @@ def serving_output(self, output: TFBlipImageTextMatchingModelOutput) -> TFBlipIm def call( self, input_ids: tf.Tensor, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, use_itm_head: Optional[bool] = True, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py index 02f592c25902..bff81223375c 100644 --- a/src/transformers/models/blip/modeling_tf_blip_text.py +++ b/src/transformers/models/blip/modeling_tf_blip_text.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. + +from __future__ import annotations + import math from typing import Dict, Optional, Tuple @@ -277,11 +280,11 @@ def __init__(self, config, is_cross_attention=False, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, output_attentions: Optional[bool] = False, training: Optional[bool] = None, ): diff --git a/src/transformers/models/camembert/modeling_tf_camembert.py b/src/transformers/models/camembert/modeling_tf_camembert.py index c9e4c98c1467..980462f4be7c 100644 --- a/src/transformers/models/camembert/modeling_tf_camembert.py +++ b/src/transformers/models/camembert/modeling_tf_camembert.py @@ -15,6 +15,9 @@ # limitations under the License.
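Every file touched by these hunks gains the same two-line addition, `from __future__ import annotations`, and that import is what makes the rewritten hints legal on Python versions before 3.10, which lack native `X | Y` union syntax at runtime. A minimal sketch of the mechanism, not taken from the patch; `Tensor` is a hypothetical stand-in for tf.Tensor so the sketch runs without TensorFlow:

from __future__ import annotations  # PEP 563: annotations are stored as strings


class Tensor:  # hypothetical stand-in for tf.Tensor
    pass


def call(attention_mask: Tensor | None = None) -> Tensor | None:
    return attention_mask


# The `|` in the annotation is never evaluated at definition time, so this
# module imports cleanly even on Python 3.7-3.9, where evaluating
# `Tensor | None` eagerly would raise a TypeError.
print(call.__annotations__["attention_mask"])  # the raw string "Tensor | None"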
""" TF 2.0 CamemBERT model.""" + +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -526,9 +529,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -605,9 +608,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -705,14 +708,14 @@ class PreTrainedModel # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -922,14 +925,14 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1085,16 +1088,16 @@ def get_prefix_bias_name(self): ) 
def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1199,16 +1202,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1290,16 +1293,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1387,16 +1390,16 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, 
tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1499,17 +1502,17 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1615,20 +1618,20 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attenti ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/clip/modeling_tf_clip.py 
b/src/transformers/models/clip/modeling_tf_clip.py index 7cf52500aed6..9b7976f41366 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -15,6 +15,8 @@ """ TF 2.0 CLIP model.""" +from __future__ import annotations + import math from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union @@ -111,7 +113,7 @@ class TFCLIPOutput(ModelOutput): The output of the [`TFCLIPVisionModel`]. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits_per_image: tf.Tensor = None logits_per_text: tf.Tensor = None text_embeds: tf.Tensor = None @@ -586,9 +588,9 @@ def set_input_embeddings(self, value: tf.Variable): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -675,7 +677,7 @@ def get_input_embeddings(self) -> tf.keras.layers.Layer: @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -751,9 +753,9 @@ def build(self, input_shape: tf.TensorShape): @unpack_inputs def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -785,7 +787,7 @@ def get_text_features( @unpack_inputs def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -810,10 +812,10 @@ def get_image_features( @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1050,9 +1052,9 @@ def __init__(self, config: CLIPTextConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=CLIPTextConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | 
None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1158,7 +1160,7 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=CLIPVisionConfig) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1260,9 +1262,9 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFCLIPOutput: @add_start_docstrings_to_model_forward(CLIP_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1300,7 +1302,7 @@ def get_text_features( @add_start_docstrings_to_model_forward(CLIP_VISION_INPUTS_DOCSTRING) def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1343,10 +1345,10 @@ def get_image_features( @replace_return_docstrings(output_type=TFCLIPOutput, config_class=CLIPConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index e853da762771..19c2d700dc06 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -15,6 +15,8 @@ """ TF 2.0 ConvBERT model.""" +from __future__ import annotations + from typing import Optional, Tuple, Union import numpy as np @@ -742,12 +744,12 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, attention_mask: Optional[Union[np.array, tf.Tensor]] = None, token_type_ids: Optional[Union[np.array, tf.Tensor]] = None, position_ids: Optional[Union[np.array, tf.Tensor]] = None, head_mask: Optional[Union[np.array, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -858,16 +860,16 @@ def get_prefix_bias_name(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, 
tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMaskedLMOutput]: r""" @@ -965,16 +967,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" @@ -1057,16 +1059,16 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMultipleChoiceModelOutput]: r""" @@ -1170,16 +1172,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + 
inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFTokenClassifierOutput]: r""" @@ -1247,17 +1249,17 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[tf.Tensor] = None, - end_positions: Optional[tf.Tensor] = None, + start_positions: tf.Tensor | None = None, + end_positions: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFQuestionAnsweringModelOutput]: r""" diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py index 00db1f0b7884..f258abe24cbb 100644 --- a/src/transformers/models/convnext/modeling_tf_convnext.py +++ b/src/transformers/models/convnext/modeling_tf_convnext.py @@ -15,6 +15,8 @@ """ TF 2.0 ConvNext model.""" +from __future__ import annotations + from typing import Dict, Optional, Tuple, Union import numpy as np @@ -297,7 +299,7 @@ def __init__(self, config: ConvNextConfig, add_pooling_layer: bool = True, **kwa @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -458,7 +460,7 @@ def __init__(self, config, *inputs, add_pooling_layer=True, **kwargs): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -543,10 +545,10 @@ def __init__(self, config: ConvNextConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index f4742b4e33d7..cddfd4a9e352 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -15,6 +15,9 @@ # limitations under the License. 
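The conversions above are purely notational: `Optional[Union[np.ndarray, tf.Tensor]]` and `np.ndarray | tf.Tensor | None` name the same type, so only readability changes. A runnable sketch of that equivalence, again not from the patch and again with `Tensor` as a hypothetical stand-in for tf.Tensor, including the one runtime caveat worth knowing:

from __future__ import annotations

import sys
from typing import Optional, Union, get_type_hints

import numpy as np


class Tensor:  # hypothetical stand-in for tf.Tensor
    pass


def old_style(mask: Optional[Union[np.ndarray, Tensor]] = None) -> None:
    ...


def new_style(mask: np.ndarray | Tensor | None = None) -> None:
    ...


# Both annotations are stored as plain strings and describe the same union.
print(old_style.__annotations__["mask"])  # Optional[Union[np.ndarray, Tensor]]
print(new_style.__annotations__["mask"])  # np.ndarray | Tensor | None

# Caveat: code that *evaluates* the strings, e.g. typing.get_type_hints(),
# needs Python >= 3.10 for the `|` form, because evaluation then performs a
# real union operation on the classes.
if sys.version_info >= (3, 10):
    assert get_type_hints(old_style)["mask"] == get_type_hints(new_style)["mask"]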
""" TF 2.0 CTRL model.""" + +from __future__ import annotations + import warnings from typing import Optional, Tuple, Union @@ -256,13 +259,13 @@ def _prune_heads(self, heads_to_prune): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -532,13 +535,13 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -645,18 +648,18 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, use_cac ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFCausalLMOutputWithPast]: r""" @@ -749,18 +752,18 @@ def get_output_embeddings(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: 
Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" diff --git a/src/transformers/models/cvt/modeling_tf_cvt.py b/src/transformers/models/cvt/modeling_tf_cvt.py index 6ad86071e47d..3c80f53bfaf2 100644 --- a/src/transformers/models/cvt/modeling_tf_cvt.py +++ b/src/transformers/models/cvt/modeling_tf_cvt.py @@ -15,6 +15,8 @@ """ TF 2.0 Cvt model.""" +from __future__ import annotations + import collections.abc from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -75,7 +77,7 @@ class TFBaseModelOutputWithCLSToken(ModelOutput): last_hidden_state: tf.Tensor = None cls_token_value: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None class TFCvtDropPath(tf.keras.layers.Layer): @@ -668,7 +670,7 @@ def __init__(self, config: CvtConfig, **kwargs): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, @@ -797,7 +799,7 @@ def __init__(self, config: CvtConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFBaseModelOutputWithCLSToken, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, @@ -880,8 +882,8 @@ def __init__(self, config: CvtConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFImageClassifierOutputWithNoAttention, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 06a6f010dd74..1085d6e48d6c 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -14,6 +14,9 @@ # limitations under the License. 
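The rewrite applies equally to ModelOutput dataclass fields, such as `hidden_states` in TFBaseModelOutputWithCLSToken above. `@dataclass` reads annotations but never evaluates them, so the `|` unions are safe there too; a minimal sketch under the same stand-in assumption (`Tensor` and `OutputWithCLSToken` are hypothetical, not from the patch):

from __future__ import annotations  # field annotations stay unevaluated strings

from dataclasses import dataclass, fields
from typing import Tuple


class Tensor:  # hypothetical stand-in for tf.Tensor
    pass


@dataclass
class OutputWithCLSToken:  # hypothetical stand-in for a ModelOutput subclass
    last_hidden_state: Tensor = None  # mirrors the `tf.Tensor = None` defaults above
    cls_token_value: Tensor = None
    hidden_states: Tuple[Tensor] | None = None


# @dataclass inspects only the annotation strings, so the class builds fine
# on any supported Python despite the `|` union in the last field.
print([f.name for f in fields(OutputWithCLSToken)])
# ['last_hidden_state', 'cls_token_value', 'hidden_states']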
""" TF 2.0 Data2Vec Vision model.""" + +from __future__ import annotations + import collections.abc import math from dataclasses import dataclass @@ -94,8 +97,8 @@ class TFData2VecVisionModelOutputWithPooling(TFBaseModelOutputWithPooling): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFData2VecVisionDropPath(tf.keras.layers.Layer): @@ -163,7 +166,7 @@ def build(self, input_shape: tf.TensorShape): super().build(input_shape) - def call(self, pixel_values: tf.Tensor, bool_masked_pos: Optional[tf.Tensor] = None) -> tf.Tensor: + def call(self, pixel_values: tf.Tensor, bool_masked_pos: tf.Tensor | None = None) -> tf.Tensor: embeddings = self.patch_embeddings(pixel_values) batch_size, seq_len, projection_dim = shape_list(embeddings) @@ -609,7 +612,7 @@ def __init__(self, config: Data2VecVisionConfig, window_size: Optional[tuple] = def call( self, hidden_states: tf.Tensor, - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, @@ -685,9 +688,9 @@ class PreTrainedModel @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -899,9 +902,9 @@ def get_input_embeddings(self): ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -966,12 +969,12 @@ def __init__(self, config: Data2VecVisionConfig, *inputs, **kwargs): ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, tuple]: r""" @@ -1378,9 +1381,9 @@ def masked_loss(real, pred): @replace_return_docstrings(output_type=TFSemanticSegmenterOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index dcd0582777eb..7a045426185e 100644 --- 
a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -15,6 +15,8 @@ """ TF 2.0 DeBERTa model.""" +from __future__ import annotations + import math from typing import Dict, Optional, Sequence, Tuple, Union @@ -922,11 +924,11 @@ class PreTrainedModel @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1092,11 +1094,11 @@ def __init__(self, config: DebertaConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1149,15 +1151,15 @@ def get_lm_head(self) -> tf.keras.layers.Layer: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1233,15 +1235,15 @@ def __init__(self, config: DebertaConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: 
Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1314,15 +1316,15 @@ def __init__(self, config: DebertaConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1390,16 +1392,16 @@ def __init__(self, config: DebertaConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index b3c210352a32..82b0a30c5a50 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -15,6 +15,8 @@ """ TF 2.0 DeBERTa-v2 model.""" +from __future__ import annotations + from typing import Dict, Optional, Tuple, Union import numpy as np @@ -1014,11 +1016,11 @@ class PreTrainedModel @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, 
output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1186,11 +1188,11 @@ def __init__(self, config: DebertaV2Config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1244,15 +1246,15 @@ def get_lm_head(self) -> tf.keras.layers.Layer: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1329,15 +1331,15 @@ def __init__(self, config: DebertaV2Config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1411,15 +1413,15 @@ def __init__(self, config: DebertaV2Config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: 
Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1488,16 +1490,16 @@ def __init__(self, config: DebertaV2Config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py index a3d487021d4b..131939f5bcfa 100644 --- a/src/transformers/models/deit/modeling_tf_deit.py +++ b/src/transformers/models/deit/modeling_tf_deit.py @@ -15,6 +15,8 @@ """ TensorFlow DeiT model.""" +from __future__ import annotations + import collections.abc import math from dataclasses import dataclass @@ -95,8 +97,8 @@ class token). 
logits: tf.Tensor = None cls_logits: tf.Tensor = None distillation_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFDeiTEmbeddings(tf.keras.layers.Layer): @@ -142,7 +144,7 @@ def build(self, input_shape: tf.TensorShape): super().build(input_shape) def call( - self, pixel_values: tf.Tensor, bool_masked_pos: Optional[tf.Tensor] = None, training: bool = False + self, pixel_values: tf.Tensor, bool_masked_pos: tf.Tensor | None = None, training: bool = False ) -> tf.Tensor: embeddings = self.patch_embeddings(pixel_values) batch_size, seq_length, _ = shape_list(embeddings) @@ -501,9 +503,9 @@ def get_head_mask(self, head_mask): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -658,9 +660,9 @@ def __init__( ) def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -768,9 +770,9 @@ def __init__(self, config: DeiTConfig) -> None: @replace_return_docstrings(output_type=TFMaskedImageModelingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -898,9 +900,9 @@ def __init__(self, config: DeiTConfig): @replace_return_docstrings(output_type=TFImageClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1016,8 +1018,8 @@ def __init__(self, config: DeiTConfig) -> None: ) def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 3013f4ca30d7..85a98c2a77fb 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -16,6 +16,9 @@ TF 2.0 DistilBERT model """ + +from __future__ import annotations + import warnings from typing import Optional, 
Tuple, Union @@ -538,10 +541,10 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -639,14 +642,14 @@ def get_prefix_bias_name(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -725,14 +728,14 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -805,14 +808,14 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -897,14 +900,14 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + 
attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1004,15 +1007,15 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index 565ad37b2117..008e6a39fdc9 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -15,8 +15,10 @@ """ TensorFlow DPR model for Open Domain Question Answering.""" +from __future__ import annotations + from dataclasses import dataclass -from typing import Optional, Tuple, Union +from typing import Tuple, Union import tensorflow as tf @@ -80,8 +82,8 @@ class TFDPRContextEncoderOutput(ModelOutput): """ pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -108,8 +110,8 @@ class TFDPRQuestionEncoderOutput(ModelOutput): """ pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -141,8 +143,8 @@ class TFDPRReaderOutput(ModelOutput): start_logits: tf.Tensor = None end_logits: tf.Tensor = None relevance_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFDPREncoderLayer(tf.keras.layers.Layer): @@ -167,9 +169,9 @@ def __init__(self, config: DPRConfig, **kwargs): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = None, output_hidden_states: bool = None, return_dict: bool = None, @@ -227,8 +229,8 @@ def __init__(self, config: DPRConfig, **kwargs): def call( self, 
input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = False, @@ -283,9 +285,9 @@ def __init__(self, config: DPRConfig, **kwargs): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = False, @@ -316,9 +318,9 @@ def __init__(self, config: DPRConfig, **kwargs): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = False, @@ -552,9 +554,9 @@ def get_input_embeddings(self): def call( self, input_ids=None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions=None, output_hidden_states=None, return_dict=None, @@ -639,9 +641,9 @@ def get_input_embeddings(self): def call( self, input_ids=None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions=None, output_hidden_states=None, return_dict=None, @@ -725,8 +727,8 @@ def get_input_embeddings(self): def call( self, input_ids=None, - attention_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = None, output_hidden_states: bool = None, return_dict=None, diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 82c3381724dc..7602d43cc0ca 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -14,6 +14,9 @@ # limitations under the License. 
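A minimal sketch, not part of the patch, of why the from __future__ import annotations line added to each touched module makes the PEP 604 "X | None" spelling safe on Python 3.7+: under PEP 563, annotations are stored as plain strings and never evaluated at definition time, so the | operator is never actually applied to tf.Tensor.

from __future__ import annotations

import numpy as np
import tensorflow as tf


def call(attention_mask: np.ndarray | tf.Tensor | None = None) -> tf.Tensor | None:
    # The union above is kept as the literal string
    # "np.ndarray | tf.Tensor | None"; it is never evaluated at runtime.
    if attention_mask is None:
        return None
    return tf.convert_to_tensor(attention_mask)


print(call.__annotations__["attention_mask"])  # np.ndarray | tf.Tensor | None

The annotation's text is what surfaces in rendered signatures, so the stringified form reads more cleanly than Optional[Union[np.ndarray, tf.Tensor]] while carrying the same information.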
""" TF Electra model.""" + +from __future__ import annotations + import math import warnings from dataclasses import dataclass @@ -312,9 +315,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -391,9 +394,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -704,14 +707,14 @@ def get_head_mask(self, head_mask): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -824,8 +827,8 @@ class TFElectraForPreTrainingOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None ELECTRA_START_DOCSTRING = r""" @@ -941,14 +944,14 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + 
encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1034,12 +1037,12 @@ def __init__(self, config, **kwargs): @replace_return_docstrings(output_type=TFElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1171,16 +1174,16 @@ def get_prefix_bias_name(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1281,16 +1284,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1372,16 +1375,16 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - 
token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1490,16 +1493,16 @@ def __init__(self, config, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1573,17 +1576,17 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 5ec7f2932f59..f5cd5e445aac 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ 
b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -14,6 +14,9 @@ # limitations under the License. """ Classes to support TF Encoder-Decoder architectures""" + +from __future__ import annotations + import inspect import re import warnings @@ -482,15 +485,15 @@ def from_encoder_decoder_pretrained( @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/esm/modeling_tf_esm.py b/src/transformers/models/esm/modeling_tf_esm.py index 135c16a14b36..df4ea54f83bc 100644 --- a/src/transformers/models/esm/modeling_tf_esm.py +++ b/src/transformers/models/esm/modeling_tf_esm.py @@ -14,6 +14,9 @@ # limitations under the License. 
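One caveat with the stringified annotations, shown in an illustrative sketch below (not code from the patch): typing.get_type_hints re-evaluates the stored strings, so introspecting a "tf.Tensor | None" hint still raises TypeError on Python < 3.10, and any runtime consumer of these hints has to treat them as strings instead.

from __future__ import annotations

import typing

import tensorflow as tf


def call(pixel_values: tf.Tensor | None = None) -> None:
    pass


try:
    # Re-evaluates the string "tf.Tensor | None"; on Python 3.10+ this
    # succeeds and resolves to typing.Optional[tf.Tensor].
    print(typing.get_type_hints(call))
except TypeError as err:
    # On Python < 3.10, `type | None` is not a valid runtime expression.
    print(f"get_type_hints failed: {err}")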
""" PyTorch ESM model.""" + +from __future__ import annotations + import os from typing import Optional, Tuple, Union @@ -312,11 +315,11 @@ def transpose_for_scores(self, x: tf.Tensor) -> tf.Tensor: def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -801,13 +804,13 @@ def _prune_heads(self, heads_to_prune): def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -984,13 +987,13 @@ def __init__(self, config: EsmConfig, add_pooling_layer=True, *inputs, **kwargs) ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1113,14 +1116,14 @@ def get_lm_head(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, 
tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1261,12 +1264,12 @@ def __init__(self, config): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1356,12 +1359,12 @@ def __init__(self, config): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index b1dd523dedaf..7f93caebb000 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -16,6 +16,9 @@ TF 2.0 Flaubert model. 
""" + +from __future__ import annotations + import itertools import random import warnings @@ -255,15 +258,15 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -486,15 +489,15 @@ def set_input_embeddings(self, value): @unpack_inputs def call( self, - input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -754,8 +757,8 @@ class TFFlaubertWithLMHeadModelOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @add_start_docstrings( @@ -803,15 +806,15 @@ def prepare_inputs_for_generation(self, inputs, **kwargs): ) def call( self, - input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, 
tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -874,19 +877,19 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -960,20 +963,20 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1064,19 +1067,19 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + 
input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1172,19 +1175,19 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: if input_ids is not None: diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 84254f2b288c..fa077d612d21 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -14,6 +14,9 @@ # limitations under the License. 
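The same rewrite is applied to ModelOutput dataclass fields, as in the TFFunnelForPreTrainingOutput hunk just below. A short sketch of why that is safe, using a hypothetical stand-in class rather than the real ModelOutput: @dataclass reads __annotations__ without evaluating it, so the stringified "Tuple[tf.Tensor] | None" never needs to resolve at class-creation time.

from __future__ import annotations

from dataclasses import dataclass, fields
from typing import Tuple

import tensorflow as tf


@dataclass
class FakeOutput:  # hypothetical stand-in for a ModelOutput subclass
    logits: tf.Tensor = None
    hidden_states: Tuple[tf.Tensor] | None = None


out = FakeOutput(logits=tf.constant([0.0]))
# Field.type holds the raw annotation strings, never the evaluated types.
print([(f.name, f.type) for f in fields(out)])
# [('logits', 'tf.Tensor'), ('hidden_states', 'Tuple[tf.Tensor] | None')]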
""" TF 2.0 Funnel model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -995,8 +998,8 @@ class TFFunnelForPreTrainingOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None FUNNEL_START_DOCSTRING = r""" @@ -1110,10 +1113,10 @@ def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None: @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1158,10 +1161,10 @@ def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1206,10 +1209,10 @@ def __init__(self, config: FunnelConfig, **kwargs) -> None: @replace_return_docstrings(output_type=TFFunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1285,14 +1288,14 @@ def get_prefix_bias_name(self) -> str: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]: r""" @@ -1357,14 +1360,14 @@ def __init__(self, config: 
FunnelConfig, *inputs, **kwargs) -> None: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]: r""" @@ -1441,14 +1444,14 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]: r""" @@ -1550,14 +1553,14 @@ def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]: r""" @@ -1626,15 +1629,15 @@ def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], 
TFQuestionAnsweringModelOutput]: r""" diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index b7cb1b6df2a2..6b7476b71bba 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -15,6 +15,8 @@ # limitations under the License. """ TF 2.0 OpenAI GPT-2 model.""" +from __future__ import annotations + from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -345,15 +347,15 @@ def _prune_heads(self, heads_to_prune): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -583,9 +585,9 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput): logits: tf.Tensor = None mc_logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None GPT2_START_DOCSTRING = r""" @@ -716,15 +718,15 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -844,20 +846,20 @@ def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache= ) def call( 
self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -963,14 +965,14 @@ def __init__(self, config, *inputs, **kwargs): @replace_return_docstrings(output_type=TFGPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - mc_token_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + mc_token_ids: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1124,18 +1126,18 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = 
None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutputWithPast, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index fbef4f0effc7..09e4330eb182 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -14,6 +14,8 @@ # limitations under the License. """ TF 2.0 GPT-J model.""" +from __future__ import annotations + from typing import Optional, Tuple, Union import numpy as np @@ -171,8 +173,8 @@ def _attn( query: tf.Tensor, key: tf.Tensor, value: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, ) -> Tuple[tf.Tensor, tf.Tensor]: # compute causal mask from causal mask buffer query_length, key_length = shape_list(query)[-2], shape_list(key)[-2] @@ -207,9 +209,9 @@ def call( self, hidden_states: tf.Tensor, layer_past: Optional[Tuple[tf.Tensor, tf.Tensor]] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, use_cache: bool = False, output_attentions: bool = False, ): @@ -301,10 +303,10 @@ def __init__(self, config: GPTJConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - layer_past: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + layer_past: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, use_cache: bool = False, output_attentions: bool = False, ): @@ -659,13 +661,13 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -762,14 +764,14 @@ def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache= ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - 
inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -865,14 +867,14 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -987,15 +989,15 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 4891931c20ab..718884720980 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -15,6 +15,8 @@ """ TF 2.0 GroupViT model.""" +from __future__ import annotations + import collections.abc import math from dataclasses import dataclass @@ -247,7 +249,7 @@ class TFGroupViTModelOutput(ModelOutput): The output of the [`TFGroupViTVisionModel`]. 
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits_per_image: tf.Tensor = None logits_per_text: tf.Tensor = None segmentation_logits: tf.Tensor = None @@ -647,7 +649,7 @@ def split_x(self, x: tf.Tensor) -> tf.Tensor: else: return x, None - def concat_x(self, x: tf.Tensor, group_token: Optional[tf.Tensor] = None) -> tf.Tensor: + def concat_x(self, x: tf.Tensor, group_token: tf.Tensor | None = None) -> tf.Tensor: if group_token is None: return x return tf.concat([x, group_token], axis=1) @@ -655,7 +657,7 @@ def concat_x(self, x: tf.Tensor, group_token: Optional[tf.Tensor] = None) -> tf. def call( self, hidden_states: tf.Tensor, - prev_group_token: Optional[tf.Tensor] = None, + prev_group_token: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -1138,9 +1140,9 @@ def set_input_embeddings(self, value: tf.Variable): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1183,7 +1185,7 @@ def get_input_embeddings(self) -> tf.keras.layers.Layer: @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1262,9 +1264,9 @@ def build(self, input_shape: tf.TensorShape): @unpack_inputs def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1298,7 +1300,7 @@ def get_text_features( @unpack_inputs def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1325,10 +1327,10 @@ def get_image_features( @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1635,9 +1637,9 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=GroupViTTextConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, 
tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1731,7 +1733,7 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=GroupViTVisionConfig) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1831,9 +1833,9 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFGroupViTModelOutput: @add_start_docstrings_to_model_forward(GROUPVIT_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1872,7 +1874,7 @@ def get_text_features( @add_start_docstrings_to_model_forward(GROUPVIT_VISION_INPUTS_DOCSTRING) def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1916,10 +1918,10 @@ def get_image_features( @replace_return_docstrings(output_type=TFGroupViTModelOutput, config_class=GroupViTConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index 24cbde9af7c3..fd1f17edfb3a 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" TensorFlow Hubert model.""" + +from __future__ import annotations + import warnings from typing import Any, Dict, Optional, Tuple, Union @@ -642,12 +645,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -812,7 +815,7 @@ def __init__(self, config: HubertConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -856,7 +859,7 @@ def __init__(self, config: HubertConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -890,7 +893,7 @@ def __init__(self, config: HubertConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -958,7 +961,7 @@ def __init__(self, config: HubertConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -1048,7 +1051,7 @@ def _conv_out_length(input_length, kernel_size, stride): return input_lengths - def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: Optional[tf.Tensor] = None): + def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: tf.Tensor | None = None): """ Masks extracted features along time axis and/or along feature axis according to [SpecAugment](https://arxiv.org/abs/1904.08779). 
@@ -1096,13 +1099,13 @@ def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: Optio def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - output_attentions: Optional[tf.Tensor] = None, - output_hidden_states: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + output_attentions: tf.Tensor | None = None, + output_hidden_states: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: bool = False, **kwargs: Any, @@ -1299,11 +1302,11 @@ def __init__(self, config: HubertConfig, *inputs, **kwargs): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1401,13 +1404,13 @@ def freeze_feature_encoder(self): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 2755e055370b..67128e0c1338 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -14,6 +14,9 @@ # limitations under the License. 
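Note: the conversion is deliberately narrow. Only hints whose payload is an array or tensor move to the pipe syntax; boolean flags and nested containers such as `past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]]` keep their `typing` spelling in every hunk above, so a converted signature intentionally mixes both styles. An illustrative pair (not a line from the patch):

    attention_mask: np.ndarray | tf.Tensor | None = None,  # tensor-valued: new style
    use_cache: Optional[bool] = None,                      # plain flag: unchanged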
""" TF 2.0 LayoutLM model.""" + +from __future__ import annotations + import math import warnings from typing import Dict, Optional, Tuple, Union @@ -423,9 +426,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -502,9 +505,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -694,15 +697,15 @@ class PreTrainedModel @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -919,15 +922,15 @@ def __init__(self, config: LayoutLMConfig, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = 
None, return_dict: Optional[bool] = None, @@ -1039,17 +1042,17 @@ def get_prefix_bias_name(self) -> str: @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1162,17 +1165,17 @@ def __init__(self, config: LayoutLMConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1292,17 +1295,17 @@ def __init__(self, config: LayoutLMConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + 
inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1418,18 +1421,18 @@ def __init__(self, config: LayoutLMConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index 491ef186e522..67377c5baf8a 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -14,6 +14,9 @@ # limitations under the License. 
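Note: one hint escapes the sweep in the file below: the embeddings `call` keeps the untouched context line `bbox: tf.Tensor = None`, which types a None default as a bare tensor. Under the convention this patch establishes it would presumably read:

    bbox: tf.Tensor | None = None  # hypothetical follow-up, not part of this diff

It survives here only because the line is unchanged context rather than part of the diff.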
"""TF 2.0 LayoutLMv3 model.""" + +from __future__ import annotations + import collections import math from typing import Dict, List, Optional, Tuple, Union @@ -222,11 +225,11 @@ def create_position_ids(self, input_ids: tf.Tensor, inputs_embeds: tf.Tensor) -> def call( self, - input_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, bbox: tf.Tensor = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, training: bool = False, ) -> tf.Tensor: if position_ids is None: @@ -319,11 +322,11 @@ def cogview_attention(self, attention_scores: tf.Tensor, alpha: Union[float, int def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor], - head_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, + head_mask: tf.Tensor | None, output_attentions: bool, - rel_pos: Optional[tf.Tensor] = None, - rel_2d_pos: Optional[tf.Tensor] = None, + rel_pos: tf.Tensor | None = None, + rel_2d_pos: tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]: key_layer = self.transpose_for_scores(self.key(hidden_states)) @@ -398,11 +401,11 @@ def __init__(self, config: LayoutLMv3Config, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor], - head_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, + head_mask: tf.Tensor | None, output_attentions: bool, - rel_pos: Optional[tf.Tensor] = None, - rel_2d_pos: Optional[tf.Tensor] = None, + rel_pos: tf.Tensor | None = None, + rel_2d_pos: tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]: self_outputs = self.self_attention( @@ -469,11 +472,11 @@ def __init__(self, config: LayoutLMv3Config, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor], - head_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, + head_mask: tf.Tensor | None, output_attentions: bool, - rel_pos: Optional[tf.Tensor] = None, - rel_2d_pos: Optional[tf.Tensor] = None, + rel_pos: tf.Tensor | None = None, + rel_2d_pos: tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]: self_attention_outputs = self.attention( @@ -593,13 +596,13 @@ def _cal_2d_pos_emb(self, bbox: tf.Tensor): def call( self, hidden_states: tf.Tensor, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, - position_ids: Optional[tf.Tensor] = None, + position_ids: tf.Tensor | None = None, training: bool = False, ) -> Union[ TFBaseModelOutput, @@ -778,7 +781,7 @@ def get_extended_attention_mask(self, attention_mask: tf.Tensor) -> tf.Tensor: return extended_attention_mask - def get_head_mask(self, head_mask: Optional[tf.Tensor]) -> Union[tf.Tensor, List[Optional[tf.Tensor]]]: + def get_head_mask(self, head_mask: tf.Tensor | None) -> Union[tf.Tensor, List[tf.Tensor | None]]: if head_mask is None: return [None] * self.config.num_hidden_layers @@ -806,14 +809,14 @@ def get_head_mask(self, head_mask: Optional[tf.Tensor]) -> Union[tf.Tensor, List @unpack_inputs def call( 
self, - input_ids: Optional[tf.Tensor] = None, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1145,14 +1148,14 @@ def __init__(self, config, *inputs, **kwargs): @replace_return_docstrings(output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1272,18 +1275,18 @@ def __init__(self, config: LayoutLMv3Config, **kwargs): @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - bbox: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + bbox: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[ TFSequenceClassifierOutput, @@ -1392,18 +1395,18 @@ def __init__(self, config: LayoutLMv3Config, **kwargs): @replace_return_docstrings(output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = 
None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[ TFTokenClassifierOutput, @@ -1514,18 +1517,18 @@ def __init__(self, config: LayoutLMv3Config, **kwargs): @replace_return_docstrings(output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - start_positions: Optional[tf.Tensor] = None, - end_positions: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + start_positions: tf.Tensor | None = None, + end_positions: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - bbox: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + bbox: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: bool = False, ) -> Union[ diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 324482b4d245..4e815da33d9e 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -15,6 +15,8 @@ """ TF 2.0 LED model.""" +from __future__ import annotations + import random from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -1030,12 +1032,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training=False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -1238,12 +1240,12 @@ def __init__(self, config: LEDConfig, **kwargs): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - encoder_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + encoder_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, 
tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1389,9 +1391,9 @@ class TFLEDEncoderBaseModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -1452,14 +1454,14 @@ class TFLEDSeq2SeqModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_global_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None + encoder_global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -1517,16 +1519,16 @@ class TFLEDSeq2SeqLMOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_global_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None + encoder_global_attentions: Tuple[tf.Tensor] | None = None LED_START_DOCSTRING = r""" @@ -2383,22 +2385,22 @@ def set_output_embeddings(self, value): @replace_return_docstrings(output_type=TFLEDSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[TFLEDEncoderBaseModelOutput] = None, - global_attention_mask: 
Optional[Union[np.ndarray, tf.Tensor]] = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ): """ diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index c47df169655c..b5adb2c803e9 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -14,6 +14,9 @@ # limitations under the License. """Tensorflow Longformer model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Optional, Tuple, Union @@ -101,9 +104,9 @@ class TFLongformerBaseModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -149,9 +152,9 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -193,11 +196,11 @@ class TFLongformerMaskedLMOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -241,12 +244,12 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -288,11 +291,11 @@ class TFLongformerSequenceClassifierOutput(ModelOutput): in the sequence. 
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -336,11 +339,11 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -382,11 +385,11 @@ class TFLongformerTokenClassifierOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_sep_token=True): @@ -2038,13 +2041,13 @@ def __init__(self, config, *inputs, **kwargs): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -2113,17 +2116,17 @@ def get_prefix_bias_name(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = 
None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -2206,18 +2209,18 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -2369,17 +2372,17 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerSequenceClassifierOutput, Tuple[tf.Tensor]]: if input_ids is not None and not isinstance(input_ids, tf.Tensor): @@ -2491,17 +2494,17 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | 
tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -2619,13 +2622,13 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index f02d0ff6446a..59bc18591405 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -16,6 +16,9 @@ # limitations under the License. """ TF 2.0 LXMERT model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -90,14 +93,14 @@ class TFLxmertModelOutput(ModelOutput): the self-attention heads. 
""" - language_output: Optional[tf.Tensor] = None - vision_output: Optional[tf.Tensor] = None - pooled_output: Optional[tf.Tensor] = None - language_hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_hidden_states: Optional[Tuple[tf.Tensor]] = None - language_attentions: Optional[Tuple[tf.Tensor]] = None - vision_attentions: Optional[Tuple[tf.Tensor]] = None - cross_encoder_attentions: Optional[Tuple[tf.Tensor]] = None + language_output: tf.Tensor | None = None + vision_output: tf.Tensor | None = None + pooled_output: tf.Tensor | None = None + language_hidden_states: Tuple[tf.Tensor] | None = None + vision_hidden_states: Tuple[tf.Tensor] | None = None + language_attentions: Tuple[tf.Tensor] | None = None + vision_attentions: Tuple[tf.Tensor] | None = None + cross_encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -137,15 +140,15 @@ class TFLxmertForPreTrainingOutput(ModelOutput): """ - loss: Optional[tf.Tensor] = None - prediction_logits: Optional[tf.Tensor] = None - cross_relationship_score: Optional[tf.Tensor] = None - question_answering_score: Optional[tf.Tensor] = None - language_hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_hidden_states: Optional[Tuple[tf.Tensor]] = None - language_attentions: Optional[Tuple[tf.Tensor]] = None - vision_attentions: Optional[Tuple[tf.Tensor]] = None - cross_encoder_attentions: Optional[Tuple[tf.Tensor]] = None + loss: tf.Tensor | None = None + prediction_logits: tf.Tensor | None = None + cross_relationship_score: tf.Tensor | None = None + question_answering_score: tf.Tensor | None = None + language_hidden_states: Tuple[tf.Tensor] | None = None + vision_hidden_states: Tuple[tf.Tensor] | None = None + language_attentions: Tuple[tf.Tensor] | None = None + vision_attentions: Tuple[tf.Tensor] | None = None + cross_encoder_attentions: Tuple[tf.Tensor] | None = None class TFLxmertVisualFeatureEncoder(tf.keras.layers.Layer): @@ -945,13 +948,13 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - visual_feats: Optional[tf.Tensor] = None, - visual_pos: Optional[tf.Tensor] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - visual_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + visual_feats: tf.Tensor | None = None, + visual_pos: tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + visual_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 175115883201..208e9b8335d7 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -15,6 +15,8 @@ """ TF 2.0 Marian model.""" +from __future__ import annotations + import random from typing import Optional, Tuple, Union @@ -165,7 +167,7 @@ def _init_weight(n_pos: int, dim: int): return table def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, 
diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py
index 175115883201..208e9b8335d7 100644
--- a/src/transformers/models/marian/modeling_tf_marian.py
+++ b/src/transformers/models/marian/modeling_tf_marian.py
@@ -15,6 +15,8 @@
 """ TF 2.0 Marian model."""
 
+from __future__ import annotations
+
 import random
 from typing import Optional, Tuple, Union
@@ -165,7 +167,7 @@ def _init_weight(n_pos: int, dim: int):
         return table
 
     def call(
-        self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None
+        self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None
     ):
         """Input is expected to be of size [bsz x seqlen]."""
         if position_ids is None:
@@ -212,12 +214,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int):
     def call(
         self,
         hidden_states: tf.Tensor,
-        key_value_states: Optional[tf.Tensor] = None,
-        past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
+        key_value_states: tf.Tensor | None = None,
+        past_key_value: Tuple[Tuple[tf.Tensor]] | None = None,
+        attention_mask: tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
         training: Optional[bool] = False,
-    ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]:
+    ) -> Tuple[tf.Tensor, tf.Tensor | None]:
         """Input shape: Batch x Time x Channel"""
 
         # if key_value_states are provided this layer is used as a cross-attention layer
@@ -347,8 +349,8 @@ def __init__(self, config: MarianConfig, **kwargs):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]],
-        layer_head_mask: Optional[tf.Tensor],
+        attention_mask: np.ndarray | tf.Tensor | None,
+        layer_head_mask: tf.Tensor | None,
         training: Optional[bool] = False,
     ) -> tf.Tensor:
         """
@@ -417,11 +419,11 @@ def __init__(self, config: MarianConfig, **kwargs):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_layer_head_mask: Optional[tf.Tensor] = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
+        cross_attn_layer_head_mask: tf.Tensor | None = None,
         past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         training: Optional[bool] = False,
     ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
@@ -708,10 +710,10 @@ def set_embed_tokens(self, embed_tokens):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -870,15 +872,15 @@ def set_embed_tokens(self, embed_tokens):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        encoder_hidden_states: Optional[tf.Tensor] = None,
-        encoder_attention_mask: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None,
+        input_ids: tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        encoder_hidden_states: tf.Tensor | None = None,
+        encoder_attention_mask: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1097,18 +1099,18 @@ def set_input_embeddings(self, new_embeddings):
    @unpack_inputs
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        decoder_input_ids: Optional[tf.Tensor] = None,
-        decoder_attention_mask: Optional[tf.Tensor] = None,
-        decoder_position_ids: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        decoder_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        decoder_input_ids: tf.Tensor | None = None,
+        decoder_attention_mask: tf.Tensor | None = None,
+        decoder_position_ids: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        decoder_head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
         encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None,
         past_key_values: Tuple[Tuple[tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        decoder_inputs_embeds: Optional[tf.Tensor] = None,
+        inputs_embeds: tf.Tensor | None = None,
+        decoder_inputs_embeds: tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1202,18 +1204,18 @@ def get_decoder(self):
     )
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        decoder_input_ids: Optional[tf.Tensor] = None,
-        decoder_attention_mask: Optional[tf.Tensor] = None,
-        decoder_position_ids: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        decoder_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
-        encoder_outputs: Optional[tf.Tensor] = None,
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        decoder_inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        decoder_input_ids: tf.Tensor | None = None,
+        decoder_attention_mask: tf.Tensor | None = None,
+        decoder_position_ids: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        decoder_head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
+        encoder_outputs: tf.Tensor | None = None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        decoder_inputs_embeds: tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1330,23 +1332,23 @@ def set_bias(self, value):
     @add_end_docstrings(MARIAN_GENERATION_EXAMPLE)
     def call(
         self,
-        input_ids: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        decoder_input_ids: Optional[tf.Tensor] = None,
-        decoder_attention_mask: Optional[tf.Tensor] = None,
-        decoder_position_ids: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        decoder_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
+        input_ids: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        decoder_input_ids: tf.Tensor | None = None,
+        decoder_attention_mask: tf.Tensor | None = None,
+        decoder_position_ids: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        decoder_head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
         encoder_outputs: Optional[TFBaseModelOutput] = None,
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        decoder_inputs_embeds: Optional[tf.Tensor] = None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        decoder_inputs_embeds: tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: bool = False,
     ):
         r"""
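The `np.ndarray | tf.Tensor | None` unions in the signatures above document that a parameter accepts either array type, or may be omitted. A minimal usage sketch (illustrative function, not from the patch):

    from __future__ import annotations

    import numpy as np
    import tensorflow as tf


    def apply_mask(attention_mask: np.ndarray | tf.Tensor | None = None) -> tf.Tensor | None:
        # NumPy input is converted, a tf.Tensor passes through, None is preserved.
        if attention_mask is None:
            return None
        return tf.convert_to_tensor(attention_mask)


    apply_mask(np.ones((1, 4)))  # np.ndarray accepted
    apply_mask(tf.ones((1, 4)))  # tf.Tensor accepted
    apply_mask(None)             # None accepted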
diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py
index 13453bd22dba..293c564141b3 100644
--- a/src/transformers/models/mbart/modeling_tf_mbart.py
+++ b/src/transformers/models/mbart/modeling_tf_mbart.py
@@ -15,6 +15,8 @@
 """ TF 2.0 MBart model."""
 
+from __future__ import annotations
+
 import random
 from typing import Optional, Tuple, Union
@@ -131,7 +133,7 @@ def call(
         self,
         input_shape: Optional[tf.TensorShape] = None,
         past_key_values_length: int = 0,
-        position_ids: Optional[tf.Tensor] = None,
+        position_ids: tf.Tensor | None = None,
     ):
         """Input is expected to be of size [bsz x seqlen]."""
         if position_ids is None:
@@ -181,12 +183,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int):
     def call(
         self,
         hidden_states: tf.Tensor,
-        key_value_states: Optional[tf.Tensor] = None,
-        past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
+        key_value_states: tf.Tensor | None = None,
+        past_key_value: Tuple[Tuple[tf.Tensor]] | None = None,
+        attention_mask: tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
         training: Optional[bool] = False,
-    ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]:
+    ) -> Tuple[tf.Tensor, tf.Tensor | None]:
         """Input shape: Batch x Time x Channel"""
 
         # if key_value_states are provided this layer is used as a cross-attention layer
@@ -384,12 +386,12 @@ def __init__(self, config: MBartConfig, **kwargs):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[tf.Tensor] = None,
-        encoder_hidden_states: Optional[tf.Tensor] = None,
-        encoder_attention_mask: Optional[tf.Tensor] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_layer_head_mask: Optional[tf.Tensor] = None,
-        past_key_value: Optional[Tuple[tf.Tensor]] = None,
+        attention_mask: tf.Tensor | None = None,
+        encoder_hidden_states: tf.Tensor | None = None,
+        encoder_attention_mask: tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
+        cross_attn_layer_head_mask: tf.Tensor | None = None,
+        past_key_value: Tuple[tf.Tensor] | None = None,
         training: Optional[bool] = False,
     ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
         """
@@ -700,10 +702,10 @@ def set_embed_tokens(self, embed_tokens):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
+        input_ids: TFModelInputType | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -868,14 +870,14 @@ def set_embed_tokens(self, embed_tokens):
     def call(
         self,
         input_ids: TFModelInputType = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        position_ids: Optional[tf.Tensor] = None,
-        encoder_hidden_states: Optional[tf.Tensor] = None,
-        encoder_attention_mask: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None,
+        inputs_embeds: tf.Tensor | None = None,
+        attention_mask: tf.Tensor | None = None,
+        position_ids: tf.Tensor | None = None,
+        encoder_hidden_states: tf.Tensor | None = None,
+        encoder_attention_mask: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1100,17 +1102,17 @@ def set_input_embeddings(self, new_embeddings):
     def call(
         self,
         input_ids: TFModelInputType = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        decoder_input_ids: Optional[tf.Tensor] = None,
-        decoder_attention_mask: Optional[tf.Tensor] = None,
-        decoder_position_ids: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        decoder_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
+        attention_mask: tf.Tensor | None = None,
+        decoder_input_ids: tf.Tensor | None = None,
+        decoder_attention_mask: tf.Tensor | None = None,
+        decoder_position_ids: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        decoder_head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
         encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None,
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        decoder_inputs_embeds: Optional[tf.Tensor] = None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        decoder_inputs_embeds: tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1208,17 +1210,17 @@ def get_decoder(self):
     def call(
         self,
         input_ids: TFModelInputType = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        decoder_input_ids: Optional[tf.Tensor] = None,
-        decoder_attention_mask: Optional[tf.Tensor] = None,
-        decoder_position_ids: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        decoder_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
+        attention_mask: tf.Tensor | None = None,
+        decoder_input_ids: tf.Tensor | None = None,
+        decoder_attention_mask: tf.Tensor | None = None,
+        decoder_position_ids: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        decoder_head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
         encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None,
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        decoder_inputs_embeds: Optional[tf.Tensor] = None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None = None,
+        inputs_embeds: tf.Tensor | None = None,
+        decoder_inputs_embeds: tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1336,22 +1338,22 @@ def set_bias(self, value):
     def call(
         self,
         input_ids: TFModelInputType = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        decoder_input_ids: Optional[tf.Tensor] = None,
-        decoder_attention_mask: Optional[tf.Tensor] = None,
-        decoder_position_ids: Optional[tf.Tensor] = None,
-        head_mask: Optional[tf.Tensor] = None,
-        decoder_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_head_mask: Optional[tf.Tensor] = None,
+        attention_mask: tf.Tensor | None = None,
+        decoder_input_ids: tf.Tensor | None = None,
+        decoder_attention_mask: tf.Tensor | None = None,
+        decoder_position_ids: tf.Tensor | None = None,
+        head_mask: tf.Tensor | None = None,
+        decoder_head_mask: tf.Tensor | None = None,
+        cross_attn_head_mask: tf.Tensor | None = None,
         encoder_outputs: Optional[TFBaseModelOutput] = None,
         past_key_values: Tuple[Tuple[tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
-        decoder_inputs_embeds: Optional[tf.Tensor] = None,
+        inputs_embeds: tf.Tensor | None = None,
+        decoder_inputs_embeds: tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]:
         """
diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py
index c47cde847de4..eddb339074a3 100644
--- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py
+++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 """ TF 2.0 MobileBERT model."""
+
+from __future__ import annotations
+
 import warnings
 from dataclasses import dataclass
 from typing import Dict, Optional, Tuple, Union
@@ -846,11 +849,11 @@ class TFMobileBertForPreTrainingOutput(ModelOutput):
         heads.
     """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     prediction_logits: tf.Tensor = None
     seq_relationship_logits: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 MOBILEBERT_START_DOCSTRING = r"""
@@ -969,12 +972,12 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -1033,17 +1036,17 @@ def get_prefix_bias_name(self):
     @replace_return_docstrings(output_type=TFMobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
+        next_sentence_label: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFMobileBertForPreTrainingOutput]:
         r"""
@@ -1141,16 +1144,16 @@ def get_prefix_bias_name(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFMaskedLMOutput]:
         r"""
@@ -1224,16 +1227,16 @@ def __init__(self, config, *inputs, **kwargs):
     @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        next_sentence_label: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFNextSentencePredictorOutput]:
         r"""
@@ -1335,16 +1338,16 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFSequenceClassifierOutput]:
         r"""
@@ -1430,17 +1433,17 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFQuestionAnsweringModelOutput]:
         r"""
@@ -1546,16 +1549,16 @@ def dummy_inputs(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFMultipleChoiceModelOutput]:
         r"""
@@ -1674,16 +1677,16 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFTokenClassifierOutput]:
         r"""
diff --git a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py
index 1b06f36536d6..879c642800fe 100644
--- a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py
+++ b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py
@@ -16,6 +16,8 @@
 # Original license: https://github.com/apple/ml-cvnets/blob/main/LICENSE
 """ TensorFlow 2.0 MobileViT model."""
 
+from __future__ import annotations
+
 from typing import Dict, Optional, Tuple, Union
 
 import tensorflow as tf
@@ -663,7 +665,7 @@ class PreTrainedModel
     @unpack_inputs
     def call(
         self,
-        pixel_values: Optional[tf.Tensor] = None,
+        pixel_values: tf.Tensor | None = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         training: bool = False,
@@ -846,7 +848,7 @@ def __init__(self, config: MobileViTConfig, expand_output: bool = True, *inputs,
     )
     def call(
         self,
-        pixel_values: Optional[tf.Tensor] = None,
+        pixel_values: tf.Tensor | None = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         training: bool = False,
@@ -893,9 +895,9 @@ def __init__(self, config: MobileViTConfig, *inputs, **kwargs) -> None:
     )
     def call(
         self,
-        pixel_values: Optional[tf.Tensor] = None,
+        pixel_values: tf.Tensor | None = None,
         output_hidden_states: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         return_dict: Optional[bool] = None,
         training: Optional[bool] = False,
     ) -> Union[tuple, TFImageClassifierOutputWithNoAttention]:
@@ -1083,8 +1085,8 @@ def masked_loss(real, pred):
     @replace_return_docstrings(output_type=TFSemanticSegmenterOutputWithNoAttention, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        pixel_values: Optional[tf.Tensor] = None,
-        labels: Optional[tf.Tensor] = None,
+        pixel_values: tf.Tensor | None = None,
+        labels: tf.Tensor | None = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         training: bool = False,
diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py
index 08db31017308..2f4178d6cfc9 100644
--- a/src/transformers/models/mpnet/modeling_tf_mpnet.py
+++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py
@@ -16,6 +16,8 @@
 """ TF 2.0 MPNet model."""
 
+from __future__ import annotations
+
 import math
 import warnings
 from typing import Optional, Tuple, Union
@@ -682,11 +684,11 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
+        input_ids: TFModelInputType | None = None,
         attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
         position_ids: Optional[Union[np.array, tf.Tensor]] = None,
         head_mask: Optional[Union[np.array, tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        inputs_embeds: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -795,15 +797,15 @@ def get_prefix_bias_name(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: bool = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -898,15 +900,15 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
+        input_ids: TFModelInputType | None = None,
         attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
         position_ids: Optional[Union[np.array, tf.Tensor]] = None,
         head_mask: Optional[Union[np.array, tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        inputs_embeds: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: bool = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -987,15 +989,15 @@ def dummy_inputs(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: bool = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1096,15 +1098,15 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[tf.Tensor] = None,
+        labels: tf.Tensor | None = None,
         training: bool = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1176,16 +1178,16 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
+        input_ids: TFModelInputType | None = None,
         attention_mask: Optional[Union[np.array, tf.Tensor]] = None,
         position_ids: Optional[Union[np.array, tf.Tensor]] = None,
         head_mask: Optional[Union[np.array, tf.Tensor]] = None,
-        inputs_embeds: Optional[tf.Tensor] = None,
+        inputs_embeds: tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[tf.Tensor] = None,
-        end_positions: Optional[tf.Tensor] = None,
+        start_positions: tf.Tensor | None = None,
+        end_positions: tf.Tensor | None = None,
         training: bool = False,
         **kwargs,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py
index 7c04520c9c1f..3f8967241946 100644
--- a/src/transformers/models/openai/modeling_tf_openai.py
+++ b/src/transformers/models/openai/modeling_tf_openai.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 """ TF 2.0 OpenAI GPT model."""
 
+from __future__ import annotations
+
 from dataclasses import dataclass
 from typing import Optional, Tuple, Union
@@ -237,12 +239,12 @@ def _prune_heads(self, heads_to_prune):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -394,8 +396,8 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput):
 
     logits: tf.Tensor = None
     mc_logits: tf.Tensor = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 OPENAI_GPT_START_DOCSTRING = r"""
@@ -514,12 +516,12 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -576,16 +578,16 @@ def set_output_embeddings(self, value):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFCausalLMOutput]:
         r"""
@@ -661,13 +663,13 @@ def __init__(self, config, *inputs, **kwargs):
     @replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        mc_token_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        mc_token_ids: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -809,16 +811,16 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[Tuple, TFSequenceClassifierOutput]:
         r"""
diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py
index 1855fcb1bc03..227e56fdef55 100644
--- a/src/transformers/models/opt/modeling_tf_opt.py
+++ b/src/transformers/models/opt/modeling_tf_opt.py
@@ -15,6 +15,8 @@
 """ TF 2.0 OPT model."""
 
+from __future__ import annotations
+
 from typing import Optional, Tuple, Union
 
 import numpy as np
@@ -152,12 +154,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int):
     def call(
         self,
         hidden_states: tf.Tensor,
-        key_value_states: Optional[tf.Tensor] = None,
-        past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None,
-        attention_mask: Optional[tf.Tensor] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
+        key_value_states: tf.Tensor | None = None,
+        past_key_value: Tuple[Tuple[tf.Tensor]] | None = None,
+        attention_mask: tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
         training: Optional[bool] = False,
-    ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]:
+    ) -> Tuple[tf.Tensor, tf.Tensor | None]:
         """Input shape: Batch x Time x Channel"""
 
         # if key_value_states are provided this layer is used as a cross-attention layer
@@ -291,8 +293,8 @@ def __init__(self, config: OPTConfig, **kwargs):
     def call(
         self,
         hidden_states: tf.Tensor,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
         past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         training: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
@@ -552,10 +554,10 @@ def _prepare_decoder_attention_mask(self, attention_mask, input_shape, past_key_
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -732,11 +734,11 @@ def set_input_embeddings(self, new_embeddings):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -804,11 +806,11 @@ def set_input_embeddings(self, new_embeddings):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -901,13 +903,13 @@ def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache=
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
+        input_ids: TFModelInputType | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -15,6 +15,8 @@ """ TF 2.0 Pegasus model.""" +from __future__ import annotations + import random from typing import Optional, Tuple, Union @@ -167,7 +169,7 @@ def _init_weight(n_pos: int, dim: int): return table def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -214,12 +216,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -419,12 +421,12 @@ def __init__(self, config: PegasusConfig, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -712,10 +714,10 @@ def set_embed_tokens(self, embed_tokens): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -877,14 +879,14 @@ def set_embed_tokens(self, embed_tokens): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, past_key_values: 
Tuple[Tuple[tf.Tensor]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1106,18 +1108,18 @@ def set_input_embeddings(self, new_embeddings): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Tuple[Tuple[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1211,18 +1213,18 @@ def get_decoder(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1339,23 +1341,23 @@ def set_bias(self, value): @add_end_docstrings(PEGASUS_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: 
Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: """ diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 0ea2e554489b..d91fa71df8a6 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -15,6 +15,9 @@ """TFRAG model implementation.""" + +from __future__ import annotations + import copy from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -111,22 +114,22 @@ class TFRetrievAugLMMarginOutput(ModelOutput): average in the self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - doc_scores: Optional[tf.Tensor] = None - retrieved_doc_embeds: Optional[tf.Tensor] = None - retrieved_doc_ids: Optional[tf.Tensor] = None - context_input_ids: Optional[tf.Tensor] = None - context_attention_mask: Optional[tf.Tensor] = None - question_encoder_last_hidden_state: Optional[tf.Tensor] = None - question_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None - question_enc_attentions: Optional[Tuple[tf.Tensor]] = None - generator_enc_last_hidden_state: Optional[tf.Tensor] = None - generator_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None - generator_enc_attentions: Optional[Tuple[tf.Tensor]] = None - generator_dec_hidden_states: Optional[Tuple[tf.Tensor]] = None - generator_dec_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + doc_scores: tf.Tensor | None = None + retrieved_doc_embeds: tf.Tensor | None = None + retrieved_doc_ids: tf.Tensor | None = None + context_input_ids: tf.Tensor | None = None + context_attention_mask: tf.Tensor | None = None + question_encoder_last_hidden_state: tf.Tensor | None = None + question_enc_hidden_states: Tuple[tf.Tensor] | None = None + question_enc_attentions: Tuple[tf.Tensor] | None = None + generator_enc_last_hidden_state: tf.Tensor | None = None + generator_enc_hidden_states: Tuple[tf.Tensor] | None = None + generator_enc_attentions: Tuple[tf.Tensor] | None = None + generator_dec_hidden_states: Tuple[tf.Tensor] | None = None + generator_dec_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -196,20 +199,20 @@ class TFRetrievAugLMOutput(ModelOutput): """ logits: tf.Tensor = None - past_key_values: 
diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py
index 0ea2e554489b..d91fa71df8a6 100644
--- a/src/transformers/models/rag/modeling_tf_rag.py
+++ b/src/transformers/models/rag/modeling_tf_rag.py
@@ -15,6 +15,9 @@
 """TFRAG model implementation."""
+
+from __future__ import annotations
+
 import copy
 from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union
@@ -111,22 +114,22 @@ class TFRetrievAugLMMarginOutput(ModelOutput):
             average in the self-attention heads.
     """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    doc_scores: Optional[tf.Tensor] = None
-    retrieved_doc_embeds: Optional[tf.Tensor] = None
-    retrieved_doc_ids: Optional[tf.Tensor] = None
-    context_input_ids: Optional[tf.Tensor] = None
-    context_attention_mask: Optional[tf.Tensor] = None
-    question_encoder_last_hidden_state: Optional[tf.Tensor] = None
-    question_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    question_enc_attentions: Optional[Tuple[tf.Tensor]] = None
-    generator_enc_last_hidden_state: Optional[tf.Tensor] = None
-    generator_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    generator_enc_attentions: Optional[Tuple[tf.Tensor]] = None
-    generator_dec_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    generator_dec_attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    doc_scores: tf.Tensor | None = None
+    retrieved_doc_embeds: tf.Tensor | None = None
+    retrieved_doc_ids: tf.Tensor | None = None
+    context_input_ids: tf.Tensor | None = None
+    context_attention_mask: tf.Tensor | None = None
+    question_encoder_last_hidden_state: tf.Tensor | None = None
+    question_enc_hidden_states: Tuple[tf.Tensor] | None = None
+    question_enc_attentions: Tuple[tf.Tensor] | None = None
+    generator_enc_last_hidden_state: tf.Tensor | None = None
+    generator_enc_hidden_states: Tuple[tf.Tensor] | None = None
+    generator_enc_attentions: Tuple[tf.Tensor] | None = None
+    generator_dec_hidden_states: Tuple[tf.Tensor] | None = None
+    generator_dec_attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -196,20 +199,20 @@ class TFRetrievAugLMOutput(ModelOutput):
     """
 
     logits: tf.Tensor = None
-    past_key_values: Optional[List[tf.Tensor]] = None
-    doc_scores: Optional[tf.Tensor] = None
-    retrieved_doc_embeds: Optional[tf.Tensor] = None
-    retrieved_doc_ids: Optional[tf.Tensor] = None
-    context_input_ids: Optional[tf.Tensor] = None
-    context_attention_mask: Optional[tf.Tensor] = None
-    question_encoder_last_hidden_state: Optional[tf.Tensor] = None
-    question_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    question_enc_attentions: Optional[Tuple[tf.Tensor]] = None
-    generator_enc_last_hidden_state: Optional[tf.Tensor] = None
-    generator_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    generator_enc_attentions: Optional[Tuple[tf.Tensor]] = None
-    generator_dec_hidden_states: Optional[Tuple[tf.Tensor]] = None
-    generator_dec_attentions: Optional[Tuple[tf.Tensor]] = None
+    past_key_values: List[tf.Tensor] | None = None
+    doc_scores: tf.Tensor | None = None
+    retrieved_doc_embeds: tf.Tensor | None = None
+    retrieved_doc_ids: tf.Tensor | None = None
+    context_input_ids: tf.Tensor | None = None
+    context_attention_mask: tf.Tensor | None = None
+    question_encoder_last_hidden_state: tf.Tensor | None = None
+    question_enc_hidden_states: Tuple[tf.Tensor] | None = None
+    question_enc_attentions: Tuple[tf.Tensor] | None = None
+    generator_enc_last_hidden_state: tf.Tensor | None = None
+    generator_enc_hidden_states: Tuple[tf.Tensor] | None = None
+    generator_enc_attentions: Tuple[tf.Tensor] | None = None
+    generator_dec_hidden_states: Tuple[tf.Tensor] | None = None
+    generator_dec_attentions: Tuple[tf.Tensor] | None = None
 
 
 class TFRagPreTrainedModel(TFPreTrainedModel):
@@ -545,15 +548,15 @@ def set_retriever(self, retriever: RagRetriever):
     @replace_return_docstrings(output_type=TFRetrievAugLMOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        encoder_outputs: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        doc_scores: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        context_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        context_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        doc_scores: np.ndarray | tf.Tensor | None = None,
+        context_input_ids: np.ndarray | tf.Tensor | None = None,
+        context_attention_mask: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -841,22 +844,22 @@ def marginalize(self, seq_logits, doc_scores, n_docs=None):
     @replace_return_docstrings(output_type=TFRetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        encoder_outputs: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        doc_scores: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        context_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        context_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        doc_scores: np.ndarray | tf.Tensor | None = None,
+        context_input_ids: np.ndarray | tf.Tensor | None = None,
+        context_attention_mask: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         output_retrieved: Optional[bool] = None,
         n_docs: Optional[int] = None,
         do_marginalize: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         reduce_loss: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         training: bool = False,
@@ -993,8 +996,8 @@ def call(
     def generate(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[tf.Tensor] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: tf.Tensor | None = None,
         context_input_ids=None,
         context_attention_mask=None,
         doc_scores=None,
@@ -1347,22 +1350,22 @@ def question_encoder(self):
     @replace_return_docstrings(output_type=TFRetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        decoder_input_ids: np.ndarray | tf.Tensor | None = None,
+        decoder_attention_mask: np.ndarray | tf.Tensor | None = None,
+        encoder_outputs: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
-        doc_scores: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        context_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        context_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        doc_scores: np.ndarray | tf.Tensor | None = None,
+        context_input_ids: np.ndarray | tf.Tensor | None = None,
+        context_attention_mask: np.ndarray | tf.Tensor | None = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         output_retrieved: Optional[bool] = None,
         n_docs: Optional[int] = None,
         exclude_bos_score: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         reduce_loss: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         training: bool = False,
@@ -1579,8 +1582,8 @@ def gather2d(target, id_tensor):
     def generate(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[tf.Tensor] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: tf.Tensor | None = None,
         context_input_ids=None,
         context_attention_mask=None,
         doc_scores=None,
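The ModelOutput dataclasses reworked above rely on the same mechanism: under
postponed evaluation, `@dataclass` only inspects the *presence* of a field
annotation (now stored as a string), not its value, so field creation and the
`None` defaults behave exactly as before. A small sketch with a hypothetical
output class, not taken from this patch:

    from __future__ import annotations

    from dataclasses import dataclass
    from typing import Tuple

    import tensorflow as tf

    @dataclass
    class SketchOutput:  # hypothetical stand-in for a ModelOutput subclass
        logits: tf.Tensor = None
        past_key_values: Tuple[tf.Tensor] | None = None

    # The annotation survives only as its source string; the dataclass
    # machinery never evaluates the `|` union it contains.
    assert SketchOutput.__annotations__["past_key_values"] == "Tuple[tf.Tensor] | None"
    assert SketchOutput().past_key_values is None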
diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py
index c4dc8c5a148b..097bd977a4a1 100644
--- a/src/transformers/models/rembert/modeling_tf_rembert.py
+++ b/src/transformers/models/rembert/modeling_tf_rembert.py
@@ -15,6 +15,8 @@
 """ TF 2.0 RemBERT model."""
 
+from __future__ import annotations
+
 import math
 from typing import Dict, Optional, Tuple, Union
@@ -382,9 +384,9 @@ def call(
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor,
         head_mask: tf.Tensor,
-        encoder_hidden_states: Optional[tf.Tensor],
-        encoder_attention_mask: Optional[tf.Tensor],
-        past_key_value: Optional[Tuple[tf.Tensor]],
+        encoder_hidden_states: tf.Tensor | None,
+        encoder_attention_mask: tf.Tensor | None,
+        past_key_value: Tuple[tf.Tensor] | None,
         output_attentions: bool,
         training: bool = False,
     ) -> Tuple[tf.Tensor]:
@@ -645,14 +647,14 @@ class PreTrainedModel
     # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -946,14 +948,14 @@ def __init__(self, config: RemBertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -1048,16 +1050,16 @@ def get_lm_head(self) -> tf.keras.layers.Layer:
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1137,20 +1139,20 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attenti
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]:
         r"""
@@ -1259,16 +1261,16 @@ def __init__(self, config: RemBertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1348,16 +1350,16 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1466,16 +1468,16 @@ def __init__(self, config: RemBertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1544,17 +1546,17 @@ def __init__(self, config: RemBertConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
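One runtime consequence, and presumably why this patch also reworks the tests
that depended on `get_type_hints`: on Python < 3.10, `typing.get_type_hints()`
tries to *evaluate* the stringified `X | None` annotations and raises
`TypeError`, because `type.__or__` does not exist there. Reading the raw
annotation string via `inspect.signature` works on every supported version. A
sketch with a hypothetical function, not from the diff:

    from __future__ import annotations

    import inspect
    import typing

    import tensorflow as tf

    def call(attention_mask: tf.Tensor | None = None):
        ...

    try:
        typing.get_type_hints(call)  # raises TypeError on Python < 3.10
    except TypeError:
        pass

    # Version-independent: the annotation is available as its source string.
    sig = inspect.signature(call)
    assert sig.parameters["attention_mask"].annotation == "tf.Tensor | None"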
diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py
index 7aa2c9e07a3d..585c4d31ad0d 100644
--- a/src/transformers/models/roberta/modeling_tf_roberta.py
+++ b/src/transformers/models/roberta/modeling_tf_roberta.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 """ TF 2.0 RoBERTa model."""
+
+from __future__ import annotations
+
 import math
 import warnings
 from typing import Optional, Tuple, Union
@@ -431,9 +434,9 @@ def call(
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor,
         head_mask: tf.Tensor,
-        encoder_hidden_states: Optional[tf.Tensor],
-        encoder_attention_mask: Optional[tf.Tensor],
-        past_key_value: Optional[Tuple[tf.Tensor]],
+        encoder_hidden_states: tf.Tensor | None,
+        encoder_attention_mask: tf.Tensor | None,
+        past_key_value: Tuple[tf.Tensor] | None,
         output_attentions: bool,
         training: bool = False,
     ) -> Tuple[tf.Tensor]:
@@ -510,9 +513,9 @@ def call(
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor,
         head_mask: tf.Tensor,
-        encoder_hidden_states: Optional[tf.Tensor],
-        encoder_attention_mask: Optional[tf.Tensor],
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]],
+        encoder_hidden_states: tf.Tensor | None,
+        encoder_attention_mask: tf.Tensor | None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None,
         use_cache: Optional[bool],
         output_attentions: bool,
         output_hidden_states: bool,
@@ -609,14 +612,14 @@ class PreTrainedModel
     # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -923,14 +926,14 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -1081,16 +1084,16 @@ def get_prefix_bias_name(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1178,20 +1181,20 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attenti
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]:
         r"""
@@ -1329,16 +1332,16 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1423,16 +1426,16 @@ def dummy_inputs(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1539,16 +1542,16 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1624,17 +1627,17 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
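Note that the rework is scoped to tensor-valued arguments, as in every file
above: hints that union only `np.ndarray`/`tf.Tensor` (or `TFModelInputType`)
move to `| None`, while `bool`/`int` flags and nested structures such as
`past_key_values` keep their `Optional[...]`/`Union[...]` spelling. A sketch of
the resulting signature style (hypothetical function, not from the diff):

    from __future__ import annotations

    from typing import Optional, Tuple, Union

    import numpy as np
    import tensorflow as tf

    def call(
        # tensor-valued inputs: rewritten to PEP 604 unions
        input_ids: np.ndarray | tf.Tensor | None = None,
        # nested/non-tensor arguments: left in the typing.Optional form
        past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
        use_cache: Optional[bool] = None,
    ):
        ...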
diff --git a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py
index fedfea56a7a9..80a834ad5854 100644
--- a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py
+++ b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 """ TF 2.0 RoBERTa-PreLayerNorm model."""
+
+from __future__ import annotations
+
 import math
 import warnings
 from typing import Optional, Tuple, Union
@@ -435,9 +438,9 @@ def call(
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor,
         head_mask: tf.Tensor,
-        encoder_hidden_states: Optional[tf.Tensor],
-        encoder_attention_mask: Optional[tf.Tensor],
-        past_key_value: Optional[Tuple[tf.Tensor]],
+        encoder_hidden_states: tf.Tensor | None,
+        encoder_attention_mask: tf.Tensor | None,
+        past_key_value: Tuple[tf.Tensor] | None,
         output_attentions: bool,
         training: bool = False,
     ) -> Tuple[tf.Tensor]:
@@ -514,9 +517,9 @@ def call(
         hidden_states: tf.Tensor,
         attention_mask: tf.Tensor,
         head_mask: tf.Tensor,
-        encoder_hidden_states: Optional[tf.Tensor],
-        encoder_attention_mask: Optional[tf.Tensor],
-        past_key_values: Optional[Tuple[Tuple[tf.Tensor]]],
+        encoder_hidden_states: tf.Tensor | None,
+        encoder_attention_mask: tf.Tensor | None,
+        past_key_values: Tuple[Tuple[tf.Tensor]] | None,
         use_cache: Optional[bool],
         output_attentions: bool,
         output_hidden_states: bool,
@@ -610,14 +613,14 @@ class PreTrainedModel
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -925,14 +928,14 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
@@ -1090,16 +1093,16 @@ def get_prefix_bias_name(self):
     # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForMaskedLM.call with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta->roberta_prelayernorm
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1194,20 +1197,20 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attenti
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
+        encoder_hidden_states: np.ndarray | tf.Tensor | None = None,
+        encoder_attention_mask: np.ndarray | tf.Tensor | None = None,
         past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]:
         r"""
@@ -1349,16 +1352,16 @@ def __init__(self, config, *inputs, **kwargs):
     # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForSequenceClassification.call with roberta->roberta_prelayernorm
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1446,16 +1449,16 @@ def dummy_inputs(self):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1563,16 +1566,16 @@ def __init__(self, config, *inputs, **kwargs):
     # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForTokenClassification.call with roberta->roberta_prelayernorm
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1649,17 +1652,17 @@ def __init__(self, config, *inputs, **kwargs):
     # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForQuestionAnswering.call with roberta->roberta_prelayernorm
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        position_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py
index 2d1387d2d8d8..50b57571461d 100644
--- a/src/transformers/models/roformer/modeling_tf_roformer.py
+++ b/src/transformers/models/roformer/modeling_tf_roformer.py
@@ -15,6 +15,8 @@
 """ TF 2.0 RoFormer model."""
 
+from __future__ import annotations
+
 import math
 from typing import Dict, Optional, Tuple, Union
@@ -604,11 +606,11 @@ class PreTrainedModel
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -809,11 +811,11 @@ def __init__(self, config: RoFormerConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -866,15 +868,15 @@ def get_lm_head(self) -> tf.keras.layers.Layer:
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -940,15 +942,15 @@ def get_lm_head(self) -> tf.keras.layers.Layer:
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFCausalLMOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1049,15 +1051,15 @@ def __init__(self, config: RoFormerConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1138,15 +1140,15 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1250,15 +1252,15 @@ def __init__(self, config: RoFormerConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1326,16 +1328,16 @@ def __init__(self, config: RoFormerConfig, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py
index ddd8e526a79a..8b00f13b4415 100644
--- a/src/transformers/models/sam/modeling_tf_sam.py
+++ b/src/transformers/models/sam/modeling_tf_sam.py
@@ -17,6 +17,9 @@
 discrepancy, the original file should be regarded as the 'reference' version.
 """
+
+from __future__ import annotations
+
 import collections
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple, Union
@@ -26,7 +29,7 @@
 from ...activations_tf import ACT2FN
 from ...modeling_tf_outputs import TFBaseModelOutput
-from ...modeling_tf_utils import TFPreTrainedModel, shape_list, unpack_inputs
+from ...modeling_tf_utils import TFModelInputType, TFPreTrainedModel, shape_list, unpack_inputs
 from ...tf_utils import flatten, functional_layernorm
 from ...utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_sam import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig
@@ -69,10 +72,10 @@ class TFSamVisionEncoderOutput(ModelOutput):
             heads.
""" - image_embeds: Optional[tf.Tensor] = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -106,9 +109,9 @@ class TFSamImageSegmentationOutput(ModelOutput): iou_scores: tf.Tensor = None pred_masks: tf.Tensor = None - vision_hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_attentions: Optional[Tuple[tf.Tensor]] = None - mask_decoder_attentions: Optional[Tuple[tf.Tensor]] = None + vision_hidden_states: Tuple[tf.Tensor] | None = None + vision_attentions: Tuple[tf.Tensor] | None = None + mask_decoder_attentions: Tuple[tf.Tensor] | None = None class TFSamPatchEmbeddings(tf.keras.layers.Layer): @@ -734,9 +737,9 @@ def call( self, batch_size: Optional[int], input_points: Optional[Tuple[tf.Tensor, tf.Tensor]], - input_labels: Optional[tf.Tensor], - input_boxes: Optional[tf.Tensor], - input_masks: Optional[tf.Tensor], + input_labels: tf.Tensor | None, + input_boxes: tf.Tensor | None, + input_masks: tf.Tensor | None, ) -> Tuple[tf.Tensor, tf.Tensor]: """ Embeds different types of prompts, returning both sparse and dense embeddings. @@ -1084,7 +1087,7 @@ def get_input_embeddings(self): def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1326,10 +1329,10 @@ def get_image_embeddings( def get_prompt_embeddings( self, - input_points: Optional[tf.Tensor] = None, - input_labels: Optional[tf.Tensor] = None, - input_boxes: Optional[tf.Tensor] = None, - input_masks: Optional[tf.Tensor] = None, + input_points: tf.Tensor | None = None, + input_labels: tf.Tensor | None = None, + input_boxes: tf.Tensor | None = None, + input_masks: tf.Tensor | None = None, ): r""" Returns the prompt embeddings by passing the input points, labels, boxes and masks through the prompt encoder. @@ -1360,12 +1363,12 @@ def get_prompt_embeddings( @add_start_docstrings_to_model_forward(SAM_INPUTS_DOCSTRING) def call( self, - pixel_values: Optional[tf.Tensor] = None, - input_points: Optional[tf.Tensor] = None, - input_labels: Optional[tf.Tensor] = None, - input_boxes: Optional[tf.Tensor] = None, - input_masks: Optional[tf.Tensor] = None, - image_embeddings: Optional[tf.Tensor] = None, + pixel_values: TFModelInputType | None = None, + input_points: tf.Tensor | None = None, + input_labels: tf.Tensor | None = None, + input_boxes: tf.Tensor | None = None, + input_masks: tf.Tensor | None = None, + image_embeddings: tf.Tensor | None = None, multimask_output: bool = True, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/segformer/modeling_tf_segformer.py b/src/transformers/models/segformer/modeling_tf_segformer.py index c877e86acfab..47b7ce8e8c5c 100644 --- a/src/transformers/models/segformer/modeling_tf_segformer.py +++ b/src/transformers/models/segformer/modeling_tf_segformer.py @@ -14,6 +14,9 @@ # limitations under the License. 
""" TensorFlow SegFormer model.""" + +from __future__ import annotations + import math from typing import Dict, Optional, Tuple, Union @@ -664,8 +667,8 @@ def __init__(self, config: SegformerConfig, *inputs, **kwargs): ) def call( self, - pixel_values: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -816,7 +819,7 @@ def masked_loss(real, pred): def call( self, pixel_values: tf.Tensor, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index e5c38afa83cb..3651506894c7 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -15,6 +15,8 @@ """ TensorFlow Speech2Text model.""" +from __future__ import annotations + import random from typing import Dict, Optional, Tuple, Union @@ -273,12 +275,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -476,12 +478,12 @@ def __init__(self, config: Speech2TextConfig, **kwargs): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1253,16 +1255,16 @@ def get_decoder(self): ) def call( self, - input_features: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + 
decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1343,17 +1345,17 @@ def set_output_embeddings(self, new_embeddings): @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_features: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index 61352843c2f2..f75bf230c0ad 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -15,6 +15,8 @@ """ TF 2.0 Swin Transformer model.""" +from __future__ import annotations + import collections.abc import math import warnings @@ -95,9 +97,9 @@ class TFSwinEncoderOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None @dataclass @@ -130,10 +132,10 @@ class TFSwinModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - pooler_output: Optional[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + pooler_output: tf.Tensor | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + 
reshaped_hidden_states: Tuple[tf.Tensor] | None = None @dataclass @@ -165,11 +167,11 @@ class TFSwinMaskedImageModelingOutput(ModelOutput): include the spatial dimensions. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None reconstruction: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None @property def logits(self): @@ -210,11 +212,11 @@ class TFSwinImageClassifierOutput(ModelOutput): include the spatial dimensions. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None def window_partition(input_feature: tf.Tensor, window_size: int) -> tf.Tensor: @@ -529,8 +531,8 @@ def transpose_for_scores(self, x: tf.Tensor) -> tf.Tensor: def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor, ...]: @@ -619,8 +621,8 @@ def prune_heads(self, heads): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> tf.Tensor: @@ -683,7 +685,7 @@ def __init__( self.intermediate = TFSwinIntermediate(config, dim, name="intermediate") self.swin_output = TFSwinOutput(config, dim, name="output") - def get_attn_mask(self, height: int, width: int, window_size: int, shift_size: int) -> Optional[tf.Tensor]: + def get_attn_mask(self, height: int, width: int, window_size: int, shift_size: int) -> tf.Tensor | None: img_mask = tf.zeros((height, width)) height_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1)) width_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1)) @@ -725,7 +727,7 @@ def call( self, hidden_states: tf.Tensor, input_dimensions: Tuple[int, int], - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> tf.Tensor: @@ -832,7 +834,7 @@ def call( self, hidden_states: tf.Tensor, input_dimensions: Tuple[int, int], - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor, ...]: @@ -886,7 +888,7 @@ def call( self, hidden_states: tf.Tensor, input_dimensions: Tuple[int, int], - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, @@ -1128,9 +1130,9 @@ def get_head_mask(self, head_mask: Optional[Any]) -> List: @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor 
| None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1210,9 +1212,9 @@ def __init__( @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1317,9 +1319,9 @@ def __init__(self, config: SwinConfig): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1449,9 +1451,9 @@ def __init__(self, config: SwinConfig): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index ec3e67db26d1..012f0c41b017 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -15,6 +15,9 @@ # limitations under the License. 
""" TF 2.0 T5 model.""" + +from __future__ import annotations + import copy import itertools import math @@ -1148,16 +1151,16 @@ def get_decoder(self): @replace_return_docstrings(output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1327,17 +1330,17 @@ def get_decoder(self): @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1548,10 +1551,10 @@ def get_encoder(self): @replace_return_docstrings(output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: 
np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index f876730b095d..b17fddc32720 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -14,6 +14,9 @@ # limitations under the License. """TF 2.0 TAPAS model.""" + +from __future__ import annotations + import enum import math from dataclasses import dataclass @@ -132,11 +135,11 @@ class TFTableQuestionAnsweringOutput(ModelOutput): the self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - logits_aggregation: Optional[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + logits_aggregation: tf.Tensor | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFTapasEmbeddings(tf.keras.layers.Layer): @@ -486,9 +489,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -565,9 +568,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -758,12 +761,12 @@ class PreTrainedModel @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -984,12 +987,12 @@ def __init__(self, config: TapasConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: 
Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1069,16 +1072,16 @@ def get_lm_head(self) -> tf.keras.layers.Layer: @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1281,21 +1284,21 @@ def __init__(self, config: TapasConfig, *inputs, **kwargs): @replace_return_docstrings(output_type=TFTableQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - table_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - aggregation_labels: Optional[Union[np.ndarray, tf.Tensor]] = None, - float_answer: Optional[Union[np.ndarray, tf.Tensor]] = None, - numeric_values: Optional[Union[np.ndarray, tf.Tensor]] = None, - numeric_values_scale: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + table_mask: np.ndarray | tf.Tensor | None = None, + aggregation_labels: np.ndarray | tf.Tensor | None = None, + float_answer: np.ndarray | tf.Tensor | None = None, + numeric_values: np.ndarray | tf.Tensor | None = None, + numeric_values_scale: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTableQuestionAnsweringOutput, Tuple[tf.Tensor]]: r""" @@ -1606,16 +1609,16 @@ def __init__(self, config: TapasConfig, 
*inputs, **kwargs): @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 93af21651112..decf18b8a7a0 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -17,6 +17,8 @@ TF 2.0 Transformer XL model. """ +from __future__ import annotations + from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -541,14 +543,14 @@ def _update_mems(self, hids, mems, mlen, qlen): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ): # the original code for Transformer-XL used shapes [len, bsz] but we want a unified interface in the library @@ -722,8 +724,8 @@ class TFTransfoXLModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None mems: List[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -755,8 +757,8 @@ class TFTransfoXLLMHeadModelOutput(ModelOutput): prediction_scores: tf.Tensor = None mems: List[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -786,11 +788,11 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput): heads. 
""" - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None mems: List[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None TRANSFO_XL_START_DOCSTRING = r""" @@ -892,10 +894,10 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -971,14 +973,14 @@ def init_mems(self, bsz): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ): if input_ids is not None: @@ -1075,14 +1077,14 @@ def get_output_embeddings(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFTransfoXLSequenceClassifierOutputWithPast]: r""" diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 439c5d668a93..ad39a0ae82ba 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -15,6 +15,8 @@ """ Classes to support TF Vision-Encoder-Text-Decoder architectures""" +from __future__ import annotations + import re import warnings from typing import Optional, Tuple, Union @@ -492,13 +494,13 @@ def from_encoder_decoder_pretrained( @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: np.ndarray | tf.Tensor | None = None, + 
decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py index a2211f245ec5..6e0c65a813f1 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py @@ -15,6 +15,8 @@ """TensorFlow VisionTextDualEncoder model.""" +from __future__ import annotations + import re from typing import Optional, Tuple, Union @@ -340,12 +342,12 @@ def get_image_features( @replace_return_docstrings(output_type=TFCLIPOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_loss: Optional[bool] = None, - token_type_ids: Optional[tf.Tensor] = None, + token_type_ids: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index 6d0c579a43db..6a07719c916c 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -15,6 +15,8 @@ """ TF 2.0 ViT model.""" +from __future__ import annotations + import collections.abc import math from typing import Dict, Optional, Tuple, Union @@ -487,8 +489,8 @@ class PreTrainedModel @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = None, @@ -675,8 +677,8 @@ def __init__(self, config: ViTConfig, *inputs, add_pooling_layer=True, **kwargs) ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = None, @@ -766,13 +768,13 @@ def __init__(self, config: ViTConfig, *inputs, **kwargs): ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, 
output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index afb40478ccda..5f5c1a6830d6 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -14,6 +14,9 @@ # limitations under the License. """ TF 2.0 ViT MAE (masked autoencoder) model.""" + +from __future__ import annotations + import collections.abc import math from copy import deepcopy @@ -74,8 +77,8 @@ class TFViTMAEModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None mask: tf.Tensor = None ids_restore: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -97,8 +100,8 @@ class TFViTMAEDecoderOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -125,12 +128,12 @@ class TFViTMAEForPreTrainingOutput(ModelOutput): the self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None mask: tf.Tensor = None ids_restore: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None def get_2d_sincos_pos_embed(embed_dim, grid_size, add_cls_token=False): @@ -232,7 +235,7 @@ def build(self, input_shape: tf.TensorShape): super().build(input_shape) - def random_masking(self, sequence: tf.Tensor, noise: Optional[tf.Tensor] = None): + def random_masking(self, sequence: tf.Tensor, noise: tf.Tensor | None = None): """ Perform per-sample random masking by per-sample shuffling. Per-sample shuffling is done by argsort random noise. 
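Every file touched above follows the same recipe, so it is worth spelling out once why it is safe. The `tf.Tensor | None` spelling (PEP 604) is only native syntax on Python 3.10+, but each module now begins with `from __future__ import annotations` (PEP 563), which stores annotations as plain strings instead of evaluating them at definition time, so the `|` union never actually runs on older interpreters. A minimal sketch of the effect, not part of the patch itself (the function and parameter names below are illustrative only):

    from __future__ import annotations

    import tensorflow as tf

    def call(pixel_values: tf.Tensor | None = None) -> tf.Tensor | None:
        # With postponed evaluation, the annotation is kept as the literal
        # string "tf.Tensor | None"; no union object is ever constructed,
        # so this module imports cleanly on Python 3.7-3.9 as well as 3.10+.
        return pixel_values

    print(call.__annotations__)
    # {'pixel_values': 'tf.Tensor | None', 'return': 'tf.Tensor | None'}
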
@@ -639,9 +642,9 @@ class PreTrainedModel @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, noise: tf.Tensor = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -816,9 +819,9 @@ def get_input_embeddings(self): @replace_return_docstrings(output_type=TFViTMAEModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, noise: tf.Tensor = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1107,9 +1110,9 @@ def forward_loss(self, pixel_values, pred, mask): @replace_return_docstrings(output_type=TFViTMAEForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, noise: tf.Tensor = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index dcc59d7f7322..3ee16127b323 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -14,6 +14,9 @@ # limitations under the License. 
""" TensorFlow Wav2Vec2 model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union @@ -84,8 +87,8 @@ class TFWav2Vec2BaseModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None extract_features: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None def _sample_without_replacement(distribution, num_samples): @@ -673,12 +676,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -841,7 +844,7 @@ def __init__(self, config: Wav2Vec2Config, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -884,7 +887,7 @@ def __init__(self, config: Wav2Vec2Config, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -917,7 +920,7 @@ def __init__(self, config: Wav2Vec2Config, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -984,7 +987,7 @@ def __init__(self, config: Wav2Vec2Config, **kwargs): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -1074,7 +1077,7 @@ def _conv_out_length(input_length, kernel_size, stride): return input_lengths - def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: Optional[tf.Tensor] = None): + def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: tf.Tensor | None = None): """ Masks extracted features along time axis and/or along feature axis according to [SpecAugment](https://arxiv.org/abs/1904.08779). 
@@ -1122,11 +1125,11 @@ def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: Optio def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1367,11 +1370,11 @@ def __init__(self, config: Wav2Vec2Config, *inputs, **kwargs): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1473,13 +1476,13 @@ def freeze_feature_encoder(self): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, @@ -1639,11 +1642,11 @@ def freeze_base_model(self): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ): return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 0d2a2682cc97..11168df3f9ca 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -15,6 +15,8 @@ """ TensorFlow Whisper model.""" +from __future__ import annotations + import math import random from typing import Dict, Optional, Tuple, Union @@ -171,12 +173,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + 
layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -376,12 +378,12 @@ def __init__(self, config: WhisperConfig, **kwargs): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1119,13 +1121,13 @@ def encoder(self): @unpack_inputs def call( self, - input_features: Optional[TFModelInputType] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, decoder_inputs_embeds: Optional[Tuple[Union[np.ndarray, tf.Tensor]]] = None, @@ -1234,17 +1236,17 @@ def resize_token_embeddings(self, new_num_tokens: int) -> tf.keras.layers.Embedd @unpack_inputs def call( self, - input_features: Optional[TFModelInputType] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, decoder_inputs_embeds: Optional[Tuple[Union[np.ndarray, tf.Tensor]]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + 
labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 1a0146bf19d7..236720ae49df 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -15,6 +15,8 @@ """ TF 2.0 XGLM model.""" +from __future__ import annotations + import math import random from typing import Any, Optional, Tuple, Union @@ -185,12 +187,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -337,12 +339,12 @@ def __init__(self, config: XGLMConfig, **kwargs: Any) -> None: def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -456,13 +458,13 @@ def set_input_embeddings(self, value: TFSharedEmbeddings) -> None: def _prepare_decoder_attention_mask( self, - attention_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, input_shape: tf.TensorShape, past_key_values_length: int, ) -> tf.Tensor: # create causal mask # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - combined_attention_mask: Optional[tf.Tensor] = None + combined_attention_mask: tf.Tensor | None = None if input_shape[-1] > 1: combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length) @@ -476,7 +478,7 @@ def _prepare_decoder_attention_mask( return combined_attention_mask - def embed_positions(self, position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None) -> tf.Tensor: + def embed_positions(self, position_ids: np.ndarray | tf.Tensor | None = None) -> tf.Tensor: position_ids += self.offset positions = tf.gather(self._embed_positions_weights, position_ids, axis=0) return positions @@ -484,15 +486,15 @@ def embed_positions(self, position_ids: Optional[Union[np.ndarray, tf.Tensor]] = @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: 
Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -785,15 +787,15 @@ def __init__( ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -905,16 +907,16 @@ def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache= ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git 
a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index da9bd1c6034f..1815b27c8595 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -16,6 +16,9 @@ TF 2.0 XLM model. """ + +from __future__ import annotations + import itertools import warnings from dataclasses import dataclass @@ -558,8 +561,8 @@ class TFXLMWithLMHeadModelOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None XLM_START_DOCSTRING = r""" @@ -833,15 +836,15 @@ def prepare_inputs_for_generation(self, inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -904,19 +907,19 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1010,19 +1013,19 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, 
tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: if input_ids is not None: @@ -1133,19 +1136,19 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1217,20 +1220,20 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - 
end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py index 2f51c032f150..ae2bae7d7aa7 100644 --- a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 XLM-RoBERTa model.""" + +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -520,9 +523,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -599,9 +602,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -699,14 +702,14 @@ class PreTrainedModel # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -917,14 +920,14 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: 
TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1077,16 +1080,16 @@ def get_prefix_bias_name(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1179,20 +1182,20 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attenti ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1332,16 +1335,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, 
tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1429,16 +1432,16 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1546,16 +1549,16 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1632,17 +1635,17 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = 
None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        start_positions: np.ndarray | tf.Tensor | None = None,
+        end_positions: np.ndarray | tf.Tensor | None = None,
         training: Optional[bool] = False,
     ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]:
         r"""
diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py
index 52538ced57ed..1d8a6692c090 100644
--- a/src/transformers/models/xlnet/modeling_tf_xlnet.py
+++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py
@@ -17,6 +17,9 @@
  TF 2.0 XLNet model.
 """
+
+from __future__ import annotations
+
 import warnings
 from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union
@@ -195,9 +198,9 @@ def call(
         attn_mask_g,
         r,
         seg_mat,
-        mems: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        mems: np.ndarray | tf.Tensor | None = None,
+        target_mapping: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = False,
         training: bool = False,
     ):
@@ -369,9 +372,9 @@ def call(
         attn_mask,
         pos_emb,
         seg_mat,
-        mems: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        mems: np.ndarray | tf.Tensor | None = None,
+        target_mapping: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
         output_attentions: Optional[bool] = False,
         training: bool = False,
     ):
@@ -582,15 +585,15 @@ def relative_positional_encoding(self, qlen, klen, bsz=None):
     @unpack_inputs
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        mems: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        mems: np.ndarray | tf.Tensor | None = None,
+        perm_mask: np.ndarray | tf.Tensor | None = None,
+        target_mapping: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        input_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_mems: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -824,9 +827,9 @@ class TFXLNetModelOutput(ModelOutput):
     """
 
     last_hidden_state: tf.Tensor = None
-    mems: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    mems: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -859,11 +862,11 @@ class TFXLNetLMHeadModelOutput(ModelOutput):
             heads.
     """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    mems: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    mems: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -893,11 +896,11 @@ class TFXLNetForSequenceClassificationOutput(ModelOutput):
             heads.
     """
 
-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    mems: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    mems: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -927,11 +930,11 @@ class TFXLNetForTokenClassificationOutput(ModelOutput):
            heads.
    """

-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    mems: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    mems: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -963,11 +966,11 @@ class TFXLNetForMultipleChoiceOutput(ModelOutput):
            heads.
    """

-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    mems: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    mems: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 @dataclass
@@ -999,12 +1002,12 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput):
            heads.
    """

-    loss: Optional[tf.Tensor] = None
+    loss: tf.Tensor | None = None
     start_logits: tf.Tensor = None
     end_logits: tf.Tensor = None
-    mems: Optional[List[tf.Tensor]] = None
-    hidden_states: Optional[Tuple[tf.Tensor]] = None
-    attentions: Optional[Tuple[tf.Tensor]] = None
+    mems: List[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor] | None = None
 
 
 XLNET_START_DOCSTRING = r"""
@@ -1140,15 +1143,15 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        mems: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        mems: np.ndarray | tf.Tensor | None = None,
+        perm_mask: np.ndarray | tf.Tensor | None = None,
+        target_mapping: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        input_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_mems: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
@@ -1249,20 +1252,20 @@ def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_mems=N
     @replace_return_docstrings(output_type=TFXLNetLMHeadModelOutput, config_class=_CONFIG_FOR_DOC)
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        mems: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        input_ids: TFModelInputType | None = None,
+        attention_mask: np.ndarray | tf.Tensor | None = None,
+        mems: np.ndarray | tf.Tensor | None = None,
+        perm_mask: np.ndarray | tf.Tensor | None = None,
+        target_mapping: np.ndarray | tf.Tensor | None = None,
+        token_type_ids: np.ndarray | tf.Tensor | None = None,
+        input_mask: np.ndarray | tf.Tensor | None = None,
+        head_mask: np.ndarray | tf.Tensor | None = None,
+        inputs_embeds: np.ndarray | tf.Tensor | None = None,
         use_mems: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
+        labels: np.ndarray | tf.Tensor | None = None,
         training: bool = False,
     ) -> Union[TFXLNetLMHeadModelOutput, Tuple[tf.Tensor]]:
         r"""
@@ -1379,20 +1382,20 @@ def __init__(self, config, *inputs, **kwargs):
     )
     def call(
         self,
-        input_ids: Optional[TFModelInputType] = None,
-        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        mems: Optional[Union[np.ndarray, tf.Tensor]] = None,
-        perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
-
target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForSequenceClassificationOutput, Tuple[tf.Tensor]]: r""" @@ -1484,20 +1487,20 @@ def dummy_inputs(self): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForMultipleChoiceOutput, Tuple[tf.Tensor]]: r""" @@ -1604,20 +1607,20 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | 
None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForTokenClassificationOutput, Tuple[tf.Tensor]]: r""" @@ -1689,21 +1692,21 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForQuestionAnsweringSimpleOutput, Tuple[tf.Tensor]]: r""" diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index ffe5e7de95b9..80e2d8ed1e09 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -386,9 +386,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -465,9 +465,9 @@ def call( hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, 
output_hidden_states: bool, @@ -639,14 +639,14 @@ class PreTrainedModel @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -937,14 +937,14 @@ def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, *inputs, ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1038,16 +1038,16 @@ def get_lm_head(self) -> tf.keras.layers.Layer: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1129,20 +1129,20 @@ def 
prepare_inputs_for_generation(self, inputs, past_key_values=None, attention_ ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1274,16 +1274,16 @@ def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, *inputs, ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1362,16 +1362,16 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]: ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: 
Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1487,16 +1487,16 @@ def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, *inputs, ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1566,17 +1566,17 @@ def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, *inputs, ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1777,12 +1777,12 @@ def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training=False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -1962,12 +1962,12 @@ def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs) def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: 
Optional[tf.Tensor] = None,
-        encoder_attention_mask: Optional[tf.Tensor] = None,
-        layer_head_mask: Optional[tf.Tensor] = None,
-        cross_attn_layer_head_mask: Optional[tf.Tensor] = None,
-        past_key_value: Optional[Tuple[tf.Tensor]] = None,
+        attention_mask: tf.Tensor | None = None,
+        encoder_hidden_states: tf.Tensor | None = None,
+        encoder_attention_mask: tf.Tensor | None = None,
+        layer_head_mask: tf.Tensor | None = None,
+        cross_attn_layer_head_mask: tf.Tensor | None = None,
+        past_key_value: Tuple[tf.Tensor] | None = None,
         training=False,
     ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]:
         """
diff --git a/tests/generation/test_tf_logits_process.py b/tests/generation/test_tf_logits_process.py
index a1f665c9a761..e87c843d9cb4 100644
--- a/tests/generation/test_tf_logits_process.py
+++ b/tests/generation/test_tf_logits_process.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 import numpy as np
diff --git a/tests/generation/test_tf_utils.py b/tests/generation/test_tf_utils.py
index 6fdad1ef6365..186e0c8d4327 100644
--- a/tests/generation/test_tf_utils.py
+++ b/tests/generation/test_tf_utils.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import os
 import tempfile
 import unittest
diff --git a/tests/models/albert/test_modeling_tf_albert.py b/tests/models/albert/test_modeling_tf_albert.py
index 104fb092529a..97b073e850e2 100644
--- a/tests/models/albert/test_modeling_tf_albert.py
+++ b/tests/models/albert/test_modeling_tf_albert.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import AlbertConfig, is_tf_available
diff --git a/tests/models/auto/test_modeling_tf_auto.py b/tests/models/auto/test_modeling_tf_auto.py
index 1a355d88bb5a..c8754ca42702 100644
--- a/tests/models/auto/test_modeling_tf_auto.py
+++ b/tests/models/auto/test_modeling_tf_auto.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import copy
 import tempfile
 import unittest
diff --git a/tests/models/auto/test_modeling_tf_pytorch.py b/tests/models/auto/test_modeling_tf_pytorch.py
index c60b8fc2f517..3e213f29562a 100644
--- a/tests/models/auto/test_modeling_tf_pytorch.py
+++ b/tests/models/auto/test_modeling_tf_pytorch.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import is_tf_available, is_torch_available
diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py
index 0f0f8f9793c0..c113011c567d 100644
--- a/tests/models/bart/test_modeling_tf_bart.py
+++ b/tests/models/bart/test_modeling_tf_bart.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import copy
 import tempfile
 import unittest
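Aside: a minimal sketch (not part of the patch) of why every module touched here also gains `from __future__ import annotations`. Under PEP 563, the new `np.ndarray | tf.Tensor | None` annotations are stored as plain strings and never evaluated at import time, so they do not require Python 3.10's runtime `|` operator. The `call` function below is a hypothetical stand-in, not code from this repository.

# Illustrative sketch only: without the future import, evaluating the
# union annotation at definition time raises TypeError on Python < 3.10.
from __future__ import annotations

import numpy as np
import tensorflow as tf


def call(attention_mask: np.ndarray | tf.Tensor | None = None) -> tf.Tensor:
    # Under PEP 563 the annotation above is just the string
    # "np.ndarray | tf.Tensor | None"; it is never evaluated.
    if attention_mask is None:
        attention_mask = np.ones((1, 1))
    return tf.convert_to_tensor(attention_mask)


print(call.__annotations__["attention_mask"])  # "np.ndarray | tf.Tensor | None"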
diff --git a/tests/models/bert/test_modeling_tf_bert.py b/tests/models/bert/test_modeling_tf_bert.py
index 59521acec398..a8a2159fe13b 100644
--- a/tests/models/bert/test_modeling_tf_bert.py
+++ b/tests/models/bert/test_modeling_tf_bert.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import BertConfig, is_tf_available
diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
index 2db959e9f7f5..5fd6faefec86 100644
--- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import BlenderbotConfig, BlenderbotTokenizer, is_tf_available
diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
index 67a4f7ad7bfb..5bc5c4afe91d 100644
--- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import BlenderbotSmallConfig, BlenderbotSmallTokenizer, is_tf_available
diff --git a/tests/models/blip/test_modeling_tf_blip.py b/tests/models/blip/test_modeling_tf_blip.py
index 3bb7b87edbb5..af7533c69893 100644
--- a/tests/models/blip/test_modeling_tf_blip.py
+++ b/tests/models/blip/test_modeling_tf_blip.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow Blip model. """
+from __future__ import annotations
+
 import inspect
 import tempfile
 import unittest
diff --git a/tests/models/blip/test_modeling_tf_blip_text.py b/tests/models/blip/test_modeling_tf_blip_text.py
index 261056e918ee..2733a9fa6a4c 100644
--- a/tests/models/blip/test_modeling_tf_blip_text.py
+++ b/tests/models/blip/test_modeling_tf_blip_text.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Testing suite for the TensorFlow Blip model. """
+from __future__ import annotations
+
 import unittest
 
 import numpy as np
diff --git a/tests/models/bort/test_modeling_tf_bort.py b/tests/models/bort/test_modeling_tf_bort.py
index 8053afbd30cf..35abe53d89d7 100644
--- a/tests/models/bort/test_modeling_tf_bort.py
+++ b/tests/models/bort/test_modeling_tf_bort.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import is_tf_available
diff --git a/tests/models/camembert/test_modeling_tf_camembert.py b/tests/models/camembert/test_modeling_tf_camembert.py
index dc542526852d..425bdbc4b0ac 100644
--- a/tests/models/camembert/test_modeling_tf_camembert.py
+++ b/tests/models/camembert/test_modeling_tf_camembert.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import is_tf_available
diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py
index 6cd20a47a7f0..10b9954fc884 100644
--- a/tests/models/clip/test_modeling_tf_clip.py
+++ b/tests/models/clip/test_modeling_tf_clip.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow CLIP model. """
""" +from __future__ import annotations + import inspect import os import tempfile diff --git a/tests/models/convbert/test_modeling_tf_convbert.py b/tests/models/convbert/test_modeling_tf_convbert.py index 0c259110e716..84ed4de818f6 100644 --- a/tests/models/convbert/test_modeling_tf_convbert.py +++ b/tests/models/convbert/test_modeling_tf_convbert.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import tempfile import unittest diff --git a/tests/models/convnext/test_modeling_tf_convnext.py b/tests/models/convnext/test_modeling_tf_convnext.py index 72981c09d65e..8d049cf9f501 100644 --- a/tests/models/convnext/test_modeling_tf_convnext.py +++ b/tests/models/convnext/test_modeling_tf_convnext.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow ConvNext model. """ +from __future__ import annotations + import inspect import unittest from typing import List, Tuple diff --git a/tests/models/ctrl/test_modeling_tf_ctrl.py b/tests/models/ctrl/test_modeling_tf_ctrl.py index c71c96bc9da9..4d94a97828cf 100644 --- a/tests/models/ctrl/test_modeling_tf_ctrl.py +++ b/tests/models/ctrl/test_modeling_tf_ctrl.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import CTRLConfig, is_tf_available diff --git a/tests/models/cvt/test_modeling_tf_cvt.py b/tests/models/cvt/test_modeling_tf_cvt.py index f0f7b9c5d823..78d95931b3b5 100644 --- a/tests/models/cvt/test_modeling_tf_cvt.py +++ b/tests/models/cvt/test_modeling_tf_cvt.py @@ -1,6 +1,8 @@ """ Testing suite for the Tensorflow CvT model. """ +from __future__ import annotations + import inspect import unittest from math import floor diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py index dfa890d25a9e..6a30c83ebaf9 100644 --- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow Data2VecVision model. """ +from __future__ import annotations + import collections.abc import inspect import unittest diff --git a/tests/models/deberta/test_modeling_tf_deberta.py b/tests/models/deberta/test_modeling_tf_deberta.py index 424d9e0b2b41..9b69d55001cf 100644 --- a/tests/models/deberta/test_modeling_tf_deberta.py +++ b/tests/models/deberta/test_modeling_tf_deberta.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import DebertaConfig, is_tf_available diff --git a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py index 60391635eedf..96ebe375d97b 100644 --- a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py +++ b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py @@ -14,6 +14,8 @@ # limitations under the License. 
diff --git a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py
index 60391635eedf..96ebe375d97b 100644
--- a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py
+++ b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import DebertaV2Config, is_tf_available
diff --git a/tests/models/deit/test_modeling_tf_deit.py b/tests/models/deit/test_modeling_tf_deit.py
index 223d164d4aaf..b350a5d546b0 100644
--- a/tests/models/deit/test_modeling_tf_deit.py
+++ b/tests/models/deit/test_modeling_tf_deit.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow DeiT model. """
+from __future__ import annotations
+
 import inspect
 import unittest
 
@@ -242,7 +244,7 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
 
         if return_labels:
-            if model_class.__name__ == "DeiTForImageClassificationWithTeacher":
+            if "labels" in inputs_dict and "labels" not in inspect.signature(model_class.call).parameters:
                 del inputs_dict["labels"]
 
         return inputs_dict
diff --git a/tests/models/distilbert/test_modeling_tf_distilbert.py b/tests/models/distilbert/test_modeling_tf_distilbert.py
index 1f4f3c2b4697..4e96c909765a 100644
--- a/tests/models/distilbert/test_modeling_tf_distilbert.py
+++ b/tests/models/distilbert/test_modeling_tf_distilbert.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import DistilBertConfig, is_tf_available
diff --git a/tests/models/dpr/test_modeling_tf_dpr.py b/tests/models/dpr/test_modeling_tf_dpr.py
index 64dea041b53f..f788a5163398 100644
--- a/tests/models/dpr/test_modeling_tf_dpr.py
+++ b/tests/models/dpr/test_modeling_tf_dpr.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import is_tf_available
diff --git a/tests/models/electra/test_modeling_tf_electra.py b/tests/models/electra/test_modeling_tf_electra.py
index ae092e8a17dc..fe60c5627103 100644
--- a/tests/models/electra/test_modeling_tf_electra.py
+++ b/tests/models/electra/test_modeling_tf_electra.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import ElectraConfig, is_tf_available
diff --git a/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py b/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py
index 76ebd687f77e..aa22e961f653 100644
--- a/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py
+++ b/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import copy
 import os
 import tempfile
diff --git a/tests/models/esm/test_modeling_tf_esm.py b/tests/models/esm/test_modeling_tf_esm.py
index dc9d430d07ed..d06e3c59ba87 100644
--- a/tests/models/esm/test_modeling_tf_esm.py
+++ b/tests/models/esm/test_modeling_tf_esm.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import EsmConfig, is_tf_available
diff --git a/tests/models/flaubert/test_modeling_tf_flaubert.py b/tests/models/flaubert/test_modeling_tf_flaubert.py
index 6b7f4fc0316e..b751445d12cc 100644
--- a/tests/models/flaubert/test_modeling_tf_flaubert.py
+++ b/tests/models/flaubert/test_modeling_tf_flaubert.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/funnel/test_modeling_tf_funnel.py b/tests/models/funnel/test_modeling_tf_funnel.py index 6780605e8936..5aea7e4309b5 100644 --- a/tests/models/funnel/test_modeling_tf_funnel.py +++ b/tests/models/funnel/test_modeling_tf_funnel.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import FunnelConfig, is_tf_available diff --git a/tests/models/gpt2/test_modeling_tf_gpt2.py b/tests/models/gpt2/test_modeling_tf_gpt2.py index 7171997546d6..c69ab863373d 100644 --- a/tests/models/gpt2/test_modeling_tf_gpt2.py +++ b/tests/models/gpt2/test_modeling_tf_gpt2.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import GPT2Config, is_tf_available diff --git a/tests/models/gptj/test_modeling_tf_gptj.py b/tests/models/gptj/test_modeling_tf_gptj.py index 3aa63d2790a4..0e4dc9f5831d 100644 --- a/tests/models/gptj/test_modeling_tf_gptj.py +++ b/tests/models/gptj/test_modeling_tf_gptj.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import AutoTokenizer, GPTJConfig, is_tf_available diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py index bd499a50fb6c..a80ef606e5fc 100644 --- a/tests/models/groupvit/test_modeling_tf_groupvit.py +++ b/tests/models/groupvit/test_modeling_tf_groupvit.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow GroupViT model. """ +from __future__ import annotations + import inspect import os import random diff --git a/tests/models/hubert/test_modeling_tf_hubert.py b/tests/models/hubert/test_modeling_tf_hubert.py index a48ed0634e84..0b8e1e2df94a 100644 --- a/tests/models/hubert/test_modeling_tf_hubert.py +++ b/tests/models/hubert/test_modeling_tf_hubert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import copy import inspect import math diff --git a/tests/models/layoutlm/test_modeling_tf_layoutlm.py b/tests/models/layoutlm/test_modeling_tf_layoutlm.py index 95e24023bb23..2d134f23d42c 100644 --- a/tests/models/layoutlm/test_modeling_tf_layoutlm.py +++ b/tests/models/layoutlm/test_modeling_tf_layoutlm.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest import numpy as np diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py index df103194ab25..a1e2cd590836 100644 --- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow LayoutLMv3 model. """ +from __future__ import annotations + import copy import inspect import unittest diff --git a/tests/models/led/test_modeling_tf_led.py b/tests/models/led/test_modeling_tf_led.py index 7bac1ced835b..8735aeb721d7 100644 --- a/tests/models/led/test_modeling_tf_led.py +++ b/tests/models/led/test_modeling_tf_led.py @@ -14,6 +14,8 @@ # limitations under the License. 
+from __future__ import annotations + import unittest from transformers import LEDConfig, is_tf_available diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py index b5452bc80ac5..dcdd68b18ffd 100644 --- a/tests/models/longformer/test_modeling_tf_longformer.py +++ b/tests/models/longformer/test_modeling_tf_longformer.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/lxmert/test_modeling_tf_lxmert.py b/tests/models/lxmert/test_modeling_tf_lxmert.py index cd2095f693d6..411de960f31d 100644 --- a/tests/models/lxmert/test_modeling_tf_lxmert.py +++ b/tests/models/lxmert/test_modeling_tf_lxmert.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import tempfile import unittest diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py index 16b19b0f9763..5a624fda9a38 100644 --- a/tests/models/marian/test_modeling_tf_marian.py +++ b/tests/models/marian/test_modeling_tf_marian.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import tempfile import unittest import warnings diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py index b143fc6877b5..6c36d705e81b 100644 --- a/tests/models/mbart/test_modeling_tf_mbart.py +++ b/tests/models/mbart/test_modeling_tf_mbart.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import tempfile import unittest diff --git a/tests/models/mobilebert/test_modeling_tf_mobilebert.py b/tests/models/mobilebert/test_modeling_tf_mobilebert.py index 69d2fc6768e4..293126ab6147 100644 --- a/tests/models/mobilebert/test_modeling_tf_mobilebert.py +++ b/tests/models/mobilebert/test_modeling_tf_mobilebert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import MobileBertConfig, is_tf_available diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py index e4a956dff2c3..37d7db39e68d 100644 --- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow MobileViT model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/mpnet/test_modeling_tf_mpnet.py b/tests/models/mpnet/test_modeling_tf_mpnet.py index 4936a5289903..381b6e81dd8d 100644 --- a/tests/models/mpnet/test_modeling_tf_mpnet.py +++ b/tests/models/mpnet/test_modeling_tf_mpnet.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import MPNetConfig, is_tf_available diff --git a/tests/models/mt5/test_modeling_tf_mt5.py b/tests/models/mt5/test_modeling_tf_mt5.py index 0c934f0314c8..facb63dd7931 100644 --- a/tests/models/mt5/test_modeling_tf_mt5.py +++ b/tests/models/mt5/test_modeling_tf_mt5.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/openai/test_modeling_tf_openai.py b/tests/models/openai/test_modeling_tf_openai.py index a4cf71bf1a9f..a82da911a4c8 100644 --- a/tests/models/openai/test_modeling_tf_openai.py +++ b/tests/models/openai/test_modeling_tf_openai.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import OpenAIGPTConfig, is_tf_available diff --git a/tests/models/opt/test_modeling_tf_opt.py b/tests/models/opt/test_modeling_tf_opt.py index 0ae3411812dc..85514c9d7204 100644 --- a/tests/models/opt/test_modeling_tf_opt.py +++ b/tests/models/opt/test_modeling_tf_opt.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest import numpy as np diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py index 6816cc34ef82..b34a3dcfb5cc 100644 --- a/tests/models/pegasus/test_modeling_tf_pegasus.py +++ b/tests/models/pegasus/test_modeling_tf_pegasus.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import tempfile import unittest diff --git a/tests/models/rag/test_modeling_tf_rag.py b/tests/models/rag/test_modeling_tf_rag.py index 4a0e4176b4e3..b4720f7c7f0d 100644 --- a/tests/models/rag/test_modeling_tf_rag.py +++ b/tests/models/rag/test_modeling_tf_rag.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json import os import shutil diff --git a/tests/models/regnet/test_modeling_tf_regnet.py b/tests/models/regnet/test_modeling_tf_regnet.py index f5f5cfd4b99d..cee3995d2100 100644 --- a/tests/models/regnet/test_modeling_tf_regnet.py +++ b/tests/models/regnet/test_modeling_tf_regnet.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow RegNet model. """ +from __future__ import annotations + import inspect import unittest from typing import List, Tuple diff --git a/tests/models/rembert/test_modeling_tf_rembert.py b/tests/models/rembert/test_modeling_tf_rembert.py index 7ab71e9c6e24..e70bd7033fc1 100644 --- a/tests/models/rembert/test_modeling_tf_rembert.py +++ b/tests/models/rembert/test_modeling_tf_rembert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import RemBertConfig, is_tf_available diff --git a/tests/models/resnet/test_modeling_tf_resnet.py b/tests/models/resnet/test_modeling_tf_resnet.py index 0a8ccc00415c..e6f8d121c278 100644 --- a/tests/models/resnet/test_modeling_tf_resnet.py +++ b/tests/models/resnet/test_modeling_tf_resnet.py @@ -15,6 +15,8 @@ """ Testing suite for the Tensorflow ResNet model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/roberta/test_modeling_tf_roberta.py b/tests/models/roberta/test_modeling_tf_roberta.py index efa54ba45f9c..3d7b6953c085 100644 --- a/tests/models/roberta/test_modeling_tf_roberta.py +++ b/tests/models/roberta/test_modeling_tf_roberta.py @@ -14,6 +14,8 @@ # limitations under the License. 
+from __future__ import annotations
+
 import unittest
 
 from transformers import RobertaConfig, is_tf_available
diff --git a/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py b/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py
index 6de20c0e1d04..4e1bd2e319af 100644
--- a/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py
+++ b/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import RobertaPreLayerNormConfig, is_tf_available
diff --git a/tests/models/roformer/test_modeling_tf_roformer.py b/tests/models/roformer/test_modeling_tf_roformer.py
index 0a632e39a2ed..52c630e2beae 100644
--- a/tests/models/roformer/test_modeling_tf_roformer.py
+++ b/tests/models/roformer/test_modeling_tf_roformer.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import RoFormerConfig, is_tf_available
diff --git a/tests/models/sam/test_modeling_tf_sam.py b/tests/models/sam/test_modeling_tf_sam.py
index fc8dd79765a0..4e918a1cd13e 100644
--- a/tests/models/sam/test_modeling_tf_sam.py
+++ b/tests/models/sam/test_modeling_tf_sam.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow SAM model. """
 
+from __future__ import annotations
+
 import inspect
 import unittest
 
diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py
index 79c58ebe401b..b831e8ddbc2b 100644
--- a/tests/models/segformer/test_modeling_tf_segformer.py
+++ b/tests/models/segformer/test_modeling_tf_segformer.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 """ Testing suite for the TensorFlow SegFormer model. """
 
+from __future__ import annotations
+
 import inspect
 import unittest
 from typing import List, Tuple
diff --git a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py
index 75789fd6d9b8..b283b4478bda 100644
--- a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py
+++ b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 """ Testing suite for the TensorFlow Speech2Text model. """
 
+from __future__ import annotations
+
 import inspect
 import unittest
 
diff --git a/tests/models/swin/test_modeling_tf_swin.py b/tests/models/swin/test_modeling_tf_swin.py
index 32de917a11cb..a898d22fb132 100644
--- a/tests/models/swin/test_modeling_tf_swin.py
+++ b/tests/models/swin/test_modeling_tf_swin.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TF 2.0 Swin model. """
 
+from __future__ import annotations
+
 import inspect
 import unittest
 
diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py
index a1d784ae2f9a..7a75f51cd768 100644
--- a/tests/models/t5/test_modeling_tf_t5.py
+++ b/tests/models/t5/test_modeling_tf_t5.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import unittest
 
 from transformers import T5Config, is_tf_available
diff --git a/tests/models/tapas/test_modeling_tf_tapas.py b/tests/models/tapas/test_modeling_tf_tapas.py
index c3cc5fae3a9c..ce98394cb868 100644
--- a/tests/models/tapas/test_modeling_tf_tapas.py
+++ b/tests/models/tapas/test_modeling_tf_tapas.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import copy
 import unittest
 
diff --git a/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py b/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py
index 47880013b97e..ac820ea8fab0 100644
--- a/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py
+++ b/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import random
 import unittest
 
diff --git a/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py b/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py
index 1e594f5de551..04062014b846 100644
--- a/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py
+++ b/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow VisionEncoderDecoder model. """
 
+from __future__ import annotations
+
 import copy
 import os
 import tempfile
diff --git a/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py b/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py
index 696a302722ab..1f27f831e8d7 100644
--- a/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py
+++ b/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py
@@ -15,6 +15,8 @@
 """ Testing suite for the PyTorch VisionTextDualEncoder model. """
 
+from __future__ import annotations
+
 import collections
 import tempfile
 import unittest
diff --git a/tests/models/vit/test_modeling_tf_vit.py b/tests/models/vit/test_modeling_tf_vit.py
index 111223de323b..72ca1b19dc2b 100644
--- a/tests/models/vit/test_modeling_tf_vit.py
+++ b/tests/models/vit/test_modeling_tf_vit.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow ViT model. """
 
+from __future__ import annotations
+
 import inspect
 import unittest
 
diff --git a/tests/models/vit_mae/test_modeling_tf_vit_mae.py b/tests/models/vit_mae/test_modeling_tf_vit_mae.py
index 53d68b644ac8..d5e16e963850 100644
--- a/tests/models/vit_mae/test_modeling_tf_vit_mae.py
+++ b/tests/models/vit_mae/test_modeling_tf_vit_mae.py
@@ -15,6 +15,8 @@
 """ Testing suite for the TensorFlow ViTMAE model. """
 
+from __future__ import annotations
+
 import copy
 import inspect
 import json
diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
index e30aeb6aaa82..ef4c38e2a303 100644
--- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
@@ -14,6 +14,8 @@
 # limitations under the License.
+from __future__ import annotations
+
 import copy
 import glob
 import inspect
diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py
index a52994899ad8..b9ad982176ef 100644
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 """ Testing suite for the TensorFlow Whisper model. """
 
+from __future__ import annotations
+
 import inspect
 import tempfile
 import traceback
diff --git a/tests/models/xglm/test_modeling_tf_xglm.py b/tests/models/xglm/test_modeling_tf_xglm.py
index 61fd80572594..e2b8cc2e6cbc 100644
--- a/tests/models/xglm/test_modeling_tf_xglm.py
+++ b/tests/models/xglm/test_modeling_tf_xglm.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import XGLMConfig, XGLMTokenizer, is_tf_available
diff --git a/tests/models/xlm/test_modeling_tf_xlm.py b/tests/models/xlm/test_modeling_tf_xlm.py
index 2b1fb2f963c0..5b576f02c91e 100644
--- a/tests/models/xlm/test_modeling_tf_xlm.py
+++ b/tests/models/xlm/test_modeling_tf_xlm.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import is_tf_available
diff --git a/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py b/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py
index 695a403b7b0b..1ecac55310fb 100644
--- a/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py
+++ b/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 
 from transformers import is_tf_available
diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py
index bbc310aa8b1c..6d76462fda9e 100644
--- a/tests/models/xlnet/test_modeling_tf_xlnet.py
+++ b/tests/models/xlnet/test_modeling_tf_xlnet.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import inspect
 import random
 import unittest
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index 220560d9238a..02d5077e233c 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -14,6 +14,8 @@
 # limitations under the License.
+from __future__ import annotations
+
 import copy
 import inspect
 import json
@@ -22,10 +24,9 @@
 import tempfile
 import unittest
 import unittest.mock as mock
-from dataclasses import fields
 from importlib import import_module
 from math import isnan
-from typing import List, Tuple, get_type_hints
+from typing import List, Tuple
 
 from datasets import Dataset
 from huggingface_hub import HfFolder, Repository, delete_repo
@@ -140,26 +141,6 @@ def _config_zero_init(config):
     return configs_no_init
 
 
-def _return_type_has_loss(model):
-    return_type = get_type_hints(model.call)
-    if "return" not in return_type:
-        return False
-    return_type = return_type["return"]
-    if hasattr(return_type, "__args__"):  # Awkward check for union because UnionType only turns up in 3.10
-        for type_annotation in return_type.__args__:
-            if inspect.isclass(type_annotation) and issubclass(type_annotation, ModelOutput):
-                field_names = [field.name for field in fields(type_annotation)]
-                if "loss" in field_names:
-                    return True
-        return False
-    elif isinstance(return_type, tuple):
-        return False
-    elif isinstance(return_type, ModelOutput):
-        class_fields = fields(return_type)
-        return "loss" in class_fields
-    return False
-
-
 @require_tf
 class TFModelTesterMixin:
     model_tester = None
@@ -1464,8 +1445,6 @@ def test_loss_computation(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
             model = model_class(config)
-            if not getattr(model, "hf_compute_loss", None) and not _return_type_has_loss(model):
-                continue
             # The number of elements in the loss should be the same as the number of elements in the label
             prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
             added_label_names = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)
@@ -1480,7 +1459,11 @@
             input_name = possible_input_names.intersection(set(prepared_for_class)).pop()
             model_input = prepared_for_class.pop(input_name)
 
-            loss = model(model_input, **prepared_for_class)[0]
+            outputs = model(model_input, **prepared_for_class)
+            if not isinstance(outputs, ModelOutput) or not hasattr(outputs, "loss"):
+                continue
+
+            loss = outputs.loss
             self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])
 
             # Test that model correctly compute the loss when we mask some positions
@@ -1540,18 +1523,16 @@ def test_keras_fit(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
             model = model_class(config)
-            if not getattr(model, "hf_compute_loss", False) and not _return_type_has_loss(model):
-                continue
             # Test that model correctly compute the loss with kwargs
             prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
-            # Is there a better way to remove these decoder inputs?
             # We also remove "return_loss" as this is covered by the train_step when using fit()
             prepared_for_class = {
                 key: val
                 for key, val in prepared_for_class.items()
-                if key
-                not in ("head_mask", "decoder_head_mask", "cross_attn_head_mask", "decoder_input_ids", "return_loss")
+                if key not in ("head_mask", "decoder_head_mask", "cross_attn_head_mask", "return_loss")
             }
+            if "labels" in prepared_for_class and "decoder_input_ids" in prepared_for_class:
+                del prepared_for_class["decoder_input_ids"]
 
             accuracy_classes = [
                 "ForPreTraining",
@@ -1575,8 +1556,10 @@
                 sample_weight = tf.convert_to_tensor([0.5] * self.model_tester.batch_size, dtype=tf.float32)
             else:
                 sample_weight = None
-
-            model(model.dummy_inputs)  # Build the model so we can get some constant weights
+            # Build the model so we can get some constant weights and check outputs
+            outputs = model(prepared_for_class)
+            if getattr(outputs, "loss", None) is None:
+                continue
             model_weights = model.get_weights()
 
             # Run eagerly to save some expensive compilation times
@@ -1648,7 +1631,6 @@
                 # Pass in all samples as a batch to match other `fit` calls
                 weighted_dataset = weighted_dataset.batch(len(dataset))
                 dataset = dataset.batch(len(dataset))
-
                 # Reinitialize to fix batchnorm again
                 model.set_weights(model_weights)
 
diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py
index f144a7b8d933..ea5bc26986ae 100644
--- a/tests/utils/test_modeling_tf_core.py
+++ b/tests/utils/test_modeling_tf_core.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
 
+from __future__ import annotations
+
 import copy
 import os
 import tempfile