From 3381fa19fc9e15a8bb9ff355f3e4c59770401f85 Mon Sep 17 00:00:00 2001
From: Sebastien Ehrhardt
Date: Mon, 29 Apr 2024 12:41:17 +0100
Subject: [PATCH] add correct name

---
 docs/source/en/perf_infer_gpu_one.md | 2 +-
 utils/check_support_list.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/perf_infer_gpu_one.md b/docs/source/en/perf_infer_gpu_one.md
index 532366aba06aee..b20f7dd6dc92e2 100644
--- a/docs/source/en/perf_infer_gpu_one.md
+++ b/docs/source/en/perf_infer_gpu_one.md
@@ -190,7 +190,7 @@ FlashAttention is more memory efficient, meaning you can train on much larger se
 PyTorch's [`torch.nn.functional.scaled_dot_product_attention`](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html) (SDPA) can also call FlashAttention and memory-efficient attention kernels under the hood. SDPA support is currently being added natively in Transformers and is used by default for `torch>=2.1.1` when an implementation is available. You may also set `attn_implementation="sdpa"` in `from_pretrained()` to explicitly request SDPA to be used.
 
 For now, Transformers supports SDPA inference and training for the following architectures:
-* [Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio_spectrogram_transformer#transformers.ASTModel)
+* [Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio-spectrogram-transformer#transformers.ASTModel)
 * [Bart](https://huggingface.co/docs/transformers/model_doc/bart#transformers.BartModel)
 * [Bert](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertModel)
 * [Cohere](https://huggingface.co/docs/transformers/model_doc/cohere#transformers.CohereModel)
diff --git a/utils/check_support_list.py b/utils/check_support_list.py
index f6aaa2bb67dce4..3cb0b616022426 100644
--- a/utils/check_support_list.py
+++ b/utils/check_support_list.py
@@ -84,7 +84,7 @@ def check_sdpa_support_list():
             archs_supporting_sdpa.append(model_name)
 
     for arch in archs_supporting_sdpa:
-        if arch not in doctext:
+        if arch not in doctext and arch not in doctext.replace("-", "_"):
             raise ValueError(
                 f"{arch} should be in listed in the SDPA documentation but is not. Please update the documentation."
             )
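
Note (illustration, not part of the patch): the documentation hunk above describes opting in to SDPA explicitly via `from_pretrained()`. A minimal sketch of that usage, assuming a checkpoint from the supported list (BERT here; any listed architecture works):

    import torch
    from transformers import AutoModel

    # Explicitly request the SDPA attention implementation instead of
    # relying on the default selection made for torch>=2.1.1.
    model = AutoModel.from_pretrained(
        "bert-base-uncased",  # illustrative checkpoint
        torch_dtype=torch.float16,
        attn_implementation="sdpa",
    )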
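
Note (illustration, not part of the patch): the `check_support_list.py` change is needed because the corrected doc link uses a hyphenated slug, while the architecture names the script collects appear to use underscores (an assumption consistent with the `replace("-", "_")` normalization), so a direct substring test no longer matches. A standalone sketch of the patched comparison:

    # Doc line as it reads after this patch (hyphenated URL slug).
    doctext = "* [Audio Spectrogram Transformer](https://huggingface.co/docs/transformers/model_doc/audio-spectrogram-transformer#transformers.ASTModel)"
    # Architecture name in underscore form (assumed collection format).
    arch = "audio_spectrogram_transformer"

    assert arch not in doctext  # the hyphenated URL defeats a direct match
    assert arch in doctext.replace("-", "_")  # the normalized copy matches
    # The patched check raises only when both tests fail:
    if arch not in doctext and arch not in doctext.replace("-", "_"):
        raise ValueError(f"{arch} should be listed in the SDPA documentation but is not.")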