From 73436f78529e70e8336ed9022cc24bbdd8f05381 Mon Sep 17 00:00:00 2001
From: Alex-Brooks
Date: Mon, 13 Jan 2025 02:19:47 +0000
Subject: [PATCH 1/2] Fix online qwen models

Signed-off-by: Alex-Brooks
---
 vllm/transformers_utils/tokenizer.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 97920f42ec52f..084f3591669f0 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -1,3 +1,4 @@
+import contextlib
 import os
 import warnings
 from pathlib import Path
@@ -67,7 +68,14 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
         tokenizer.all_special_tokens_extended)
     tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
     tokenizer_len = len(tokenizer)
+    max_token_id = max(tokenizer.get_vocab().values())
+    # Some tokenizers (e.g., QwenTokenizer) have special tokens that
+    # are added and included in the implementation of the vocab_size
+    # property, but not in get_vocab(); if there is an implementation
+    # of vocab size, we should take the greater value.
+    with contextlib.suppress(NotImplementedError):
+        max_token_id = max(max_token_id, tokenizer.vocab_size)
 
     class CachedTokenizer(tokenizer.__class__):  # type: ignore
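
[Editor's note: the mismatch this patch works around can be sketched with a toy
tokenizer. ToyTokenizer below is hypothetical and is not the actual
QwenTokenizer implementation; it only illustrates the shape of the problem:
get_vocab() reports the base vocabulary, while the vocab_size property also
counts added special tokens, so the patch takes the greater of the two as a
conservative bound on token ids.]

    # Minimal sketch of the discrepancy, assuming a tokenizer whose
    # vocab_size counts special tokens that get_vocab() omits.
    class ToyTokenizer:
        def get_vocab(self):
            # Base vocabulary only: token string -> token id.
            return {"hello": 0, "world": 1}

        @property
        def vocab_size(self):
            # Also counts an added special token, so it exceeds
            # max(get_vocab().values()) + 1 ids covered above.
            return 3

    tok = ToyTokenizer()
    max_token_id = max(tok.get_vocab().values())      # 1 -- undercounts
    max_token_id = max(max_token_id, tok.vocab_size)  # 3, as in the patch
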