From c9339f7d1d9511ebf4afd58cac844a61bbf104c2 Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Tue, 14 Jan 2025 10:00:07 +0000 Subject: [PATCH 1/2] Fix various bugs in multi-modal processor Signed-off-by: DarkLight1337 --- tests/multimodal/test_processing.py | 18 ++++++ vllm/multimodal/processing.py | 89 +++++++++++++---------------- vllm/multimodal/registry.py | 5 +- 3 files changed, 61 insertions(+), 51 deletions(-) diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py index 54269c3ef7ce0..4d2c14b228f61 100644 --- a/tests/multimodal/test_processing.py +++ b/tests/multimodal/test_processing.py @@ -421,6 +421,7 @@ def test_find_replace_tokens( "pattern_1": [32000, 32000], "pattern_2": [], "pattern_3": [1550, 918, 1550], + "pattern_4": [32000], }, ], ) @@ -438,6 +439,14 @@ def test_find_replace_tokens( replacement=[32000, 32000], ), ], + "pattern_4": [ + PlaceholderInfo( + modality="pattern_4", + item_idx=0, + start_idx=3, + replacement=[32000], + ), + ], } ), @@ -466,6 +475,7 @@ def test_find_replace_tokens( replacement=[1550, 918, 1550], ), ], + # No match for pattern_4 as it has lower priority than pattern_1 } ), ( @@ -485,6 +495,14 @@ def test_find_replace_tokens( replacement=[32000, 32000], ), ], + "pattern_4": [ + PlaceholderInfo( + modality="pattern_4", + item_idx=0, + start_idx=5, + replacement=[32000], + ), + ], "pattern_3": [ PlaceholderInfo( modality="pattern_3", diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 8b47dfb07387f..1c8d0a762cb33 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -404,73 +404,62 @@ def replace_text_matches( return "".join(texts) -def _iter_modality_placeholders( +def _iter_placeholders( + mm_prompt_repls: Mapping[str, Sequence[BoundPromptReplacement]], prompt: list[int], - modality: str, - modality_repls: Sequence[BoundPromptReplacement], - modal_item_count: int, + mm_item_counts: Mapping[str, int], ) -> Iterable[PlaceholderInfo]: - if modal_item_count == 0: - return + """ + Yield each set of placeholder tokens found in :code:`prompt`. + + Matches are exclusive even when multiple modalities specify + the same placeholder tokens. In that case, the modality that + appears earlier in `mm_prompt_repls` takes priority. + Note that empty matches are ignored. + """ prompt_len = len(prompt) - item_idx = 0 + item_idx_by_modality = defaultdict[str, int](lambda: 0) start_idx = 0 while start_idx < prompt_len: found = False - for repl_info in modality_repls: - replacement = repl_info.get_replacement(item_idx) - repl_tokens = replacement.token_ids - repl_len = len(repl_tokens) - end_idx = start_idx + repl_len - - if repl_len == 0 or end_idx > prompt_len: + for modality, modality_repls in mm_prompt_repls.items(): + item_idx = item_idx_by_modality[modality] + if item_idx >= mm_item_counts.get(modality, 0): continue - if prompt[start_idx:end_idx] == repl_tokens: - yield PlaceholderInfo( - modality=modality, - item_idx=item_idx, - start_idx=start_idx, - replacement=repl_tokens, - ) + for repl_info in modality_repls: + replacement = repl_info.get_replacement(item_idx) + repl_tokens = replacement.token_ids + repl_len = len(repl_tokens) + end_idx = start_idx + repl_len + + if repl_len == 0 or end_idx > prompt_len: + continue + + if prompt[start_idx:end_idx] == repl_tokens: + yield PlaceholderInfo( + modality=modality, + item_idx=item_idx, + start_idx=start_idx, + replacement=repl_tokens, + ) - item_idx += 1 - if item_idx >= modal_item_count: - return + # Exclude overlapping matches + start_idx = end_idx + item_idx_by_modality[modality] += 1 + found = True + break - # Exclude overlapping matches - start_idx = end_idx - found = True - break + if found: + break # Go back to the outer while loop if not found: start_idx += 1 -def _iter_placeholders( - mm_prompt_repls: Mapping[str, Sequence[BoundPromptReplacement]], - prompt: list[int], - mm_item_counts: Mapping[str, int], -) -> Iterable[PlaceholderInfo]: - """ - For each modality, yield each set of placeholder tokens found in - :code:`prompt`. - - Note that empty matches are ignored. - """ - for modality, modal_item_count in mm_item_counts.items(): - if modality in mm_prompt_repls: - yield from _iter_modality_placeholders( - prompt, - modality, - mm_prompt_repls[modality], - modal_item_count, - ) - - def find_mm_placeholders( mm_prompt_repls: Mapping[str, Sequence[BoundPromptReplacement]], prompt: list[int], @@ -1156,7 +1145,7 @@ def apply( # If HF processor already inserts placeholder tokens, # there is no need for us to insert them - if all(len(repls) == 0 for repls in mm_missing_repls.items()): + if all(len(repls) == 0 for repls in mm_missing_repls.values()): tokenizer = self.info.get_tokenizer() prompt = decode_tokens(tokenizer, prompt_ids) mm_placeholders = hf_mm_placeholders diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py index 804a91da8c889..2961f7c76ca12 100644 --- a/vllm/multimodal/registry.py +++ b/vllm/multimodal/registry.py @@ -259,7 +259,10 @@ def get_max_tokens_per_item_by_modality( This is currently directly used only in V1. """ if self.has_processor(model_config): - tokenizer = cached_get_tokenizer(model_config.tokenizer) + tokenizer = cached_get_tokenizer( + model_config.tokenizer, + trust_remote_code=model_config.trust_remote_code, + ) processor = self.create_processor(model_config, tokenizer) seq_len = model_config.max_model_len return processor.info.get_mm_max_tokens_per_item(seq_len) From e7283d15fc53361884dfb34921d5d91453baa10d Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Tue, 14 Jan 2025 10:06:38 +0000 Subject: [PATCH 2/2] Comment Signed-off-by: DarkLight1337 --- tests/multimodal/test_processing.py | 1 + vllm/multimodal/processing.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py index 4d2c14b228f61..9e58ed4cfde93 100644 --- a/tests/multimodal/test_processing.py +++ b/tests/multimodal/test_processing.py @@ -421,6 +421,7 @@ def test_find_replace_tokens( "pattern_1": [32000, 32000], "pattern_2": [], "pattern_3": [1550, 918, 1550], + # Test different modalities having the same tokens (32000) "pattern_4": [32000], }, ], diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 1c8d0a762cb33..fa199a07b4cf8 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -412,7 +412,7 @@ def _iter_placeholders( """ Yield each set of placeholder tokens found in :code:`prompt`. - Matches are exclusive even when multiple modalities specify + Matches are exclusive even when multiple modalities share the same placeholder tokens. In that case, the modality that appears earlier in `mm_prompt_repls` takes priority.