diff --git a/crates/llm-base/src/vocabulary.rs b/crates/llm-base/src/vocabulary.rs index 0014b056..43540c83 100644 --- a/crates/llm-base/src/vocabulary.rs +++ b/crates/llm-base/src/vocabulary.rs @@ -311,6 +311,10 @@ impl ModelVocabulary { let mut vec = vec![]; for token in tokens { + if skip_special_tokens && token == 1 { + continue; + } + vec.append(&mut self.id_to_token[token as usize].to_vec()); }