Skip to content

Commit

Permalink
fix stop words for glm4 (#2044)
Browse files Browse the repository at this point in the history
* fix: set the GLM-4 stop words and pass them to the base chat template

* update unit test
  • Loading branch information
RunningLeon authored Jul 16, 2024
1 parent 9cdce39 commit 7b24674
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
2 changes: 2 additions & 0 deletions lmdeploy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,10 +1583,12 @@ def __init__(self,
system='<|system|>\n',
user='<|user|>\n',
assistant='<|assistant|>\n',
stop_words=['<|user|>', '<|endoftext|>', '<|observation|>'],
**kwargs):
super().__init__(system=system,
user=user,
assistant=assistant,
stop_words=stop_words,
**kwargs)
self.start = '[gMASK]<sop>'

Expand Down
10 changes: 8 additions & 2 deletions tests/test_lmdeploy/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,13 @@ def test_chatglm3():


def test_glm4():
model = MODELS.get('glm4')()
model_path_and_name = 'THUDM/glm-4-9b-chat'
deduced_name = best_match_model(model_path_and_name)
assert deduced_name == 'glm4'

model = MODELS.get(deduced_name)()
# check stop words
assert model.stop_words == ['<|user|>', '<|endoftext|>', '<|observation|>']
messages = [{
'role': 'system',
'content': 'you are a helpful assistant'
Expand All @@ -344,7 +350,7 @@ def test_glm4():
'content': 'AGI is?'
}]
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('THUDM/glm-4-9b-chat',
tokenizer = AutoTokenizer.from_pretrained(model_path_and_name,
trust_remote_code=True)
ref = tokenizer.apply_chat_template(messages, tokenize=False)
res = model.messages2prompt(messages)
Expand Down

0 comments on commit 7b24674

Please sign in to comment.