diff --git a/requirements.txt b/requirements.txt
index 3db4e5b..253dd25 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-# GLM4 transformers==4.40.2
 accelerate~=0.27.2
 datasets>=2.14.6
 loguru
@@ -7,6 +6,6 @@ sentencepiece
 scikit-learn
 tensorboard
 tqdm>=4.47.0
-transformers>=4.39.3
+transformers>=4.39.3 # GLM4 requires transformers==4.40.2
 trl~=0.8.3
 tiktoken
diff --git a/run_full_train.sh b/run_full_sft.sh
similarity index 100%
rename from run_full_train.sh
rename to run_full_sft.sh
diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index 6d750b4..d4cc15b 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -824,8 +824,9 @@ def filter_empty_labels(example):
        **config_kwargs,
    )

-    # Fix ChatGLM2 and ChatGLM3 LM head
-    if getattr(config, "model_type", None) == "chatglm":
+    # Fix ChatGLM2, ChatGLM3 and InternLM2 LM head
+    # NOTE(review): InternLM2ForCausalLM exposes its LM head as `model.output` and has no `transformer` attribute — verify the `setattr` below does not raise AttributeError for internlm2
+    if getattr(config, "model_type", None) in ("chatglm", "internlm2"):
        setattr(model, "lm_head", model.transformer.output_layer)
        setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
