diff --git a/requirements.txt b/requirements.txt
index 3db4e5b..253dd25 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-# GLM4 transformers==4.40.2
 accelerate~=0.27.2
 datasets>=2.14.6
 loguru
@@ -7,6 +6,6 @@ sentencepiece
 scikit-learn
 tensorboard
 tqdm>=4.47.0
-transformers>=4.39.3
+transformers>=4.39.3 # GLM4 requires transformers==4.40.2
 trl~=0.8.3
 tiktoken
diff --git a/run_full_train.sh b/run_full_sft.sh
similarity index 100%
rename from run_full_train.sh
rename to run_full_sft.sh
diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index 6d750b4..d4cc15b 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -824,8 +824,9 @@ def filter_empty_labels(example):
        **config_kwargs,
    )

-    # Fix ChatGLM2 and ChatGLM3 LM head
-    if getattr(config, "model_type", None) == "chatglm":
+    # Fix ChatGLM2, ChatGLM3 and InternLM2 LM head
+    # NOTE(review): InternLM2ForCausalLM exposes its LM head as `model.output` and has no `transformer` attribute — verify the `setattr` below does not raise AttributeError for internlm2
+    if getattr(config, "model_type", None) in ("chatglm", "internlm2"):
        setattr(model, "lm_head", model.transformer.output_layer)
        setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
