From 0f6a61ef233e7cb7db92f430d47a6ea84d1f3593 Mon Sep 17 00:00:00 2001
From: shibing624
Date: Tue, 1 Aug 2023 17:36:10 +0800
Subject: [PATCH 1/5] update finetune dataset.

---
 supervised_finetuning.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index 044908b..d61af9d 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -726,6 +726,8 @@ def preprocess_function(examples):
         # Mask targets. Only compute loss on the assistant outputs.
         sep = conv.sep + conv.roles[1] + ": "
         for conversation, target in zip(conversations, targets):
+            total_len = int(target.ne(tokenizer.pad_token_id).sum())
+
             turns = conversation.split(conv.sep2)
             cur_len = 1
             target[:cur_len] = IGNORE_INDEX
@@ -744,7 +746,14 @@ def preprocess_function(examples):
                 # Ignore the user instructions
                 target[cur_len: cur_len + instruction_len] = IGNORE_INDEX
                 cur_len += turn_len
+
             target[cur_len:] = IGNORE_INDEX
+
+            if cur_len < tokenizer.model_max_length:
+                if cur_len != total_len:
+                    target[:] = IGNORE_INDEX
+                    logger.warning(f"tokenization mismatch: {cur_len} vs. {total_len}. (ignored)")
+
         return dict(
             input_ids=input_ids,
             labels=targets,

From 25a6d6ba3a8d2932f68093a2b1be3f27238ea42d Mon Sep 17 00:00:00 2001
From: shibing624
Date: Tue, 1 Aug 2023 18:19:11 +0800
Subject: [PATCH 2/5] update reward num labels.

---
 reward_modeling.py       | 3 +--
 supervised_finetuning.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/reward_modeling.py b/reward_modeling.py
index d2e0e78..a6d47f9 100644
--- a/reward_modeling.py
+++ b/reward_modeling.py
@@ -376,6 +376,7 @@ def main():
             model_args.device_map = {"": int(os.environ["LOCAL_RANK"]) or 0}
         config = config_class.from_pretrained(
             model_args.model_name_or_path,
+            num_labels=1,
             torch_dtype=torch_dtype,
             trust_remote_code=model_args.trust_remote_code,
             cache_dir=model_args.cache_dir
@@ -384,7 +385,6 @@ def main():
         model = model_class.from_pretrained(
             model_args.model_name_or_path,
             config=config,
-            num_labels=1,
             load_in_8bit=model_args.load_in_8bit,
             device_map=model_args.device_map,
             trust_remote_code=model_args.trust_remote_code,
@@ -394,7 +394,6 @@ def main():
         model = model_class.from_pretrained(
             model_args.model_name_or_path,
             config=config,
-            num_labels=1,
             cache_dir=model_args.cache_dir,
             ignore_mismatched_sizes=True
         )
diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index d61af9d..3b3c2e0 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -121,7 +121,7 @@ def __post_init__(self):
         if self.model_name_or_path is None:
             raise ValueError("You must specify a valid model_name_or_path to run training.")
         if self.model_max_length < 256:
-            raise ValueError("You must set model_max_length more than 256, default is 512")
+            raise ValueError("You must specify a valid model_max_length >= 256 to run training.")
 
 
 @dataclass
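
Note on the masking change in PATCH 1/5: after the user turns are masked, the number of tokens walked (cur_len) has to match the number of non-pad tokens (total_len); if it does not, the whole sample is set to IGNORE_INDEX so a mis-tokenized turn cannot leak into the loss. A minimal standalone sketch of that check follows; IGNORE_INDEX, PAD_ID, the helper name and the toy tensors are illustrative values, not taken from the repository.

import torch

IGNORE_INDEX = -100
PAD_ID = 0

def mask_tail_or_drop(target: torch.Tensor, cur_len: int, model_max_length: int) -> torch.Tensor:
    # total_len counts the real (non-pad) tokens, as in the patch
    total_len = int(target.ne(PAD_ID).sum())
    # everything after the last assistant turn is excluded from the loss
    target[cur_len:] = IGNORE_INDEX
    # if the walked length disagrees with the real length, ignore the whole sample
    if cur_len < model_max_length and cur_len != total_len:
        target[:] = IGNORE_INDEX
    return target

labels = torch.tensor([1, 2, 3, 4, 5, 6, PAD_ID, PAD_ID])
print(mask_tail_or_drop(labels.clone(), cur_len=6, model_max_length=8))  # tail masked, sample kept
print(mask_tail_or_drop(labels.clone(), cur_len=5, model_max_length=8))  # length mismatch: all IGNORE_INDEX
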
From 9f03e9492f66be1737804a33a493214dcb16f878 Mon Sep 17 00:00:00 2001
From: shibing624
Date: Tue, 1 Aug 2023 18:22:31 +0800
Subject: [PATCH 3/5] update reward num labels.

---
 merge_peft_adapter.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/merge_peft_adapter.py b/merge_peft_adapter.py
index 239ce90..180bd81 100644
--- a/merge_peft_adapter.py
+++ b/merge_peft_adapter.py
@@ -65,7 +65,6 @@ def main():
             raise ValueError("chatglm does not support sequence classification")
         base_model = AutoModelForSequenceClassification.from_pretrained(
             base_model_path,
-            num_labels=1,
             load_in_8bit=False,
             torch_dtype=torch.float16,
             trust_remote_code=True,

From 03350db1503b9210f23f799e6bd88da9e9813416 Mon Sep 17 00:00:00 2001
From: shibing624
Date: Tue, 1 Aug 2023 19:33:15 +0800
Subject: [PATCH 4/5] update instruction -2.

---
 supervised_finetuning.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index 3b3c2e0..c6dee95 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -740,8 +740,10 @@ def preprocess_function(examples):
                 if len(parts) != 2:
                     break
                 parts[0] += sep
-                # "-2" is hardcoded for the LLaMA tokenizer to make the offset correct.
-                instruction_len = len(tokenizer(parts[0]).input_ids) - 2
+                instruction_len = len(tokenizer(parts[0]).input_ids)
+                if model_args.model_type in ['llama']:
+                    # "-2" is hardcoded for the LLaMA tokenizer to make the offset correct.
+                    instruction_len = instruction_len - 2
 
                 # Ignore the user instructions
                 target[cur_len: cur_len + instruction_len] = IGNORE_INDEX

From adc5597f46f17353f60b5d8db37e6615ab64d926 Mon Sep 17 00:00:00 2001
From: shibing624
Date: Tue, 1 Aug 2023 19:45:42 +0800
Subject: [PATCH 5/5] update detail.

---
 supervised_finetuning.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index c6dee95..42d8881 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -902,7 +902,9 @@ def preprocess_function(examples):
     # Training
     if training_args.do_train:
         logger.info("*** Train ***")
-        logger.debug(f"Train dataloader example: {next(iter(trainer.get_train_dataloader()))}")
+        sample = next(iter(trainer.get_train_dataloader()))
+        logger.debug(f"Train dataloader example: {sample}")
+        logger.debug(f"Details: \ninput_ids: {list(sample['input_ids'])}, \nlabels: {list(sample['labels'])}")
         checkpoint = None
         if training_args.resume_from_checkpoint is not None:
             checkpoint = training_args.resume_from_checkpoint
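
Note on the logging change in PATCH 5/5: dumping input_ids and labels of the first batch makes it easy to confirm that, after the masking from PATCH 1/5 and the model-type offset from PATCH 4/5, user-turn positions carry IGNORE_INDEX (-100) and only assistant tokens keep real label ids. A minimal sketch of that inspection; the tensors below are toy values, not actual dataloader output.

import torch

IGNORE_INDEX = -100
sample = {
    "input_ids": torch.tensor([[1, 11, 12, 13, 14, 15, 16, 17, 18, 19, 6324, 2]]),
    "labels":    torch.tensor([[IGNORE_INDEX] * 10 + [6324, 2]]),
}
for ids, lbls in zip(sample["input_ids"], sample["labels"]):
    trained = [int(t) for t, l in zip(ids, lbls) if l != IGNORE_INDEX]
    ignored = int((lbls == IGNORE_INDEX).sum())
    print(f"tokens contributing to the loss: {trained}; positions ignored: {ignored}")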