diff --git a/merge_peft_adapter.py b/merge_peft_adapter.py
index 239ce90..180bd81 100644
--- a/merge_peft_adapter.py
+++ b/merge_peft_adapter.py
@@ -65,7 +65,6 @@ def main():
         raise ValueError("chatglm does not support sequence classification")
     base_model = AutoModelForSequenceClassification.from_pretrained(
         base_model_path,
-        num_labels=1,
         load_in_8bit=False,
         torch_dtype=torch.float16,
         trust_remote_code=True,
diff --git a/reward_modeling.py b/reward_modeling.py
index d2e0e78..a6d47f9 100644
--- a/reward_modeling.py
+++ b/reward_modeling.py
@@ -376,6 +376,7 @@ def main():
         model_args.device_map = {"": int(os.environ["LOCAL_RANK"]) or 0}
     config = config_class.from_pretrained(
         model_args.model_name_or_path,
+        num_labels=1,
         torch_dtype=torch_dtype,
         trust_remote_code=model_args.trust_remote_code,
         cache_dir=model_args.cache_dir
@@ -384,7 +385,6 @@
         model = model_class.from_pretrained(
             model_args.model_name_or_path,
             config=config,
-            num_labels=1,
             load_in_8bit=model_args.load_in_8bit,
             device_map=model_args.device_map,
             trust_remote_code=model_args.trust_remote_code,
@@ -394,7 +394,6 @@
         model = model_class.from_pretrained(
             model_args.model_name_or_path,
             config=config,
-            num_labels=1,
             cache_dir=model_args.cache_dir,
             ignore_mismatched_sizes=True
         )
diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index 044908b..42d8881 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -121,7 +121,7 @@ def __post_init__(self):
         if self.model_name_or_path is None:
             raise ValueError("You must specify a valid model_name_or_path to run training.")
         if self.model_max_length < 256:
-            raise ValueError("You must set model_max_length more than 256, default is 512")
+            raise ValueError("You must specify a valid model_max_length >= 256 to run training.")
 
 
 @dataclass
@@ -726,6 +726,8 @@ def preprocess_function(examples):
         # Mask targets. Only compute loss on the assistant outputs.
         sep = conv.sep + conv.roles[1] + ": "
         for conversation, target in zip(conversations, targets):
+            total_len = int(target.ne(tokenizer.pad_token_id).sum())
+
             turns = conversation.split(conv.sep2)
             cur_len = 1
             target[:cur_len] = IGNORE_INDEX
@@ -738,13 +740,22 @@
                 if len(parts) != 2:
                     break
                 parts[0] += sep
-                # "-2" is hardcoded for the LLaMA tokenizer to make the offset correct.
-                instruction_len = len(tokenizer(parts[0]).input_ids) - 2
+                instruction_len = len(tokenizer(parts[0]).input_ids)
+                if model_args.model_type in ['llama']:
+                    # "-2" is hardcoded for the LLaMA tokenizer to make the offset correct.
+                    instruction_len = instruction_len - 2
 
                 # Ignore the user instructions
                 target[cur_len: cur_len + instruction_len] = IGNORE_INDEX
                 cur_len += turn_len
 
+            target[cur_len:] = IGNORE_INDEX
+
+            if cur_len < tokenizer.model_max_length:
+                if cur_len != total_len:
+                    target[:] = IGNORE_INDEX
+                    logger.warning(f"tokenization mismatch: {cur_len} vs. {total_len}. (ignored)")
+
         return dict(
             input_ids=input_ids,
             labels=targets,
@@ -891,7 +902,9 @@
     # Training
     if training_args.do_train:
         logger.info("*** Train ***")
-        logger.debug(f"Train dataloader example: {next(iter(trainer.get_train_dataloader()))}")
+        sample = next(iter(trainer.get_train_dataloader()))
+        logger.debug(f"Train dataloader example: {sample}")
+        logger.debug(f"Details: \ninput_ids: {list(sample['input_ids'])}, \nlabels: {list(sample['labels'])}")
         checkpoint = None
         if training_args.resume_from_checkpoint is not None:
             checkpoint = training_args.resume_from_checkpoint
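The three `num_labels` hunks in reward_modeling.py converge on one pattern: head-shaping arguments like `num_labels` belong on the config, not on `from_pretrained` when an explicit `config=` is already supplied (depending on the transformers version, a kwarg passed alongside an explicit config is either ignored or rejected). A minimal sketch of the resulting pattern; `"base-model"` is a placeholder path, not something from the diff:

```python
# Put num_labels on the config, then hand that config to the model, so the
# sequence-classification head is built with a single scalar (reward) output.
from transformers import AutoConfig, AutoModelForSequenceClassification

config = AutoConfig.from_pretrained("base-model", num_labels=1)
model = AutoModelForSequenceClassification.from_pretrained("base-model", config=config)
assert model.config.num_labels == 1  # reward head emits one score per sequence
```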
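The preprocess_function hunks add a consistency check around the turn-masking loop: `total_len` counts the real (non-pad) tokens before any masking, and if the offsets walked by the loop do not land exactly on that count, the labels are assumed misaligned and the whole example is dropped from the loss. A toy run with made-up token ids shows the guard firing:

```python
# Toy illustration of the mismatch guard (token ids and lengths are made up).
import torch

IGNORE_INDEX = -100  # label value skipped by CrossEntropyLoss
PAD_ID = 0

input_ids = torch.tensor([1, 11, 12, 13, 14, 15, PAD_ID, PAD_ID])
target = input_ids.clone()
total_len = int(target.ne(PAD_ID).sum())  # 6 content tokens, counted before masking

cur_len = 5  # pretend the turn-walking loop only consumed 5 tokens
target[cur_len:] = IGNORE_INDEX  # nothing past the last parsed turn is supervised

model_max_length = 8
if cur_len < model_max_length and cur_len != total_len:
    # 5 != 6: offsets and tokenization disagree, so drop the whole example
    # from the loss rather than train on shifted labels
    target[:] = IGNORE_INDEX
```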
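Making the `-2` correction conditional on `model_type` matters because the offset is a LLaMA tokenizer artifact rather than a general rule: encoding the user prefix on its own prepends a BOS token, and SentencePiece encodes the prefix's trailing space as its own piece even though it merges into the next word inside the full turn, so `len(tokenizer(parts[0]).input_ids)` overshoots the prefix's in-context footprint by two. A rough probe of the two contributions, under the assumptions that `"huggyllama/llama-7b"` stands in for the actual base checkpoint and that "hello" encodes as a single piece:

```python
# Empirical probe of the LLaMA-specific offset; checkpoint name is a placeholder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("huggyllama/llama-7b", use_fast=False)

prefix = "USER: hi ASSISTANT: "
with_bos = tok(prefix).input_ids
without_bos = tok(prefix, add_special_tokens=False).input_ids
print(len(with_bos) - len(without_bos))    # 1: the prepended BOS token

full = tok(prefix + "hello", add_special_tokens=False).input_ids
# The prefix's trailing space merges into "hello" in context, so the standalone
# prefix encoding is one token longer than its footprint inside `full`:
print(len(without_bos) - (len(full) - 1))  # expected 1 on LLaMA tokenizers
```

Tokenizers that do not add BOS need no adjustment, which is exactly what the new `if model_args.model_type in ['llama']` branch expresses.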
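The expanded debug logging prints raw `input_ids` and `labels` side by side. A small helper (hypothetical, not part of the diff) turns that output into something human-checkable by decoding only the positions the loss will actually see:

```python
# Hypothetical helper for eyeballing the new debug output: decode only the
# token positions whose label is not IGNORE_INDEX, i.e. the supervised spans.
IGNORE_INDEX = -100

def decode_supervised(tokenizer, input_ids, labels):
    kept = [int(t) for t, lab in zip(input_ids, labels) if int(lab) != IGNORE_INDEX]
    return tokenizer.decode(kept)

# e.g. decode_supervised(tokenizer, sample["input_ids"][0], sample["labels"][0])
# should print only assistant replies if the masking above is correct.
```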