Qwen-VL processor: truncate input_ids to [:max_len] and labels to [1:max_len] in train_preprocess (PaddlePaddle#414)

WAYKEN-TSE · Feb 5, 2024 · c893f96 · c893f96
1 parent 4d8458f
commit c893f96
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/paddlemix/processors/qwen_vl_processing.py b/paddlemix/processors/qwen_vl_processing.py
@@ -75,6 +75,7 @@ def __call__(
         return inputs
 
     def train_preprocess(self, sources, system_message: str = "You are a helpful assistant."):
+
         IGNORE_TOKEN_ID = -100
         im_start = self.tokenizer.im_start_id
         im_end = self.tokenizer.im_end_id
@@ -121,8 +122,8 @@ def train_preprocess(self, sources, system_message: str = "You are a helpful ass
         assert len(input_id) == len(target)
 
         inputs = dict(
-            input_ids=input_id,
-            labels=target,
+            input_ids=input_id[: self.max_len],
+            labels=target[1 : self.max_len],
         )
         if len(image_path) > 0:
             inputs["images"] = self.image_processor(image_path)