Merge pull request #159 from dividez/fix_rl_training

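Update rl_training.py: build prompts in preprocess_function from the `conversations` field instead of the `instruction`/`input` fields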
shibing624 · Aug 9, 2023 · e253502
2 parents 53e166c + 6ba6ba1
commit e253502
Showing 1 changed file with 13 additions and 10 deletions.
diff --git a/rl_training.py b/rl_training.py
@@ -348,16 +348,19 @@ def preprocess_function(examples):
             "query": [],
             "input_ids": [],
         }
-        for instruction, input in zip(examples['instruction'], examples['input']):
-            if input:
-                instruction = instruction + "\n" + input
-            source = PROMPT_TEMPLATE.format_map({"instruction": instruction})
-            tokenized_question = tokenizer(
-                source, truncation=True, max_length=max_source_length, padding="max_length",
-                return_tensors="pt"
-            )
-            new_examples["query"].append(source)
-            new_examples["input_ids"].append(tokenized_question["input_ids"])
+        for conversation in examples['conversations']:
+            for message in conversation:
+                instruction = message['value']
+                input = message['from']
+                if input:
+                    instruction = instruction + "\n" + input
+                source = PROMPT_TEMPLATE.format_map({"instruction": instruction})
+                tokenized_question = tokenizer(
+                    source, truncation=True, max_length=max_source_length, padding="max_length",
+                    return_tensors="pt"
+                )
+                new_examples["query"].append(source)
+                new_examples["input_ids"].append(tokenized_question["input_ids"])
 
         return new_examples