Skip to content

Commit

Permalink
Merge pull request #13 from openshift-psap/fix/mmlu_pro_1shot
Browse files Browse the repository at this point in the history
Utilize multi-shot examples in MMLU and MMLU-Pro
  • Loading branch information
sjmonson authored Oct 24, 2024
2 parents 7720dd8 + 612cc38 commit 71bcff7
Show file tree
Hide file tree
Showing 16 changed files with 16 additions and 16 deletions.
2 changes: 1 addition & 1 deletion llm_eval_test/benchmarks/tasks/mmlu/mmlu.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_all
include: unitxt
recipe: card=cards.mmlu.all,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu.all,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=5,demos_pool_size=10
2 changes: 1 addition & 1 deletion llm_eval_test/benchmarks/tasks/mmlu_pro/mmlu_pro_all.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_all
include: unitxt
recipe: card=cards.mmlu_pro.all,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.all,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_biology
include: unitxt
recipe: card=cards.mmlu_pro.biology,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.biology,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_business
include: unitxt
recipe: card=cards.mmlu_pro.business,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.business,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_chemistry
include: unitxt
recipe: card=cards.mmlu_pro.chemistry,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.chemistry,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_computer_science
include: unitxt
recipe: card=cards.mmlu_pro.computer_science,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.computer_science,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_economics
include: unitxt
recipe: card=cards.mmlu_pro.economics,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.economics,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_engineering
include: unitxt
recipe: card=cards.mmlu_pro.engineering,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.engineering,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_health
include: unitxt
recipe: card=cards.mmlu_pro.health,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.health,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_history
include: unitxt
recipe: card=cards.mmlu_pro.history,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.history,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
2 changes: 1 addition & 1 deletion llm_eval_test/benchmarks/tasks/mmlu_pro/mmlu_pro_law.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_law
include: unitxt
recipe: card=cards.mmlu_pro.law,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.law,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
2 changes: 1 addition & 1 deletion llm_eval_test/benchmarks/tasks/mmlu_pro/mmlu_pro_math.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_math
include: unitxt
recipe: card=cards.mmlu_pro.math,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.math,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_other
include: unitxt
recipe: card=cards.mmlu_pro.other,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.other,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_philosophy
include: unitxt
recipe: card=cards.mmlu_pro.philosophy,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.philosophy,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_physics
include: unitxt
recipe: card=cards.mmlu_pro.physics,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.physics,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
task: mmlu_pro_psychology
include: unitxt
recipe: card=cards.mmlu_pro.psychology,template=templates.qa.multiple_choice.with_topic.lm_eval_harness
recipe: card=cards.mmlu_pro.psychology,template=templates.qa.multiple_choice.with_topic.lm_eval_harness,num_demos=1,demos_pool_size=4

0 comments on commit 71bcff7

Please sign in to comment.