From 728b4d0f570cf56af77a966ebb73a11985008025 Mon Sep 17 00:00:00 2001 From: Botao Chen Date: Wed, 19 Feb 2025 19:34:31 -0800 Subject: [PATCH] feat(scoring): add categorical_count aggregation to llm_as_judge_405b_simpleqa --- .../scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py index a53c5cfa7e..074f1ff462 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py @@ -5,7 +5,11 @@ # the root directory of this source tree. from llama_stack.apis.common.type_system import NumberType -from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn +from llama_stack.apis.scoring_functions import ( + AggregationFunctionType, + LLMAsJudgeScoringFnParams, + ScoringFn, +) GRADER_TEMPLATE = """ Your job is to look at a question, a gold target, and a predicted answer, and then assign a grade of either ["CORRECT", "INCORRECT", "NOT_ATTEMPTED"]. @@ -87,5 +91,6 @@ judge_model="meta-llama/Llama-3.1-405B-Instruct", prompt_template=GRADER_TEMPLATE, judge_score_regexes=[r"(A|B|C)"], + aggregation_functions=[AggregationFunctionType.categorical_count.value], ), )