diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py
index 38d19028c0f..893f2f06bbd 100644
--- a/src/promptflow-evals/promptflow/evals/_constants.py
+++ b/src/promptflow-evals/promptflow/evals/_constants.py
@@ -19,7 +19,7 @@ class EvaluationMetrics:
     SELF_HARM = "self_harm"
     SEXUAL = "sexual"
     PROTECTED_MATERIAL = "protected_material"
-    INDIRECT_ATTACK = "indirect_attack"
+    XPIA = "xpia"


 class _InternalEvaluationMetrics:
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
index 4f015c912b4..578236c527f 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -92,7 +92,7 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[s
     handled_metrics = [
         EvaluationMetrics.PROTECTED_MATERIAL,
         _InternalEvaluationMetrics.ECI,
-        EvaluationMetrics.INDIRECT_ATTACK,
+        EvaluationMetrics.XPIA,
     ]
     label_cols = []
     for col in df.columns: