Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Credentials Required for Content Safety and Protected Materials Evaluators #37707

Merged
merged 5 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
### Breaking Changes

- Removed `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan`.
- `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.

### Bugs Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Dict, Optional
from typing_extensions import override

from azure.identity import DefaultAzureCredential
from azure.core.credentials import TokenCredential
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
from azure.ai.evaluation._exceptions import EvaluationException
Expand All @@ -17,32 +17,28 @@ class RaiServiceEvaluatorBase(EvaluatorBase):
This includes content safety evaluators, protected material evaluators, and others. These evaluators
are all assumed to be of the "query and response or conversation" input variety.

param eval_metric: The evaluation metric to be used for evaluation. This is used by the API call logic
to specify which evaluation to perform.
type eval_metric: ~azure.ai.evaluation._common.constants.EvaluationMetrics
param eval_last_turn: If True, only the last turn of the conversation will be evaluated, and no
:param eval_metric: The evaluation metric to be used for evaluation. This is used by the API call logic
to specify which evaluation to perform.
:type eval_metric: ~azure.ai.evaluation._common.constants.EvaluationMetrics
:param eval_last_turn: If True, only the last turn of the conversation will be evaluated, and no
aggregation will be performed. If False, all turns will be evaluated and the numeric results will be
aggregated. Per-turn results are still available in the output via the "evaluation_per_turn" key
when this occurs. Default is False, resulting in full conversation evaluation and aggregation.
type eval_last_turn: bool
:type eval_last_turn: bool
"""

@override
def __init__(
self,
eval_metric: EvaluationMetrics,
azure_ai_project: dict,
credential: Optional[dict] = None,
needuv marked this conversation as resolved.
Show resolved Hide resolved
credential: TokenCredential,
eval_last_turn: bool = False,
):
super().__init__(eval_last_turn=eval_last_turn)
self._eval_metric = eval_metric
self._azure_ai_project = azure_ai_project
if credential is None:
# Use DefaultCredential if no credential is provided
self._credential = DefaultAzureCredential()
else:
self._credential = credential
self._credential = credential

@override
def __call__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ class ContentSafetyEvaluator:
"""
Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:param credential: The credential for connecting to Azure AI project.
:type credential: ~azure.core.credentials.TokenCredential
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
:rtype: Callable

Expand Down Expand Up @@ -66,13 +66,13 @@ class ContentSafetyEvaluator:
}
"""

def __init__(self, azure_ai_project: dict, parallel: bool = True, credential=None):
def __init__(self, credential, azure_ai_project: dict, parallel: bool = True):
self._parallel = parallel
self._evaluators = [
ViolenceEvaluator(azure_ai_project, credential),
SexualEvaluator(azure_ai_project, credential),
SelfHarmEvaluator(azure_ai_project, credential),
HateUnfairnessEvaluator(azure_ai_project, credential),
ViolenceEvaluator(credential, azure_ai_project),
SexualEvaluator(credential, azure_ai_project),
SelfHarmEvaluator(credential, azure_ai_project),
HateUnfairnessEvaluator(credential, azure_ai_project),
]

def __call__(self, *, query: str, response: str, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class ContentSafetyChatEvaluator:
"""
Initialize a content safety chat evaluator configured to evaluate content safety metrics for chat scenario.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
Expand All @@ -38,8 +40,6 @@ class ContentSafetyChatEvaluator:
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:type parallel: bool
:param credential: The credential for connecting to Azure AI project.
:type credential: ~azure.core.credentials.TokenCredential
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: Callable

Expand Down Expand Up @@ -88,7 +88,7 @@ class ContentSafetyChatEvaluator:
}
"""

def __init__(self, azure_ai_project: dict, eval_last_turn: bool = False, parallel: bool = True, credential=None):
def __init__(self, credential, azure_ai_project: dict, eval_last_turn: bool = False, parallel: bool = True):
self._eval_last_turn = eval_last_turn
self._parallel = parallel
self._evaluators = [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a hate-unfairness evaluator for hate unfairness score.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]

**Usage**

Expand Down Expand Up @@ -43,8 +42,8 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a self harm evaluator for self harm score.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]

**Usage**

Expand Down Expand Up @@ -43,8 +42,8 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
needuv marked this conversation as resolved.
Show resolved Hide resolved
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class SexualEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a sexual evaluator for sexual score.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]

**Usage**

Expand Down Expand Up @@ -43,8 +42,8 @@ class SexualEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a violence evaluator for violence score.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]

**Usage**

Expand Down Expand Up @@ -43,8 +42,8 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -17,11 +16,11 @@ class ECIEvaluator(RaiServiceEvaluatorBase):
"AI-generated content may be incorrect. If you are seeking ECI-related information, please go to Bing Search."
Outputs True or False with AI-generated reasoning.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
:return: Whether or not ECI was found in the response without a disclaimer, with AI-generated reasoning
:rtype: Dict[str, str]

Expand Down Expand Up @@ -50,8 +49,8 @@ class ECIEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -12,11 +11,11 @@ class ProtectedMaterialEvaluator(RaiServiceEvaluatorBase):
Initialize a protected material evaluator to detect whether protected material
is present in your AI system's response. Outputs True or False with AI-generated reasoning.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
:return: Whether or not protected material was found in the response, with AI-generated reasoning.
:rtype: Dict[str, str]

Expand Down Expand Up @@ -45,8 +44,8 @@ class ProtectedMaterialEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ class ProtectedMaterialsEvaluator:
Initialize a protected materials evaluator to detect whether protected material
is present in your AI system's response. Outputs True or False with AI-generated reasoning.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: ~azure.core.credentials.TokenCredential
:return: Whether or not protected material was found in the response, with AI-generated reasoning.
:rtype: Dict[str, str]

Expand All @@ -84,7 +84,7 @@ class ProtectedMaterialsEvaluator:
}
"""

def __init__(self, azure_ai_project: dict, credential=None):
def __init__(self, credential, azure_ai_project: dict):
self._async_evaluator = _AsyncProtectedMaterialsEvaluator(azure_ai_project, credential)

def __call__(self, *, query: str, response: str, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# ---------------------------------------------------------
import logging

from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -17,14 +16,14 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):

Detect whether cross domain injected attacks are present in your AI system's response.

:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
:type eval_last_turn: bool
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
:return: A function that evaluates and generates metrics for XPIA chat scenario. Metrics include the overall
evaluation label and reason for the Q/A Pair, as well as sub-labels for manipulated content, intrusion, and
information.
Expand Down Expand Up @@ -53,8 +52,8 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
3 changes: 2 additions & 1 deletion sdk/evaluation/azure-ai-evaluation/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
"promptflow-devkit>=1.15.0",
"promptflow-core>=1.15.0",
"pyjwt>=2.8.0",
"azure-identity>=1.12.0",
# pickle support for credentials was added in this release
"azure-identity>=1.16.0",
"azure-core>=1.30.2",
"nltk>=3.9.1",
"rouge-score>=0.1.2",
Expand Down
Loading
Loading