Skip to content

Commit

Permalink
Merge branch 'main' into task/changeAdvSimInit
Browse files Browse the repository at this point in the history
  • Loading branch information
nagkumar91 authored Jun 25, 2024
2 parents ba9db9c + 2eb9cb4 commit 8d6247f
Show file tree
Hide file tree
Showing 20 changed files with 538 additions and 299 deletions.
6 changes: 6 additions & 0 deletions examples/tutorials/get-started/quickstart-azure.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@
"base_run = pf.run(\n",
" flow=flow,\n",
" data=data,\n",
" column_mapping={\n",
" \"url\": \"${data.url}\",\n",
" },\n",
")\n",
"print(base_run)"
]
Expand Down Expand Up @@ -309,6 +312,9 @@
"variant_run = pf.run(\n",
" flow=flow,\n",
" data=data,\n",
" column_mapping={\n",
" \"url\": \"${data.url}\",\n",
" },\n",
" variant=\"${summarize_text_content.variant_1}\", # here we specify node \"summarize_text_content\" to use variant 1 version.\n",
")"
]
Expand Down
12 changes: 11 additions & 1 deletion examples/tutorials/get-started/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,14 @@
"data = \"../../flows/standard/web-classification/data.jsonl\" # path to the data file\n",
"\n",
"# create run with default variant\n",
"base_run = pf.run(flow=flow, data=data, stream=True)"
"base_run = pf.run(\n",
" flow=flow,\n",
" data=data,\n",
" stream=True,\n",
" column_mapping={\n",
" \"url\": \"${data.url}\",\n",
" },\n",
")"
]
},
{
Expand Down Expand Up @@ -335,6 +342,9 @@
" flow=flow,\n",
" data=data,\n",
" variant=\"${summarize_text_content.variant_1}\", # here we specify node \"summarize_text_content\" to use variant 1 version.\n",
" column_mapping={\n",
" \"url\": \"${data.url}\",\n",
" },\n",
" stream=True,\n",
")"
]
Expand Down
6 changes: 6 additions & 0 deletions scripts/docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@
myst_heading_anchors = 5


# allow annotation for __call__ methods
autodoc_default_options = {
'special-members': '__call__',
}


def setup(app):
# Add the gallery directive
app.add_directive("gallery-grid", GalleryDirective)
86 changes: 54 additions & 32 deletions src/promptflow-evals/promptflow/evals/evaluators/_chat/_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,39 +22,59 @@


class ChatEvaluator:
"""
Initialize a chat evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
:type eval_last_turn: bool
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:type parallel: bool
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = ChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
}
]
result = chat_eval(conversation=conversation)
**Output format**
.. code-block:: python
{
"evaluation_per_turn": {
"gpt_retrieval": [1.0, 2.0],
"gpt_groundedness": [5.0, 2.0],
"gpt_relevance": [3.0, 5.0],
"gpt_coherence": [1.0, 2.0],
"gpt_fluency": [3.0, 5.0]
},
"gpt_retrieval": 1.5,
"gpt_groundedness": 3.5,
"gpt_relevance": 4.0,
"gpt_coherence": 1.5,
"gpt_fluency": 4.0
}
"""

def __init__(
self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
):
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
:type eval_last_turn: bool
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:type parallel: bool
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = ChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
}
]
result = chat_eval(conversation=conversation)
"""
self._eval_last_turn = eval_last_turn
self._parallel = parallel

Expand All @@ -73,7 +93,8 @@ def __init__(
self._retrieval_chat_evaluator = RetrievalChatEvaluator(model_config)

def __call__(self, *, conversation, **kwargs):
"""Evaluates chat scenario.
"""
Evaluates chat scenario.
:param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
"context" key is optional for assistant's turn and should have "citations" key with list of citations.
Expand Down Expand Up @@ -222,7 +243,8 @@ def _validate_conversation(self, conversation: List[Dict]):
one_based_turn_num = turn_num + 1

if not isinstance(turn, dict):
raise ValueError(f"Each turn in 'conversation' must be a dictionary. Turn number: {one_based_turn_num}")
raise ValueError(
f"Each turn in 'conversation' must be a dictionary. Turn number: {one_based_turn_num}")

if "role" not in turn or "content" not in turn:
raise ValueError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,44 @@


class RetrievalChatEvaluator:
def __init__(self, model_config: AzureOpenAIModelConfiguration):
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = RetrievalChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: function
**Usage**
.. code-block:: python
chat_eval = RetrievalChatEvaluator(model_config)
conversation = [
{"role": "user", "content": "What is the value of 2 + 2?"},
{"role": "assistant", "content": "2 + 2 = 4", "context": {
"citations": [
{"id": "math_doc.md", "content": "Information about additions: 1 + 2 = 3, 2 + 2 = 4"}
]
}
]
result = chat_eval(conversation=conversation)
"""
}
]
result = chat_eval(conversation=conversation)
**Output format**
.. code-block:: python
{
"gpt_retrieval": 3.0,
"evaluation_per_turn": {
"gpt_retrieval": {
"score": [1.0, 2.0, 3.0]
}
}
}
"""

def __init__(self, model_config: AzureOpenAIModelConfiguration):
# TODO: Remove this block once the bug is fixed
# https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
if model_config.api_version is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,31 @@


class CoherenceEvaluator:
def __init__(self, model_config: AzureOpenAIModelConfiguration):
"""
Initialize an evaluator configured for a specific Azure OpenAI model.
"""
Initialize a coherence evaluator configured for a specific Azure OpenAI model.
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
:param model_config: Configuration for the Azure OpenAI model.
:type model_config: AzureOpenAIModelConfiguration
**Usage**
**Usage**
.. code-block:: python
.. code-block:: python
eval_fn = CoherenceEvaluator(model_config)
result = eval_fn(
question="What is the capital of Japan?",
answer="The capital of Japan is Tokyo.")
"""
eval_fn = CoherenceEvaluator(model_config)
result = eval_fn(
question="What is the capital of Japan?",
answer="The capital of Japan is Tokyo.")
**Output format**
.. code-block:: python
{
"gpt_coherence": 1.0
}
"""

def __init__(self, model_config: AzureOpenAIModelConfiguration):
# TODO: Remove this block once the bug is fixed
# https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
if model_config.api_version is None:
Expand All @@ -39,7 +48,9 @@ def __init__(self, model_config: AzureOpenAIModelConfiguration):
self._flow = load_flow(source=prompty_path, model=prompty_model_config)

def __call__(self, *, question: str, answer: str, **kwargs):
"""Evaluate coherence.
"""
Evaluate coherence.
:param question: The question to be evaluated.
:type question: str
:param answer: The answer to be evaluated.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,55 @@


class ContentSafetyEvaluator:
def __init__(self, project_scope: dict, parallel: bool = True, credential=None):
"""
Initialize an evaluator configured to evaluate content safety metrics for QA scenario.
"""
Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.
:param project_scope: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type project_scope: dict
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:param credential: The credential for connecting to Azure AI project.
:type credential: TokenCredential
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
:rtype: function
:param project_scope: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type project_scope: dict
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:param credential: The credential for connecting to Azure AI project.
:type credential: TokenCredential
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
:rtype: function
**Usage**
**Usage**
.. code-block:: python
.. code-block:: python
project_scope = {
"subscription_id": "<subscription_id>",
"resource_group_name": "<resource_group_name>",
"project_name": "<project_name>",
}
eval_fn = ContentSafetyEvaluator(project_scope)
result = eval_fn(
question="What is the capital of France?",
answer="Paris.",
)
"""
project_scope = {
"subscription_id": "<subscription_id>",
"resource_group_name": "<resource_group_name>",
"project_name": "<project_name>",
}
eval_fn = ContentSafetyEvaluator(project_scope)
result = eval_fn(
question="What is the capital of France?",
answer="Paris.",
)
**Output format**
.. code-block:: python
{
"violence": "Medium",
"violence_score": 5.0,
"violence_reason": "Some reason",
"sexual": "Medium",
"sexual_score": 5.0,
"sexual_reason": "Some reason",
"self_harm": "Medium",
"self_harm_score": 5.0,
"self_harm_reason": "Some reason",
"hate_unfairness": "Medium",
"hate_unfairness_score": 5.0,
"hate_unfairness_reason": "Some reason"
}
"""

def __init__(self, project_scope: dict, parallel: bool = True, credential=None):
self._parallel = parallel
self._evaluators = [
ViolenceEvaluator(project_scope, credential),
Expand All @@ -45,7 +65,8 @@ def __init__(self, project_scope: dict, parallel: bool = True, credential=None):
]

def __call__(self, *, question: str, answer: str, **kwargs):
"""Evaluates content-safety metrics for "question-answering" scenario.
"""
Evaluates content-safety metrics for "question-answering" scenario.
:param question: The question to be evaluated.
:type question: str
Expand Down
Loading

0 comments on commit 8d6247f

Please sign in to comment.