Merge branch 'main' into bwilliams2/tool_calls_parsing
bwilliams2 authored Jun 18, 2024
2 parents b69cffc + 08cc0d4 commit 4595b3b
Showing 16 changed files with 114 additions and 14 deletions.
1 change: 1 addition & 0 deletions .github/workflows/promptflow-executor-unit-test.yml
@@ -18,6 +18,7 @@ on:
- src/promptflow/promptflow/storage/**
- src/promptflow/tests/*
- src/promptflow/tests/executor/**
- src/promptflow/tests/test_configs/**
- src/promptflow-tracing/promptflow/**
- src/promptflow-core/promptflow/**
- src/promptflow-devkit/promptflow/**
1 change: 1 addition & 0 deletions src/promptflow-devkit/CHANGELOG.md
@@ -4,6 +4,7 @@

### Bugs Fixed
- Fix incompatibility with `trace.NoOpTracerProvider` when setting the exporter to the prompt flow service.
- Add missing user agent in trace usage telemetry.

## v1.12.0 (2024.06.11)

@@ -1,10 +1,11 @@
import os
import re
from pathlib import Path
from typing import Any, Dict, List

from promptflow._constants import FlowEntryRegex
from promptflow._core.entry_meta_generator import _generate_flow_meta
from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT
from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT, PF_FLOW_META_LOAD_IN_SUBPROCESS
from promptflow._utils.flow_utils import resolve_python_entry_file

from ._base_inspector_proxy import AbstractInspectorProxy
@@ -35,7 +36,7 @@ def get_entry_meta(
**kwargs,
) -> Dict[str, Any]:
timeout = kwargs.get("timeout", FLOW_META_JSON_GEN_TIMEOUT)
load_in_subprocess = kwargs.get("load_in_subprocess", True)
load_in_subprocess = os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"

flow_dag = {"entry": entry}
# generate flow.json only for eager flow for now
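The hunk above replaces the `load_in_subprocess` keyword argument with an environment-variable gate. A minimal standalone sketch of that gating pattern, assuming only the constant's string value from the diff (the helper name and demo values are illustrative):

```python
import os

PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"

def load_in_subprocess() -> bool:
    # Defaults to True; any casing of "true" enables subprocess loading,
    # mirroring the os.environ.get(...).lower() == "true" check above.
    return os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"

os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
assert load_in_subprocess() is False

os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
assert load_in_subprocess() is True
```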
1 change: 1 addition & 0 deletions src/promptflow-devkit/promptflow/_sdk/_constants.py
@@ -99,6 +99,7 @@ def _prepare_home_dir() -> Path:
PF_SERVICE_DEBUG = "PF_SERVICE_DEBUG"
PF_SYSTEM_METRICS_PREFIX = "__pf__"
PF_FLOW_ENTRY_IN_TMP = "PF_FLOW_ENTRY_IN_TMP"
PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"

LOCAL_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite").resolve()
LOCAL_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite.lock").resolve()
@@ -31,8 +31,10 @@
)
from promptflow._sdk._constants import HOME_PROMPT_FLOW_DIR, AzureMLWorkspaceTriad
from promptflow._sdk._telemetry.telemetry import get_telemetry_logger
from promptflow._sdk._user_agent import USER_AGENT
from promptflow._sdk.entities._trace import Span
from promptflow._utils.logger_utils import get_cli_sdk_logger
from promptflow._utils.user_agent_utils import setup_user_agent_to_operation_context
from promptflow.core._errors import MissingRequiredPackage

from .general_utils import convert_time_unix_nano_to_timestamp, json_load
@@ -345,6 +347,8 @@ class TraceTelemetryHelper:
CUSTOM_DIMENSIONS_TRACE_COUNT = "trace_count"

def __init__(self):
# `setup_user_agent_to_operation_context` resolves the user agent string and returns it
self._user_agent = setup_user_agent_to_operation_context(USER_AGENT)
self._telemetry_logger = get_telemetry_logger()
self._lock = multiprocessing.Lock()
self._summary: typing.Dict[TraceCountKey, int] = dict()
@@ -369,6 +373,7 @@ def log_telemetry(self) -> None:
for key, count in summary_to_log.items():
custom_dimensions = key._asdict()
custom_dimensions[self.CUSTOM_DIMENSIONS_TRACE_COUNT] = count
custom_dimensions["user_agent"] = self._user_agent
self._telemetry_logger.info(self.TELEMETRY_ACTIVITY_NAME, extra={"custom_dimensions": custom_dimensions})


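Taken together, the two hunks above resolve the user agent once when `TraceTelemetryHelper` is constructed and stamp it onto every telemetry record. A rough sketch of that shape, with a hypothetical `resolve_user_agent` standing in for `setup_user_agent_to_operation_context(USER_AGENT)`:

```python
import logging
from typing import Dict

def resolve_user_agent() -> str:
    # Stand-in for setup_user_agent_to_operation_context(USER_AGENT).
    return "promptflow-sdk/0.0.1 some-caller/0.0.1"

class TelemetryHelperSketch:
    def __init__(self) -> None:
        # Resolve once up front, as TraceTelemetryHelper.__init__ now does.
        self._user_agent = resolve_user_agent()
        self._telemetry_logger = logging.getLogger("pf.telemetry")

    def log_telemetry(self, custom_dimensions: Dict[str, object]) -> None:
        # Every record carries the user agent alongside the trace count.
        custom_dimensions["user_agent"] = self._user_agent
        self._telemetry_logger.info("pf.trace.count", extra={"custom_dimensions": custom_dimensions})
```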
1 change: 1 addition & 0 deletions src/promptflow-devkit/pyproject.toml
@@ -100,6 +100,7 @@ promptflow = {path = "../promptflow"}
promptflow-tools = {path = "../promptflow-tools"}
promptflow-recording = {path = "../promptflow-recording"}
numpy = "<2.0.0" # avoid pandas incompatibility
tenacity = "<8.4.0" # tenacity 8.4.0 introduces a breaking change

[tool.poetry.group.test.dependencies]
pytest = "*"
32 changes: 30 additions & 2 deletions src/promptflow-devkit/tests/sdk_cli_test/unittests/test_trace.py
@@ -9,7 +9,7 @@
import os
import uuid
from typing import Dict
from unittest.mock import patch
from unittest.mock import MagicMock, patch

import pytest
from mock import mock
@@ -34,7 +34,13 @@
ContextAttributeKey,
)
from promptflow._sdk._tracing import setup_exporter_to_pfs, start_trace_with_devkit
from promptflow._sdk._utilities.tracing_utils import WorkspaceKindLocalCache, append_conditions, parse_protobuf_span
from promptflow._sdk._utilities.tracing_utils import (
TraceCountKey,
TraceTelemetryHelper,
WorkspaceKindLocalCache,
append_conditions,
parse_protobuf_span,
)
from promptflow.client import PFClient
from promptflow.exceptions import UserErrorException
from promptflow.tracing._operation_context import OperationContext
@@ -325,3 +331,25 @@ def test_expired_cache(self):
mock_get_kind.return_value = kind
assert ws_local_cache.get_kind() == kind
assert not ws_local_cache.is_expired


@pytest.mark.unittest
@pytest.mark.sdk_test
class TestTraceTelemetry:
def test_user_agent_in_custom_dimensions(self):
def mock_info(*args, **kwargs):
extra: dict = kwargs.get("extra")
custom_dimensions: dict = extra.get("custom_dimensions")
assert "user_agent" in custom_dimensions.keys()
assert "promptflow-sdk/" in custom_dimensions["user_agent"]

mock_telemetry_logger = MagicMock()
mock_telemetry_logger.info = mock_info
with patch("promptflow._sdk._utilities.tracing_utils.get_telemetry_logger", return_value=mock_telemetry_logger):
telemetry_helper = TraceTelemetryHelper()
summary = dict()
k = TraceCountKey(None, None, None, "script", "code")
summary[k] = 1
# append the mock summary and log
telemetry_helper.append(summary)
telemetry_helper.log_telemetry()
@@ -3,7 +3,7 @@
# ---------------------------------------------------------
import os

from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
from promptflow._utils.user_agent_utils import ClientUserAgentUtil
from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api

@@ -23,10 +23,12 @@ def __enter__(self):

if isinstance(self.client, ProxyClient):
os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"

def __exit__(self, exc_type, exc_val, exc_tb):
if isinstance(self.client, CodeClient):
recover_openai_api()

if isinstance(self.client, ProxyClient):
os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
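The context manager above pairs each environment variable it sets in `__enter__` with an `os.environ.pop(..., None)` in `__exit__`, so cleanup cannot raise if the variable is already gone. A generic sketch of that pairing (the `scoped_env` helper is illustrative, not promptflow API):

```python
import os
from contextlib import contextmanager

@contextmanager
def scoped_env(name: str, value: str):
    os.environ[name] = value
    try:
        yield
    finally:
        # pop with a default so cleanup never raises, even if the
        # variable was deleted inside the with-block
        os.environ.pop(name, None)

with scoped_env("PF_FLOW_META_LOAD_IN_SUBPROCESS", "false"):
    assert os.environ["PF_FLOW_META_LOAD_IN_SUBPROCESS"] == "false"
assert "PF_FLOW_META_LOAD_IN_SUBPROCESS" not in os.environ
```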
39 changes: 39 additions & 0 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -337,7 +337,46 @@ def evaluate(
)
"""
try:
return _evaluate(
evaluation_name=evaluation_name,
target=target,
data=data,
evaluators=evaluators,
evaluator_config=evaluator_config,
azure_ai_project=azure_ai_project,
output_path=output_path,
**kwargs,
)
except Exception as e:
# Handle multiprocess bootstrap error
bootstrap_error = (
"An attempt has been made to start a new process before the\n "
"current process has finished its bootstrapping phase."
)
if bootstrap_error in str(e):
error_message = (
"The evaluation failed due to an error during multiprocess bootstrapping."
"Please ensure the evaluate API is properly guarded with the '__main__' block:\n\n"
" if __name__ == '__main__':\n"
" evaluate(...)"
)
raise RuntimeError(error_message)

raise e


def _evaluate(
*,
evaluation_name: Optional[str] = None,
target: Optional[Callable] = None,
data: Optional[str] = None,
evaluators: Optional[Dict[str, Callable]] = None,
evaluator_config: Optional[Dict[str, Dict[str, str]]] = None,
azure_ai_project: Optional[Dict] = None,
output_path: Optional[str] = None,
**kwargs,
):
trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None

input_data_df = _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name)
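The wrapper added above exists to translate multiprocessing's bootstrap error into an actionable hint. A usage sketch under the assumption that `evaluate` is importable as `promptflow.evals.evaluate.evaluate` (the data path and toy evaluator are illustrative):

```python
from promptflow.evals.evaluate import evaluate  # import path assumed from the package layout above

def answer_length(answer: str) -> dict:
    # toy evaluator for illustration
    return {"length": len(answer)}

if __name__ == "__main__":
    # On spawn-based platforms, worker processes re-import this module;
    # the guard keeps them from re-running evaluate() during bootstrap,
    # which is exactly the failure mode the new error message points at.
    result = evaluate(data="data.jsonl", evaluators={"answer": answer_length})
    print(result["metrics"])
```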
7 changes: 3 additions & 4 deletions src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
@@ -146,7 +146,7 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
assert 0 <= metrics.get("content_safety.hate_unfairness_defect_rate") <= 1

@pytest.mark.parametrize(
"use_thread_pool,function,column",
"use_pf_client,function,column",
[
(True, answer_evaluator, "length"),
(False, answer_evaluator, "length"),
@@ -156,12 +156,12 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
(False, answer_evaluator_int_dict, "42"),
],
)
def test_evaluate_python_function(self, data_file, use_thread_pool, function, column):
def test_evaluate_python_function(self, data_file, use_pf_client, function, column):
# data
input_data = pd.read_json(data_file, lines=True)

# run the evaluation
result = evaluate(data=data_file, evaluators={"answer": function}, _use_thread_pool=use_thread_pool)
result = evaluate(data=data_file, evaluators={"answer": function}, _use_pf_client=use_pf_client)

row_result_df = pd.DataFrame(result["rows"])
metrics = result["metrics"]
@@ -422,7 +422,6 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_jso
"answer_length": AnswerLength(return_json=return_json, aggregate_return_json=aggregate_return_json),
"f1_score": F1ScoreEvaluator(),
},
_use_thread_pool=False,
)
assert result is not None
assert "metrics" in result
23 changes: 20 additions & 3 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
@@ -1,6 +1,7 @@
import json
import os
import pathlib
from unittest.mock import patch

import numpy as np
import pandas as pd
@@ -338,14 +339,14 @@ def test_renaming_column(self):
df_actuals = _rename_columns_conditionally(df)
assert_frame_equal(df_actuals.sort_index(axis=1), df_expected.sort_index(axis=1))

@pytest.mark.parametrize("use_thread_pool", [True, False])
def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_thread_pool):
@pytest.mark.parametrize("use_pf_client", [True, False])
def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_pf_client):
output_path = os.path.join(tmpdir, "eval_test_results.jsonl")
result = evaluate(
data=evaluate_test_data_jsonl_file,
evaluators={"g": F1ScoreEvaluator()},
output_path=output_path,
_use_thread_pool=use_thread_pool,
_use_pf_client=use_pf_client,
)

assert result is not None
@@ -381,3 +382,19 @@ def test_evaluate_with_errors(self):
expected.at[2, "outputs.yeti.result"] = np.nan
expected.at[3, "outputs.yeti.result"] = np.nan
assert_frame_equal(expected, result_df)

@patch("promptflow.evals.evaluate._evaluate._evaluate")
def test_evaluate_main_entry_guard(self, mock_evaluate, evaluate_test_data_jsonl_file):
err_msg = (
"An attempt has been made to start a new process before the\n "
"current process has finished its bootstrapping phase."
)
mock_evaluate.side_effect = RuntimeError(err_msg)

with pytest.raises(RuntimeError) as exc_info:
evaluate(
data=evaluate_test_data_jsonl_file,
evaluators={"f1_score": F1ScoreEvaluator()},
)

assert "Please ensure the evaluate API is properly guarded with the '__main__' block" in exc_info.value.args[0]
@@ -35,3 +35,5 @@
'ad585ee1806aae44c095f4b3e473e472bb8be141', (91136, 1272)
'ea48203d881e43bd9e027a19525ba88816c9a639', (92672, 14393)
'e53962d6670e3c446a659b93e8ff5900f82bce76', (107520, 14391)
'b3e2c3c192f72b517f5d32e5f416b1f818922bbd', (122368, 3698)
'17d268bf2d53b839d08502d3a92c6ce0f5e67fdd', (126464, 916)
Binary file not shown.
@@ -35,3 +35,5 @@
'ad585ee1806aae44c095f4b3e473e472bb8be141', (91136, 1272)
'ea48203d881e43bd9e027a19525ba88816c9a639', (92672, 14393)
'e53962d6670e3c446a659b93e8ff5900f82bce76', (107520, 14391)
'b3e2c3c192f72b517f5d32e5f416b1f818922bbd', (122368, 3698)
'17d268bf2d53b839d08502d3a92c6ce0f5e67fdd', (126464, 916)
1 change: 1 addition & 0 deletions src/promptflow/CHANGELOG.md
@@ -4,6 +4,7 @@

### Bugs Fixed
- Fix incompatibility with `trace.NoOpTracerProvider` when setting the exporter to the prompt flow service.
- Add missing user agent in trace usage telemetry.

## v1.12.0 (2024.06.11)

@@ -28,7 +28,7 @@ def completion(connection: AzureOpenAIConnection, prompt: str, stream: bool) ->
if IS_LEGACY_OPENAI:
completion = openai.Completion.create(
prompt=prompt,
engine="text-ada-001",
engine="gpt-35-turbo-instruct",
max_tokens=256,
temperature=0.8,
top_p=1.0,
@@ -40,7 +40,7 @@ def completion(connection: AzureOpenAIConnection, prompt: str, stream: bool) ->
else:
completion = get_client(connection).completions.create(
prompt=prompt,
model="text-ada-001",
model="gpt-35-turbo-instruct",
max_tokens=256,
temperature=0.8,
top_p=1.0,
