diff --git a/.github/workflows/promptflow-executor-unit-test.yml b/.github/workflows/promptflow-executor-unit-test.yml
index 969fcff2757..4f8288bd6a6 100644
--- a/.github/workflows/promptflow-executor-unit-test.yml
+++ b/.github/workflows/promptflow-executor-unit-test.yml
@@ -18,6 +18,7 @@ on:
       - src/promptflow/promptflow/storage/**
       - src/promptflow/tests/*
       - src/promptflow/tests/executor/**
+      - src/promptflow/tests/test_configs/**
       - src/promptflow-tracing/promptflow/**
       - src/promptflow-core/promptflow/**
       - src/promptflow-devkit/promptflow/**
diff --git a/src/promptflow-devkit/CHANGELOG.md b/src/promptflow-devkit/CHANGELOG.md
index b23171a1da8..3e499ab7e25 100644
--- a/src/promptflow-devkit/CHANGELOG.md
+++ b/src/promptflow-devkit/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Bugs Fixed
 - Fix incompatibility with `trace.NoOpTracerProvider` when set exporter to prompt flow service.
+- Add missing user agent in trace usage telemetry.
 
 ## v1.12.0 (2024.06.11)
 
diff --git a/src/promptflow-devkit/promptflow/_proxy/_python_inspector_proxy.py b/src/promptflow-devkit/promptflow/_proxy/_python_inspector_proxy.py
index 1bf798659df..9858e832724 100644
--- a/src/promptflow-devkit/promptflow/_proxy/_python_inspector_proxy.py
+++ b/src/promptflow-devkit/promptflow/_proxy/_python_inspector_proxy.py
@@ -1,10 +1,11 @@
+import os
 import re
 from pathlib import Path
 from typing import Any, Dict, List
 
 from promptflow._constants import FlowEntryRegex
 from promptflow._core.entry_meta_generator import _generate_flow_meta
-from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT
+from promptflow._sdk._constants import FLOW_META_JSON_GEN_TIMEOUT, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from promptflow._utils.flow_utils import resolve_python_entry_file
 
 from ._base_inspector_proxy import AbstractInspectorProxy
@@ -35,7 +36,7 @@ def get_entry_meta(
         **kwargs,
     ) -> Dict[str, Any]:
         timeout = kwargs.get("timeout", FLOW_META_JSON_GEN_TIMEOUT)
-        load_in_subprocess = kwargs.get("load_in_subprocess", True)
+        load_in_subprocess = os.environ.get(PF_FLOW_META_LOAD_IN_SUBPROCESS, "True").lower() == "true"
 
         flow_dag = {"entry": entry}
         # generate flow.json only for eager flow for now
diff --git a/src/promptflow-devkit/promptflow/_sdk/_constants.py b/src/promptflow-devkit/promptflow/_sdk/_constants.py
index 16ff8996a36..bc9ce066ae4 100644
--- a/src/promptflow-devkit/promptflow/_sdk/_constants.py
+++ b/src/promptflow-devkit/promptflow/_sdk/_constants.py
@@ -99,6 +99,7 @@ def _prepare_home_dir() -> Path:
 PF_SERVICE_DEBUG = "PF_SERVICE_DEBUG"
 PF_SYSTEM_METRICS_PREFIX = "__pf__"
 PF_FLOW_ENTRY_IN_TMP = "PF_FLOW_ENTRY_IN_TMP"
+PF_FLOW_META_LOAD_IN_SUBPROCESS = "PF_FLOW_META_LOAD_IN_SUBPROCESS"
 
 LOCAL_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite").resolve()
 LOCAL_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite.lock").resolve()
diff --git a/src/promptflow-devkit/promptflow/_sdk/_utilities/tracing_utils.py b/src/promptflow-devkit/promptflow/_sdk/_utilities/tracing_utils.py
index e28511dd1e8..604e54fbc31 100644
--- a/src/promptflow-devkit/promptflow/_sdk/_utilities/tracing_utils.py
+++ b/src/promptflow-devkit/promptflow/_sdk/_utilities/tracing_utils.py
@@ -31,8 +31,10 @@
 )
 from promptflow._sdk._constants import HOME_PROMPT_FLOW_DIR, AzureMLWorkspaceTriad
 from promptflow._sdk._telemetry.telemetry import get_telemetry_logger
+from promptflow._sdk._user_agent import USER_AGENT
 from promptflow._sdk.entities._trace import Span
 from promptflow._utils.logger_utils import get_cli_sdk_logger
+from promptflow._utils.user_agent_utils import setup_user_agent_to_operation_context
 from promptflow.core._errors import MissingRequiredPackage
 
 from .general_utils import convert_time_unix_nano_to_timestamp, json_load
@@ -345,6 +347,8 @@ class TraceTelemetryHelper:
     CUSTOM_DIMENSIONS_TRACE_COUNT = "trace_count"
 
     def __init__(self):
+        # `setup_user_agent_to_operation_context` will get the user agent and return it
+        self._user_agent = setup_user_agent_to_operation_context(USER_AGENT)
         self._telemetry_logger = get_telemetry_logger()
         self._lock = multiprocessing.Lock()
         self._summary: typing.Dict[TraceCountKey, int] = dict()
@@ -369,6 +373,7 @@ def log_telemetry(self) -> None:
         for key, count in summary_to_log.items():
             custom_dimensions = key._asdict()
             custom_dimensions[self.CUSTOM_DIMENSIONS_TRACE_COUNT] = count
+            custom_dimensions["user_agent"] = self._user_agent
             self._telemetry_logger.info(self.TELEMETRY_ACTIVITY_NAME, extra={"custom_dimensions": custom_dimensions})
diff --git a/src/promptflow-devkit/pyproject.toml b/src/promptflow-devkit/pyproject.toml
index 5ddad796402..43fee529799 100644
--- a/src/promptflow-devkit/pyproject.toml
+++ b/src/promptflow-devkit/pyproject.toml
@@ -100,6 +100,7 @@ promptflow = {path = "../promptflow"}
 promptflow-tools = {path = "../promptflow-tools"}
 promptflow-recording = {path = "../promptflow-recording"}
 numpy = "<2.0.0" # avoid pandas incompatibility
+tenacity = "<8.4.0" # tenacity has breaking changes in 8.4.0
 
 [tool.poetry.group.test.dependencies]
 pytest = "*"
diff --git a/src/promptflow-devkit/tests/sdk_cli_test/unittests/test_trace.py b/src/promptflow-devkit/tests/sdk_cli_test/unittests/test_trace.py
index d24e8a4c25e..51f618d3ca4 100644
--- a/src/promptflow-devkit/tests/sdk_cli_test/unittests/test_trace.py
+++ b/src/promptflow-devkit/tests/sdk_cli_test/unittests/test_trace.py
@@ -9,7 +9,7 @@
 import os
 import uuid
 from typing import Dict
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from mock import mock
@@ -34,7 +34,13 @@
     ContextAttributeKey,
 )
 from promptflow._sdk._tracing import setup_exporter_to_pfs, start_trace_with_devkit
-from promptflow._sdk._utilities.tracing_utils import WorkspaceKindLocalCache, append_conditions, parse_protobuf_span
+from promptflow._sdk._utilities.tracing_utils import (
+    TraceCountKey,
+    TraceTelemetryHelper,
+    WorkspaceKindLocalCache,
+    append_conditions,
+    parse_protobuf_span,
+)
 from promptflow.client import PFClient
 from promptflow.exceptions import UserErrorException
 from promptflow.tracing._operation_context import OperationContext
@@ -325,3 +331,25 @@ def test_expired_cache(self):
             mock_get_kind.return_value = kind
             assert ws_local_cache.get_kind() == kind
             assert not ws_local_cache.is_expired
+
+
+@pytest.mark.unittest
+@pytest.mark.sdk_test
+class TestTraceTelemetry:
+    def test_user_agent_in_custom_dimensions(self):
+        def mock_info(*args, **kwargs):
+            extra: dict = kwargs.get("extra")
+            custom_dimensions: dict = extra.get("custom_dimensions")
+            assert "user_agent" in custom_dimensions.keys()
+            assert "promptflow-sdk/" in custom_dimensions["user_agent"]
+
+        mock_telemetry_logger = MagicMock()
+        mock_telemetry_logger.info = mock_info
+        with patch("promptflow._sdk._utilities.tracing_utils.get_telemetry_logger", return_value=mock_telemetry_logger):
+            telemetry_helper = TraceTelemetryHelper()
+            summary = dict()
+            k = TraceCountKey(None, None, None, "script", "code")
+            summary[k] = 1
+            # append the mock summary and log
+            telemetry_helper.append(summary)
+            telemetry_helper.log_telemetry()
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
index f9b55f288ff..3e0b7a9c38a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
@@ -3,7 +3,7 @@
 # ---------------------------------------------------------
 import os
 
-from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS
 from promptflow._utils.user_agent_utils import ClientUserAgentUtil
 from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
 
@@ -23,6 +23,7 @@ def __enter__(self):
 
         if isinstance(self.client, ProxyClient):
             os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+            os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if isinstance(self.client, CodeClient):
@@ -30,3 +31,4 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
         if isinstance(self.client, ProxyClient):
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
+            os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None)
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
index 74496440cf8..af69519aa63 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -337,7 +337,46 @@ def evaluate(
         )
     """
+    try:
+        return _evaluate(
+            evaluation_name=evaluation_name,
+            target=target,
+            data=data,
+            evaluators=evaluators,
+            evaluator_config=evaluator_config,
+            azure_ai_project=azure_ai_project,
+            output_path=output_path,
+            **kwargs,
+        )
+    except Exception as e:
+        # Handle multiprocess bootstrap error
+        bootstrap_error = (
+            "An attempt has been made to start a new process before the\n        "
+            "current process has finished its bootstrapping phase."
+        )
+        if bootstrap_error in str(e):
+            error_message = (
+                "The evaluation failed due to an error during multiprocess bootstrapping. "
+ "Please ensure the evaluate API is properly guarded with the '__main__' block:\n\n" + " if __name__ == '__main__':\n" + " evaluate(...)" + ) + raise RuntimeError(error_message) + raise e + + +def _evaluate( + *, + evaluation_name: Optional[str] = None, + target: Optional[Callable] = None, + data: Optional[str] = None, + evaluators: Optional[Dict[str, Callable]] = None, + evaluator_config: Optional[Dict[str, Dict[str, str]]] = None, + azure_ai_project: Optional[Dict] = None, + output_path: Optional[str] = None, + **kwargs, +): trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None input_data_df = _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name) diff --git a/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py b/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py index 08403e21c98..403d7623bef 100644 --- a/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py +++ b/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py @@ -146,7 +146,7 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file, assert 0 <= metrics.get("content_safety.hate_unfairness_defect_rate") <= 1 @pytest.mark.parametrize( - "use_thread_pool,function,column", + "use_pf_client,function,column", [ (True, answer_evaluator, "length"), (False, answer_evaluator, "length"), @@ -156,12 +156,12 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file, (False, answer_evaluator_int_dict, "42"), ], ) - def test_evaluate_python_function(self, data_file, use_thread_pool, function, column): + def test_evaluate_python_function(self, data_file, use_pf_client, function, column): # data input_data = pd.read_json(data_file, lines=True) # run the evaluation - result = evaluate(data=data_file, evaluators={"answer": function}, _use_thread_pool=use_thread_pool) + result = evaluate(data=data_file, evaluators={"answer": function}, _use_pf_client=use_pf_client) row_result_df = pd.DataFrame(result["rows"]) metrics = result["metrics"] @@ -422,7 +422,6 @@ def test_evaluate_aggregation(self, data_file, return_json, aggregate_return_jso "answer_length": AnswerLength(return_json=return_json, aggregate_return_json=aggregate_return_json), "f1_score": F1ScoreEvaluator(), }, - _use_thread_pool=False, ) assert result is not None assert "metrics" in result diff --git a/src/promptflow-evals/tests/evals/unittests/test_evaluate.py b/src/promptflow-evals/tests/evals/unittests/test_evaluate.py index 17dfd141daf..01d308b062d 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_evaluate.py +++ b/src/promptflow-evals/tests/evals/unittests/test_evaluate.py @@ -1,6 +1,7 @@ import json import os import pathlib +from unittest.mock import patch import numpy as np import pandas as pd @@ -338,14 +339,14 @@ def test_renaming_column(self): df_actuals = _rename_columns_conditionally(df) assert_frame_equal(df_actuals.sort_index(axis=1), df_expected.sort_index(axis=1)) - @pytest.mark.parametrize("use_thread_pool", [True, False]) - def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_thread_pool): + @pytest.mark.parametrize("use_pf_client", [True, False]) + def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_pf_client): output_path = os.path.join(tmpdir, "eval_test_results.jsonl") result = evaluate( data=evaluate_test_data_jsonl_file, evaluators={"g": F1ScoreEvaluator()}, output_path=output_path, - _use_thread_pool=use_thread_pool, + 
         )
 
         assert result is not None
@@ -381,3 +382,19 @@ def test_evaluate_with_errors(self):
         expected.at[2, "outputs.yeti.result"] = np.nan
         expected.at[3, "outputs.yeti.result"] = np.nan
         assert_frame_equal(expected, result_df)
+
+    @patch("promptflow.evals.evaluate._evaluate._evaluate")
+    def test_evaluate_main_entry_guard(self, mock_evaluate, evaluate_test_data_jsonl_file):
+        err_msg = (
+            "An attempt has been made to start a new process before the\n        "
+            "current process has finished its bootstrapping phase."
+        )
+        mock_evaluate.side_effect = RuntimeError(err_msg)
+
+        with pytest.raises(RuntimeError) as exc_info:
+            evaluate(
+                data=evaluate_test_data_jsonl_file,
+                evaluators={"f1_score": F1ScoreEvaluator()},
+            )
+
+        assert "Please ensure the evaluate API is properly guarded with the '__main__' block" in exc_info.value.args[0]
diff --git a/src/promptflow-recording/recordings/local/executor_node_cache.shelve.bak b/src/promptflow-recording/recordings/local/executor_node_cache.shelve.bak
index b1ed1b12c3f..708f20db4e2 100644
--- a/src/promptflow-recording/recordings/local/executor_node_cache.shelve.bak
+++ b/src/promptflow-recording/recordings/local/executor_node_cache.shelve.bak
@@ -35,3 +35,5 @@
 'ad585ee1806aae44c095f4b3e473e472bb8be141', (91136, 1272)
 'ea48203d881e43bd9e027a19525ba88816c9a639', (92672, 14393)
 'e53962d6670e3c446a659b93e8ff5900f82bce76', (107520, 14391)
+'b3e2c3c192f72b517f5d32e5f416b1f818922bbd', (122368, 3698)
+'17d268bf2d53b839d08502d3a92c6ce0f5e67fdd', (126464, 916)
diff --git a/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dat b/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dat
index 494a0a81736..a1397df2e98 100644
Binary files a/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dat and b/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dat differ
diff --git a/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dir b/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dir
index b1ed1b12c3f..708f20db4e2 100644
--- a/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dir
+++ b/src/promptflow-recording/recordings/local/executor_node_cache.shelve.dir
@@ -35,3 +35,5 @@
 'ad585ee1806aae44c095f4b3e473e472bb8be141', (91136, 1272)
 'ea48203d881e43bd9e027a19525ba88816c9a639', (92672, 14393)
 'e53962d6670e3c446a659b93e8ff5900f82bce76', (107520, 14391)
+'b3e2c3c192f72b517f5d32e5f416b1f818922bbd', (122368, 3698)
+'17d268bf2d53b839d08502d3a92c6ce0f5e67fdd', (126464, 916)
diff --git a/src/promptflow/CHANGELOG.md b/src/promptflow/CHANGELOG.md
index 09a5620d4d7..7c2c889607c 100644
--- a/src/promptflow/CHANGELOG.md
+++ b/src/promptflow/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Bugs Fixed
 - Fix incompatibility with `trace.NoOpTracerProvider` when set exporter to prompt flow service.
+- Add missing user agent in trace usage telemetry.
 
 ## v1.12.0 (2024.06.11)
 
diff --git a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py
index b8f15a5ea5e..e9bf0bd2ef7 100644
--- a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py
+++ b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py
@@ -28,7 +28,7 @@ def completion(connection: AzureOpenAIConnection, prompt: str, stream: bool) ->
     if IS_LEGACY_OPENAI:
         completion = openai.Completion.create(
             prompt=prompt,
-            engine="text-ada-001",
+            engine="gpt-35-turbo-instruct",
             max_tokens=256,
             temperature=0.8,
             top_p=1.0,
@@ -40,7 +40,7 @@ def completion(connection: AzureOpenAIConnection, prompt: str, stream: bool) ->
     else:
         completion = get_client(connection).completions.create(
             prompt=prompt,
-            model="text-ada-001",
+            model="gpt-35-turbo-instruct",
             max_tokens=256,
             temperature=0.8,
             top_p=1.0,