From eb112d29b36a1448d37c36936a91fea41e9a0e2e Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Wed, 3 Jul 2024 12:44:50 -0700 Subject: [PATCH 01/14] Update changelog --- src/promptflow-evals/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/promptflow-evals/CHANGELOG.md b/src/promptflow-evals/CHANGELOG.md index a10ddbf154a..35a0557d732 100644 --- a/src/promptflow-evals/CHANGELOG.md +++ b/src/promptflow-evals/CHANGELOG.md @@ -3,6 +3,7 @@ Please insert change log into "Next Release" ONLY. ## Next release +- Exposed batch evaluation run timeout variable via "PF_BATCH_TIMEOUT_SEC" environment variable ## 0.0.1 - Introduced package From 7fa3e7dc01910aea364fcdb0dd4cbf94c7d25bef Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Wed, 3 Jul 2024 13:01:14 -0700 Subject: [PATCH 02/14] Expose batch timeout environment variable --- src/promptflow-evals/promptflow/evals/_constants.py | 3 ++- .../evals/evaluate/_batch_run_client/code_client.py | 9 ++++++--- .../evals/evaluate/_batch_run_client/proxy_client.py | 9 ++++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py index c31b88322e3..42f803f2b72 100644 --- a/src/promptflow-evals/promptflow/evals/_constants.py +++ b/src/promptflow-evals/promptflow/evals/_constants.py @@ -23,4 +23,5 @@ class Prefixes: CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4 -BATCH_RUN_TIMEOUT = 3600 +PF_BATCH_TIMEOUT_DEFAULT = 3600 +PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC" diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py index 10336f80ecb..7debb3c7cbd 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py @@ -7,11 +7,12 @@ import pandas as pd +from promptflow._utils.utils import get_int_env_var from promptflow.contracts.types import AttrDict from promptflow.evals.evaluate._utils import _apply_column_mapping, _has_aggregator, load_jsonl from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from ..._constants import BATCH_RUN_TIMEOUT +from ..._constants import PF_BATCH_TIMEOUT_DEFAULT, PF_BATCH_TIMEOUT_SEC LOGGER = logging.getLogger(__name__) @@ -24,15 +25,17 @@ def __init__(self, run, input_data, evaluator_name=None, aggregated_metrics=None self.aggregated_metrics = aggregated_metrics def get_result_df(self, exclude_inputs=False): - result_df = self.run.result(timeout=BATCH_RUN_TIMEOUT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + result_df = self.run.result(timeout=batch_run_timeout) if exclude_inputs: result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")]) return result_df def get_aggregated_metrics(self): try: + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) aggregated_metrics = ( - self.aggregated_metrics.result(timeout=BATCH_RUN_TIMEOUT) + self.aggregated_metrics.result(timeout=batch_run_timeout) if self.aggregated_metrics is not None else None ) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py index 4c58b729483..30b9982bb23 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py @@ -5,10 +5,11 @@ import numpy as np +from promptflow._utils.utils import get_int_env_var from promptflow.client import PFClient from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from ..._constants import BATCH_RUN_TIMEOUT +from ..._constants import PF_BATCH_TIMEOUT_DEFAULT, PF_BATCH_TIMEOUT_SEC LOGGER = logging.getLogger(__name__) @@ -30,11 +31,13 @@ def run(self, flow, data, column_mapping=None, **kwargs): return ProxyRun(run=eval_future) def get_details(self, proxy_run, all_results=False): - run = proxy_run.run.result(timeout=BATCH_RUN_TIMEOUT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + run = proxy_run.run.result(timeout=batch_run_timeout) result_df = self._pf_client.get_details(run, all_results=all_results) result_df.replace("(Failed)", np.nan, inplace=True) return result_df def get_metrics(self, proxy_run): - run = proxy_run.run.result(timeout=BATCH_RUN_TIMEOUT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + run = proxy_run.run.result(timeout=batch_run_timeout) return self._pf_client.get_metrics(run) From d94ac651ea3f4d7806e21ac353d43743b86367dc Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Wed, 3 Jul 2024 13:01:48 -0700 Subject: [PATCH 03/14] Add test --- .../evals/unittests/test_batch_run_context.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py index b67bba15c30..89dd72f1c06 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py +++ b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py @@ -1,10 +1,13 @@ +import os from unittest.mock import MagicMock import pytest from promptflow.client import PFClient +from promptflow.evals._constants import PF_BATCH_TIMEOUT_DEFAULT, PF_BATCH_TIMEOUT_SEC from promptflow.evals._user_agent import USER_AGENT from promptflow.evals.evaluate._batch_run_client import BatchRunContext, CodeClient +from promptflow.evals.evaluate._batch_run_client.code_client import CodeRun @pytest.fixture @@ -17,6 +20,11 @@ def pf_client_mock(): return MagicMock(spec=PFClient) +@pytest.fixture +def code_run_mock(): + return MagicMock() + + @pytest.mark.unittest class TestBatchRunContext: def test_with_codeclient(self, mocker, code_client_mock): @@ -50,3 +58,14 @@ def test_with_pfclient(self, mocker, pf_client_mock): pass mock_recover_openai_api.assert_not_called() + + def test_get_result_timeout(self, code_run_mock): + code_run_instance = CodeRun(run=code_run_mock, input_data={}) + code_run_instance.get_result_df() + + code_run_mock.result.assert_called_once_with(timeout=PF_BATCH_TIMEOUT_DEFAULT) + + custom_timeout = "100000" + os.environ[PF_BATCH_TIMEOUT_SEC] = custom_timeout + code_run_instance.get_result_df() + code_run_mock.result.assert_called_with(timeout=int(custom_timeout)) From 844055658ac45384992d9dd454b27ed4479ad4cd Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Thu, 11 Jul 2024 09:49:00 -0700 Subject: [PATCH 04/14] Change PF_BATCH_TIMEOUT_DEFAULT constant name to PF_BATCH_TIMEOUT_SEC_DEFAULT --- .../evals/evaluate/_batch_run_client/code_client.py | 6 +++--- .../evals/evaluate/_batch_run_client/proxy_client.py | 6 +++--- .../tests/evals/unittests/test_batch_run_context.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py index 7debb3c7cbd..5a47820c64d 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py @@ -12,7 +12,7 @@ from promptflow.evals.evaluate._utils import _apply_column_mapping, _has_aggregator, load_jsonl from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from ..._constants import PF_BATCH_TIMEOUT_DEFAULT, PF_BATCH_TIMEOUT_SEC +from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT LOGGER = logging.getLogger(__name__) @@ -25,7 +25,7 @@ def __init__(self, run, input_data, evaluator_name=None, aggregated_metrics=None self.aggregated_metrics = aggregated_metrics def get_result_df(self, exclude_inputs=False): - batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT) result_df = self.run.result(timeout=batch_run_timeout) if exclude_inputs: result_df = result_df.drop(columns=[col for col in result_df.columns if col.startswith("inputs.")]) @@ -33,7 +33,7 @@ def get_result_df(self, exclude_inputs=False): def get_aggregated_metrics(self): try: - batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT) aggregated_metrics = ( self.aggregated_metrics.result(timeout=batch_run_timeout) if self.aggregated_metrics is not None diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py index 30b9982bb23..ed771c353e2 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py @@ -9,7 +9,7 @@ from promptflow.client import PFClient from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from ..._constants import PF_BATCH_TIMEOUT_DEFAULT, PF_BATCH_TIMEOUT_SEC +from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT LOGGER = logging.getLogger(__name__) @@ -31,13 +31,13 @@ def run(self, flow, data, column_mapping=None, **kwargs): return ProxyRun(run=eval_future) def get_details(self, proxy_run, all_results=False): - batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT) run = proxy_run.run.result(timeout=batch_run_timeout) result_df = self._pf_client.get_details(run, all_results=all_results) result_df.replace("(Failed)", np.nan, inplace=True) return result_df def get_metrics(self, proxy_run): - batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_DEFAULT) + batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT) run = proxy_run.run.result(timeout=batch_run_timeout) return self._pf_client.get_metrics(run) diff --git a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py index 89dd72f1c06..8fa3d5f27f4 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py +++ b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py @@ -4,7 +4,7 @@ import pytest from promptflow.client import PFClient -from promptflow.evals._constants import PF_BATCH_TIMEOUT_DEFAULT, PF_BATCH_TIMEOUT_SEC +from promptflow.evals._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT from promptflow.evals._user_agent import USER_AGENT from promptflow.evals.evaluate._batch_run_client import BatchRunContext, CodeClient from promptflow.evals.evaluate._batch_run_client.code_client import CodeRun @@ -63,7 +63,7 @@ def test_get_result_timeout(self, code_run_mock): code_run_instance = CodeRun(run=code_run_mock, input_data={}) code_run_instance.get_result_df() - code_run_mock.result.assert_called_once_with(timeout=PF_BATCH_TIMEOUT_DEFAULT) + code_run_mock.result.assert_called_once_with(timeout=PF_BATCH_TIMEOUT_SEC_DEFAULT) custom_timeout = "100000" os.environ[PF_BATCH_TIMEOUT_SEC] = custom_timeout From ac0d64a524fc9b16f976487188ac7a390b3d1de4 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Thu, 11 Jul 2024 09:55:33 -0700 Subject: [PATCH 05/14] Change PF_BATCH_TIMEOUT_DEFAULT constant name to PF_BATCH_TIMEOUT_SEC_DEFAULT --- src/promptflow-evals/promptflow/evals/_constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py index 42f803f2b72..5166f6a464e 100644 --- a/src/promptflow-evals/promptflow/evals/_constants.py +++ b/src/promptflow-evals/promptflow/evals/_constants.py @@ -23,5 +23,5 @@ class Prefixes: CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4 -PF_BATCH_TIMEOUT_DEFAULT = 3600 +PF_BATCH_TIMEOUT_SEC_DEFAULT = 3600 PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC" From 1181b28a4acc2ed8a7266840ad49cae8227a31b2 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 12 Jul 2024 11:58:12 -0700 Subject: [PATCH 06/14] Copy get_int_env_var to promptflow-evals utils --- .../evaluate/_batch_run_client/code_client.py | 3 +-- .../_batch_run_client/proxy_client.py | 2 +- .../promptflow/evals/evaluate/_utils.py | 25 ++++++++++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py index 5a47820c64d..e791d35bdc8 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py @@ -7,9 +7,8 @@ import pandas as pd -from promptflow._utils.utils import get_int_env_var from promptflow.contracts.types import AttrDict -from promptflow.evals.evaluate._utils import _apply_column_mapping, _has_aggregator, load_jsonl +from promptflow.evals.evaluate._utils import _apply_column_mapping, _has_aggregator, get_int_env_var, load_jsonl from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py index ed771c353e2..65af01499eb 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py @@ -5,8 +5,8 @@ import numpy as np -from promptflow._utils.utils import get_int_env_var from promptflow.client import PFClient +from promptflow.evals.evaluate._utils import get_int_env_var from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_utils.py b/src/promptflow-evals/promptflow/evals/evaluate/_utils.py index 5ff657707c0..ca270ed839b 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_utils.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_utils.py @@ -14,7 +14,6 @@ from promptflow.evals._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, Prefixes from promptflow.evals.evaluate._eval_run import EvalRun - LOGGER = logging.getLogger(__name__) AZURE_WORKSPACE_REGEX_FORMAT = ( @@ -78,7 +77,11 @@ def _azure_pf_client_and_triad(trace_destination): def _log_metrics_and_instance_results( - metrics, instance_results, trace_destination, run, evaluation_name, + metrics, + instance_results, + trace_destination, + run, + evaluation_name, ) -> str: if trace_destination is None: LOGGER.error("Unable to log traces as trace destination was not defined.") @@ -192,7 +195,7 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace if match is not None: pattern = match.group(1) if pattern.startswith(pattern_prefix): - map_from_key = pattern[len(pattern_prefix):] + map_from_key = pattern[len(pattern_prefix) :] elif pattern.startswith(run_outputs_prefix): # Target-generated columns always starts from .outputs. map_from_key = f"{Prefixes._TGT_OUTPUTS}{pattern[len(run_outputs_prefix) :]}" @@ -216,3 +219,19 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace def _has_aggregator(evaluator): return hasattr(evaluator, "__aggregate__") + + +def get_int_env_var(env_var_name, default_value=None): + """ + The function `get_int_env_var` retrieves an integer environment variable value, with an optional + default value if the variable is not set or cannot be converted to an integer. + + :param env_var_name: The name of the environment variable you want to retrieve the value of + :param default_value: The default value is the value that will be returned if the environment + variable is not found or if it cannot be converted to an integer + :return: an integer value. + """ + try: + return int(os.environ.get(env_var_name, default_value)) + except Exception: + return default_value From 6c109dffb7409fb512f7f4cc734a8146e986bcb8 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 12 Jul 2024 19:38:39 -0700 Subject: [PATCH 07/14] Remove timeout logic from proxy_client to batch_run_context --- .../evaluate/_batch_run_client/batch_run_context.py | 5 +++++ .../evals/evaluate/_batch_run_client/proxy_client.py | 9 ++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py index 3e0b7a9c38a..8493f1f6553 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py @@ -5,6 +5,7 @@ from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS from promptflow._utils.user_agent_utils import ClientUserAgentUtil +from promptflow.evals._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api from ..._user_agent import USER_AGENT @@ -25,6 +26,9 @@ def __enter__(self): os.environ[PF_FLOW_ENTRY_IN_TMP] = "true" os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false" + if os.environ[PF_BATCH_TIMEOUT_SEC] is None: + os.environ[PF_BATCH_TIMEOUT_SEC] = PF_BATCH_TIMEOUT_SEC_DEFAULT + def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(self.client, CodeClient): recover_openai_api() @@ -32,3 +36,4 @@ def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(self.client, ProxyClient): os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None) os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None) + os.environ.pop(PF_BATCH_TIMEOUT_SEC, None) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py index 65af01499eb..f7f76740233 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py @@ -6,11 +6,8 @@ import numpy as np from promptflow.client import PFClient -from promptflow.evals.evaluate._utils import get_int_env_var from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor -from ..._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT - LOGGER = logging.getLogger(__name__) @@ -31,13 +28,11 @@ def run(self, flow, data, column_mapping=None, **kwargs): return ProxyRun(run=eval_future) def get_details(self, proxy_run, all_results=False): - batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT) - run = proxy_run.run.result(timeout=batch_run_timeout) + run = proxy_run.run.result() result_df = self._pf_client.get_details(run, all_results=all_results) result_df.replace("(Failed)", np.nan, inplace=True) return result_df def get_metrics(self, proxy_run): - batch_run_timeout = get_int_env_var(PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT) - run = proxy_run.run.result(timeout=batch_run_timeout) + run = proxy_run.run.result() return self._pf_client.get_metrics(run) From d00ceb4aa13a08a45cb045a37acd5a40e0d66ae6 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 12 Jul 2024 20:48:03 -0700 Subject: [PATCH 08/14] Add tests for batch context --- .../_batch_run_client/batch_run_context.py | 4 +-- .../evals/unittests/test_batch_run_context.py | 35 +++++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py index 8493f1f6553..2a1811a2ee5 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py @@ -26,8 +26,8 @@ def __enter__(self): os.environ[PF_FLOW_ENTRY_IN_TMP] = "true" os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false" - if os.environ[PF_BATCH_TIMEOUT_SEC] is None: - os.environ[PF_BATCH_TIMEOUT_SEC] = PF_BATCH_TIMEOUT_SEC_DEFAULT + if os.environ.get(PF_BATCH_TIMEOUT_SEC) is None: + os.environ[PF_BATCH_TIMEOUT_SEC] = str(PF_BATCH_TIMEOUT_SEC_DEFAULT) def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(self.client, CodeClient): diff --git a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py index 8fa3d5f27f4..98db07bc979 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py +++ b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py @@ -6,8 +6,7 @@ from promptflow.client import PFClient from promptflow.evals._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT from promptflow.evals._user_agent import USER_AGENT -from promptflow.evals.evaluate._batch_run_client import BatchRunContext, CodeClient -from promptflow.evals.evaluate._batch_run_client.code_client import CodeRun +from promptflow.evals.evaluate._batch_run_client import BatchRunContext, CodeClient, ProxyClient @pytest.fixture @@ -20,11 +19,6 @@ def pf_client_mock(): return MagicMock(spec=PFClient) -@pytest.fixture -def code_run_mock(): - return MagicMock() - - @pytest.mark.unittest class TestBatchRunContext: def test_with_codeclient(self, mocker, code_client_mock): @@ -59,13 +53,24 @@ def test_with_pfclient(self, mocker, pf_client_mock): mock_recover_openai_api.assert_not_called() - def test_get_result_timeout(self, code_run_mock): - code_run_instance = CodeRun(run=code_run_mock, input_data={}) - code_run_instance.get_result_df() + def test_batch_timeout_default(self): + before_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC) + assert before_timeout is None + + with BatchRunContext(ProxyClient(PFClient)): + during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC)) + assert during_timeout == PF_BATCH_TIMEOUT_SEC_DEFAULT + + after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC) + assert after_timeout is None + + def test_batch_timeout_custom(self): + custom_timeout = 10000000 + os.environ[PF_BATCH_TIMEOUT_SEC] = str(custom_timeout) - code_run_mock.result.assert_called_once_with(timeout=PF_BATCH_TIMEOUT_SEC_DEFAULT) + with BatchRunContext(ProxyClient(PFClient)): + during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC)) + assert during_timeout == custom_timeout - custom_timeout = "100000" - os.environ[PF_BATCH_TIMEOUT_SEC] = custom_timeout - code_run_instance.get_result_df() - code_run_mock.result.assert_called_with(timeout=int(custom_timeout)) + after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC) + assert after_timeout is None From 0e6d5f61805e6fe72bf934290cb9d796949fcf16 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Sat, 13 Jul 2024 14:18:22 -0700 Subject: [PATCH 09/14] Update __exit__ to only reset if environment variable is default --- .../evals/evaluate/_batch_run_client/batch_run_context.py | 4 +++- .../tests/evals/unittests/test_batch_run_context.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py index 2a1811a2ee5..e6b9e69fbd2 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py @@ -36,4 +36,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(self.client, ProxyClient): os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None) os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None) - os.environ.pop(PF_BATCH_TIMEOUT_SEC, None) + + if os.environ.get(PF_BATCH_TIMEOUT_SEC) == str(PF_BATCH_TIMEOUT_SEC_DEFAULT): + os.environ.pop(PF_BATCH_TIMEOUT_SEC, None) diff --git a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py index 98db07bc979..483579299a8 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py +++ b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py @@ -61,6 +61,7 @@ def test_batch_timeout_default(self): during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC)) assert during_timeout == PF_BATCH_TIMEOUT_SEC_DEFAULT + # Default timeout should be reset after exiting BatchRunContext after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC) assert after_timeout is None @@ -72,5 +73,6 @@ def test_batch_timeout_custom(self): during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC)) assert during_timeout == custom_timeout - after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC) - assert after_timeout is None + # Custom timeouts should not be reset after exiting BatchRunContext + after_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC)) + assert after_timeout == custom_timeout From 3953b38ec2adacfd84e20fe7c56227c62a916bea Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Sat, 13 Jul 2024 14:22:31 -0700 Subject: [PATCH 10/14] Update CHANGELOG to use promptflow format --- src/promptflow-evals/CHANGELOG.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/promptflow-evals/CHANGELOG.md b/src/promptflow-evals/CHANGELOG.md index 35a0557d732..0641021b6af 100644 --- a/src/promptflow-evals/CHANGELOG.md +++ b/src/promptflow-evals/CHANGELOG.md @@ -1,9 +1,16 @@ -# promptflow-evals package +# Release History -Please insert change log into "Next Release" ONLY. +## v0.3.2 (Upcoming) -## Next release +### Features Added + +### Bugs Fixed + +### Improvements - Exposed batch evaluation run timeout variable via "PF_BATCH_TIMEOUT_SEC" environment variable -## 0.0.1 -- Introduced package +## v0.3.1 (2022-07-09) +- This release contains minor bug fixes and improvements. + +## v0.3.0 (2024-05-17) +- Initial release of promptflow-evals package. From fef7729a8d396286c43b342c509cb1ec664f7097 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Sat, 13 Jul 2024 17:46:29 -0700 Subject: [PATCH 11/14] Lower test timeout value to be under limit --- .../tests/evals/unittests/test_batch_run_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py index 483579299a8..b62a4ff3cfc 100644 --- a/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py +++ b/src/promptflow-evals/tests/evals/unittests/test_batch_run_context.py @@ -66,7 +66,7 @@ def test_batch_timeout_default(self): assert after_timeout is None def test_batch_timeout_custom(self): - custom_timeout = 10000000 + custom_timeout = 1000 os.environ[PF_BATCH_TIMEOUT_SEC] = str(custom_timeout) with BatchRunContext(ProxyClient(PFClient)): From 736b39f4286678e4aa3a5bac7084b4eb213a54e7 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 19 Jul 2024 12:00:23 -0700 Subject: [PATCH 12/14] Add flag and check if batch timeout was set by system --- src/promptflow-evals/promptflow/evals/_constants.py | 1 + .../evaluate/_batch_run_client/batch_run_context.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py index 5166f6a464e..7d482aba5f4 100644 --- a/src/promptflow-evals/promptflow/evals/_constants.py +++ b/src/promptflow-evals/promptflow/evals/_constants.py @@ -25,3 +25,4 @@ class Prefixes: PF_BATCH_TIMEOUT_SEC_DEFAULT = 3600 PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC" +PF_IS_BATCH_TIMEOUT_SYSTEM_SET = "PF_IS_BATCH_TIMEOUT_SYSTEM_SET" diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py index e6b9e69fbd2..613b7158394 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py @@ -5,7 +5,11 @@ from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS from promptflow._utils.user_agent_utils import ClientUserAgentUtil -from promptflow.evals._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT +from promptflow.evals._constants import ( + PF_BATCH_TIMEOUT_SEC, + PF_BATCH_TIMEOUT_SEC_DEFAULT, + PF_IS_BATCH_TIMEOUT_SYSTEM_SET, +) from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api from ..._user_agent import USER_AGENT @@ -28,6 +32,9 @@ def __enter__(self): if os.environ.get(PF_BATCH_TIMEOUT_SEC) is None: os.environ[PF_BATCH_TIMEOUT_SEC] = str(PF_BATCH_TIMEOUT_SEC_DEFAULT) + os.environ[PF_IS_BATCH_TIMEOUT_SYSTEM_SET] = "true" + else: + os.environ[PF_IS_BATCH_TIMEOUT_SYSTEM_SET] = "false" def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(self.client, CodeClient): @@ -37,5 +44,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None) os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None) - if os.environ.get(PF_BATCH_TIMEOUT_SEC) == str(PF_BATCH_TIMEOUT_SEC_DEFAULT): + if os.environ.get(PF_IS_BATCH_TIMEOUT_SYSTEM_SET) == "true": os.environ.pop(PF_BATCH_TIMEOUT_SEC, None) + os.environ[PF_IS_BATCH_TIMEOUT_SYSTEM_SET] = "false" From bdb7fa7b5a646b130d20efb904e0d2be6e7c68cf Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Fri, 19 Jul 2024 13:22:00 -0700 Subject: [PATCH 13/14] Make changelog more clear --- src/promptflow-evals/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/promptflow-evals/CHANGELOG.md b/src/promptflow-evals/CHANGELOG.md index e2f28c30e4b..fd3238d7a86 100644 --- a/src/promptflow-evals/CHANGELOG.md +++ b/src/promptflow-evals/CHANGELOG.md @@ -4,7 +4,7 @@ ### Features Added - Introduced `JailbreakAdversarialSimulator` for customers who need to do run jailbreak and non jailbreak adversarial simulations at the same time. More info in the README.md in `/promptflow/evals/synthetic/README.md#jailbreak-simulator` -- Exposed batch evaluation run timeout via "PF_BATCH_TIMEOUT_SEC" environment variable +- Exposed batch evaluation run timeout via "PF_BATCH_TIMEOUT_SEC" environment variable. This variable can be used to set the timeout for the batch evaluation for each evaluator and target separately only, not the entire API call. ### Bugs Fixed - Large simulation was causing a jinja exception, this has been fixed. From f9867bcf4b3ed786106a68d954017f9bb88c0545 Mon Sep 17 00:00:00 2001 From: Diondra Peck Date: Mon, 22 Jul 2024 08:52:20 -0700 Subject: [PATCH 14/14] Replace set by system environment variable with class attribute --- .../promptflow/evals/_constants.py | 1 - .../_batch_run_client/batch_run_context.py | 15 +++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py index 7d482aba5f4..5166f6a464e 100644 --- a/src/promptflow-evals/promptflow/evals/_constants.py +++ b/src/promptflow-evals/promptflow/evals/_constants.py @@ -25,4 +25,3 @@ class Prefixes: PF_BATCH_TIMEOUT_SEC_DEFAULT = 3600 PF_BATCH_TIMEOUT_SEC = "PF_BATCH_TIMEOUT_SEC" -PF_IS_BATCH_TIMEOUT_SYSTEM_SET = "PF_IS_BATCH_TIMEOUT_SYSTEM_SET" diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py index 66ca2874fe0..1d188c98ce7 100644 --- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py +++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py @@ -5,11 +5,7 @@ from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP, PF_FLOW_META_LOAD_IN_SUBPROCESS from promptflow._utils.user_agent_utils import ClientUserAgentUtil -from promptflow.evals._constants import ( - PF_BATCH_TIMEOUT_SEC, - PF_BATCH_TIMEOUT_SEC_DEFAULT, - PF_IS_BATCH_TIMEOUT_SYSTEM_SET, -) +from promptflow.evals._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api from ..._user_agent import USER_AGENT @@ -21,6 +17,7 @@ class BatchRunContext: def __init__(self, client): self.client = client + self._is_timeout_set_by_system = False def __enter__(self): if isinstance(self.client, CodeClient): @@ -33,9 +30,7 @@ def __enter__(self): if os.environ.get(PF_BATCH_TIMEOUT_SEC) is None: os.environ[PF_BATCH_TIMEOUT_SEC] = str(PF_BATCH_TIMEOUT_SEC_DEFAULT) - os.environ[PF_IS_BATCH_TIMEOUT_SYSTEM_SET] = "true" - else: - os.environ[PF_IS_BATCH_TIMEOUT_SYSTEM_SET] = "false" + self._is_timeout_set_by_system = True # For addressing the issue of asyncio event loop closed on Windows set_event_loop_policy() @@ -48,6 +43,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None) os.environ.pop(PF_FLOW_META_LOAD_IN_SUBPROCESS, None) - if os.environ.get(PF_IS_BATCH_TIMEOUT_SYSTEM_SET) == "true": + if self._is_timeout_set_by_system: os.environ.pop(PF_BATCH_TIMEOUT_SEC, None) - os.environ[PF_IS_BATCH_TIMEOUT_SYSTEM_SET] = "false" + self._is_timeout_set_by_system = False