diff --git a/.github/workflows/promptflow-evals-e2e-test-azure.yml b/.github/workflows/promptflow-evals-e2e-test-azure.yml
index 75cb2c5d422..811ddf9d31e 100644
--- a/.github/workflows/promptflow-evals-e2e-test-azure.yml
+++ b/.github/workflows/promptflow-evals-e2e-test-azure.yml
@@ -14,28 +14,11 @@ env:
WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: snok/install-poetry@v1
- - name: build
- run: poetry build
- working-directory: ${{ env.WORKING_DIRECTORY }}
- - uses: actions/upload-artifact@v4
- with:
- name: promptflow-evals
- path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
test:
- needs: build
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-13]
- # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
- # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
- # Add 3.9 back after we figure out the issue
- python-version: ['3.8', '3.10', '3.11']
+ python-version: ['3.8', '3.9', '3.10', '3.11']
fail-fast: false
# snok/install-poetry need this to support Windows
defaults:
@@ -52,10 +35,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- uses: snok/install-poetry@v1
- - uses: actions/download-artifact@v4
- with:
- name: promptflow-evals
- path: ${{ env.WORKING_DIRECTORY }}
- name: install test dependency group
run: poetry install --only test
working-directory: ${{ env.WORKING_DIRECTORY }}
@@ -67,10 +46,7 @@ jobs:
poetry run pip install -e ../promptflow-tracing
poetry run pip install -e ../promptflow-tools
poetry run pip install -e ../promptflow-azure
- working-directory: ${{ env.WORKING_DIRECTORY }}
- - name: install promptflow-evals from wheel
- # wildcard expansion (*) does not work in Windows, so leverage python to find and install
- run: poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
+ poetry run pip install -e ../promptflow-evals
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: install recording
run: poetry run pip install -e ../promptflow-recording
diff --git a/.github/workflows/promptflow-evals-e2e-test-local.yml b/.github/workflows/promptflow-evals-e2e-test-local.yml
index f5cef2aa4d2..3bc658f3159 100644
--- a/.github/workflows/promptflow-evals-e2e-test-local.yml
+++ b/.github/workflows/promptflow-evals-e2e-test-local.yml
@@ -18,9 +18,6 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-13]
- # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
- # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
- # Add 3.9 back after we figure out the issue
python-version: ['3.8', '3.9', '3.10', '3.11']
fail-fast: false
# snok/install-poetry need this to support Windows
diff --git a/.github/workflows/promptflow-evals-installation-test.yml b/.github/workflows/promptflow-evals-installation-test.yml
new file mode 100644
index 00000000000..67208444217
--- /dev/null
+++ b/.github/workflows/promptflow-evals-installation-test.yml
@@ -0,0 +1,61 @@
+name: promptflow-evals-installation-test
+
+on:
+ schedule:
+ - cron: "40 10 * * *" # 2:40 PST every day
+ pull_request:
+ paths:
+ - src/promptflow-evals/**
+ - .github/workflows/promptflow-evals-installation-test.yml
+ workflow_dispatch:
+
+env:
+ IS_IN_CI_PIPELINE: "true"
+ WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
+ PROMPT_FLOW_TEST_MODE: "live"
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: snok/install-poetry@v1
+ - name: build
+ run: poetry build
+ working-directory: ${{ env.WORKING_DIRECTORY }}
+ - uses: actions/upload-artifact@v4
+ with:
+ name: promptflow-evals
+ path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
+
+ test:
+ needs: build
+ strategy:
+ matrix:
+ os: [ubuntu-latest, windows-latest, macos-13]
+ python-version: ['3.8', '3.9', '3.10', '3.11']
+ fail-fast: false
+ # snok/install-poetry need this to support Windows
+ defaults:
+ run:
+ shell: bash
+ runs-on: ${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/download-artifact@v4
+ with:
+ name: promptflow-evals
+ path: ${{ env.WORKING_DIRECTORY }}
+ - name: install virtualenv
+ run: python -m pip install virtualenv
+ working-directory: ${{ env.WORKING_DIRECTORY }}
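+ # calculate_install_time.sh (added in this PR) creates a throwaway virtualenv,
+ # installs the built wheel (optionally with extras), reports the install time to
+ # Application Insights, and warns (or fails with -f) if it exceeds the -l limit.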
+ - name: install promptflow-evals from wheel
+ id: install_promptflow_no_extras
+ run: |
+ bash ../../scripts/code_qa/calculate_install_time.sh -r ${{ github.run_id }} -w ${{ github.workflow }} -a ${{ github.action }} -b ${{ github.ref }} -l "300"
+ working-directory: ${{ env.WORKING_DIRECTORY }}
+ - name: install promptflow-evals from wheel with azure extras
+ id: install_promptflow_with_extras
+ run: |
+ bash ../../scripts/code_qa/calculate_install_time.sh -r ${{ github.run_id }} -w ${{ github.workflow }} -a ${{ github.action }} -b ${{ github.ref }} -e "[azure]" -l "300"
+ working-directory: ${{ env.WORKING_DIRECTORY }}
\ No newline at end of file
diff --git a/.github/workflows/promptflow-evals-regression-test.yml b/.github/workflows/promptflow-evals-performance-test.yml
similarity index 58%
rename from .github/workflows/promptflow-evals-regression-test.yml
rename to .github/workflows/promptflow-evals-performance-test.yml
index 626aba2fb8d..3643e9591fc 100644
--- a/.github/workflows/promptflow-evals-regression-test.yml
+++ b/.github/workflows/promptflow-evals-performance-test.yml
@@ -1,4 +1,4 @@
-name: promptflow-evals-regression-test
+name: promptflow-evals-performance-test
on:
schedule:
@@ -6,7 +6,7 @@ on:
pull_request:
paths:
- src/promptflow-evals/**
- - .github/workflows/promptflow-evals-regression-test.yml
+ - .github/workflows/promptflow-evals-performance-test.yml
workflow_dispatch:
env:
@@ -15,28 +15,11 @@ env:
PROMPT_FLOW_TEST_MODE: "live"
jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: snok/install-poetry@v1
- - name: build
- run: poetry build
- working-directory: ${{ env.WORKING_DIRECTORY }}
- - uses: actions/upload-artifact@v4
- with:
- name: promptflow-evals
- path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
test:
- needs: build
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-13]
- # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
- # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
- # Add 3.9 back after we figure out the issue
- python-version: ['3.8', '3.10', '3.11']
+ python-version: ['3.8', '3.9', '3.10', '3.11']
fail-fast: false
# snok/install-poetry need this to support Windows
defaults:
@@ -49,10 +32,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- uses: snok/install-poetry@v1
- - uses: actions/download-artifact@v4
- with:
- name: promptflow-evals
- path: ${{ env.WORKING_DIRECTORY }}
- name: install test dependency group
run: poetry install --only test
working-directory: ${{ env.WORKING_DIRECTORY }}
@@ -60,17 +39,13 @@ jobs:
id: install_promptflow
run: |
# Estimate the installation time.
- export start_tm=`date +%s`
poetry run pip install -e ../promptflow
poetry run pip install -e ../promptflow-core
poetry run pip install -e ../promptflow-devkit
poetry run pip install -e ../promptflow-tracing
poetry run pip install -e ../promptflow-tools
poetry run pip install -e ../promptflow-azure
- poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
- export install_time=$((`date +%s` - ${start_tm}))
- poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity install_time_s --value $install_time --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
- test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} minutes, the limit is ${TIME_LIMIT}."
+ poetry run pip install -e ../promptflow-evals
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: install recording
run: poetry run pip install -e ../promptflow-recording
diff --git a/.github/workflows/promptflow-evals-unit-test.yml b/.github/workflows/promptflow-evals-unit-test.yml
index 4533bc824f5..e61af03bd2a 100644
--- a/.github/workflows/promptflow-evals-unit-test.yml
+++ b/.github/workflows/promptflow-evals-unit-test.yml
@@ -14,21 +14,7 @@ env:
WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: snok/install-poetry@v1
- - name: build
- run: poetry build
- working-directory: ${{ env.WORKING_DIRECTORY }}
- - uses: actions/upload-artifact@v4
- with:
- name: promptflow-evals
- path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
test:
- needs: build
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-13]
@@ -45,17 +31,11 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- uses: snok/install-poetry@v1
- - uses: actions/download-artifact@v4
- with:
- name: promptflow-evals
- path: ${{ env.WORKING_DIRECTORY }}
- name: install test dependency group
run: poetry install --only test
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: install promptflow packages in editable mode
run: |
- export TIME_LIMIT=5
- export start_tm=`date +%s`
poetry run pip install -e ../promptflow
poetry run pip install -e ../promptflow-core
poetry run pip install -e ../promptflow-devkit
@@ -63,10 +43,6 @@ jobs:
poetry run pip install -e ../promptflow-tools
poetry run pip install -e ../promptflow-azure
poetry run pip install -e ../promptflow-evals
- export install_time=$(((`date +%s` - ${start_tm})/60))
- echo "The installation took ${install_time} minutes."
- echo "The time limit for installation is ${TIME_LIMIT}"
- test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} minutes, the limit is ${TIME_LIMIT}."
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: install recording
run: poetry run pip install -e ../promptflow-recording
diff --git a/scripts/code_qa/calculate_install_time.sh b/scripts/code_qa/calculate_install_time.sh
new file mode 100644
index 00000000000..1c344298b04
--- /dev/null
+++ b/scripts/code_qa/calculate_install_time.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+print_usage(){
+ if [ $# -gt 0 ]; then
+ echo "Missing argument ${1}"
+ fi
+ echo "Usage:"
+ echo "$0 -r [github run id] -w [github workflow] -a [github action id] -b [github ref id] -e [optional extras] -f [fail if over the limit] -l [install time limit in seconds]"
+ echo "Extras should be written as they appear in pip, for example for promptflow-evals[azure] it is [azure]"
+ echo "The -f flag does not take a parameter."
+ exit 1
+}
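+
+# Example invocation (illustrative values):
+#   bash scripts/code_qa/calculate_install_time.sh -r 123456789 -w promptflow-evals-installation-test -a install -b refs/heads/main -e "[azure]" -l 300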
+
+run_id=""
+workflow=""
+action=""
+ref=""
+fail=0
+extras=""
+limit=""
+
+
+while getopts ":r:w:a:b:e:l:f" opt; do
+# Parse options
+ case $opt in
+ (r) run_id="$OPTARG";;
+ (w) workflow="$OPTARG";;
+ (a) action="$OPTARG";;
+ (b) ref="$OPTARG";;
+ (e) extras="$OPTARG";;
+ (f) ((fail++));;
+ (l) limit="$OPTARG";;
+ \?) print_usage;;
+ esac
+done
+
+for v in "run_id" "workflow" "action" "ref" "limit"; do
+ if [ -z "${!v}" ]; then
+ print_usage "$v"
+ fi
+done
+
+ENV_DIR="test_pf_ev"
+python -m virtualenv "${ENV_DIR}"
+# Make activate command platform independent
+ACTIVATE="${ENV_DIR}/bin/activate"
+if [ ! -f "$ACTIVATE" ]; then
+ ACTIVATE="${ENV_DIR}/Scripts/activate"
+fi
+source "${ACTIVATE}"
+# Estimate the installation time.
+pf_evals_wheel=`ls -1 promptflow_evals-*`
+echo "The downloaded wheel file: ${pf_evals_wheel}"
+packages=`python -m pip freeze | wc -l`
+start_tm=`date +%s`
+echo "python -m pip install \"./${pf_evals_wheel}${extras}\" --no-cache-dir"
+python -m pip install "./${pf_evals_wheel}${extras}" --no-cache-dir
+install_time=$((`date +%s` - ${start_tm}))
+packages_installed=$((`python -m pip freeze | wc -l` - packages))
+# Log the install time
+python `dirname "$0"`/report_to_app_insights.py --activity "install_time_s" --value "{\"install_time_s\": ${install_time}, \"number_of_packages_installed\": ${packages_installed}}" --git-hub-action-run-id "${run_id}" --git-hub-workflow "${workflow}" --git-hub-action "${action}" --git-branch "${ref}"
+deactivate
+rm -rf test_pf_ev
+echo "Installed ${packages_installed} packages in ${install_time} seconds."
+if [ $fail -eq 0 ]; then
+ # Swallow the exit code 1 and just show the warning, which is
+ # understood by the GitHub UI.
+ test ${install_time} -le $limit || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} seconds, the limit is ${limit}."
+else
+ test ${install_time} -le $limit
+fi
+# Return the exit code of the test command or of echo, i.e. 0.
+exit $?
\ No newline at end of file
diff --git a/scripts/code_qa/report_to_app_insights.py b/scripts/code_qa/report_to_app_insights.py
index 35c885f14a6..50ed20eb1d7 100644
--- a/scripts/code_qa/report_to_app_insights.py
+++ b/scripts/code_qa/report_to_app_insights.py
@@ -1,6 +1,7 @@
from typing import Dict, Optional, Union
import argparse
+import json
import platform
from promptflow._sdk._configuration import Configuration
@@ -25,12 +26,12 @@ def parse_junit_xml(fle: str) -> Dict[str, Dict[str, Union[float, str]]]:
for child in test.childNodes:
if child.nodeName == 'failure':
- test_results['fail_message'] = child.attributes["message"].value
+ test_results[test_name]['fail_message'] = child.attributes["message"].value
return test_results
def main(activity_name: str,
- value: float,
+ value: Union[float, str],
run_id: str,
workflow: str,
action: str,
@@ -70,9 +71,15 @@ def main(activity_name: str,
if junit_file:
junit_dict = parse_junit_xml(junit_file)
for k, v in junit_dict.items():
- activity_info[k] = -1 if v["fail_message"] else v['time']
+ if v["fail_message"]:
+ # Do not log time together with fail message.
+ continue
+ activity_info[k] = v['time']
else:
- activity_info["value"] = value
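+ # A JSON object passed as a string (e.g. '{"install_time_s": 12}') is expanded
+ # into separate custom dimensions; a plain number is logged under "value".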
+ if isinstance(value, str):
+ activity_info.update(json.loads(value))
+ else:
+ activity_info["value"] = value
# write information to the application insights.
logger.info(action, extra={"custom_dimensions": activity_info})
@@ -83,8 +90,11 @@ def main(activity_name: str,
description="Log the value to application insights along with platform characteristics and run ID.")
parser.add_argument('--activity', help='The activity to be logged.',
required=True)
- parser.add_argument('--value', type=float, help='The value for activity.',
- required=False, default=-1)
+ parser.add_argument(
+ '--value',
+ help='The floating point value for activity or a set of values in key-value format.',
+ required=False,
+ default=-1)
parser.add_argument('--junit-xml', help='The path to junit-xml file.',
dest="junit_xml", required=False, default=None)
parser.add_argument('--git-hub-action-run-id', dest='run_id',
diff --git a/scripts/code_qa/test_reporter.py b/scripts/code_qa/test_reporter.py
new file mode 100644
index 00000000000..29258fc6bcc
--- /dev/null
+++ b/scripts/code_qa/test_reporter.py
@@ -0,0 +1,67 @@
+import os
+import platform
+import pytest
+
+from unittest.mock import patch, MagicMock
+
+import report_to_app_insights
+import tempfile
+
+
+class TestReporter:
+ """The set of local tests to test reporting to application insights."""
+
+ @pytest.mark.parametrize(
+ 'value,expected_val',
+ [
+ (42, {'value': 42.}),
+ ('{"foo": 1, "bar": 2}', {"foo": 1, "bar": 2})
+ ]
+ )
+ def test_logging_value(self, value, expected_val):
+ """Test that plain and JSON-encoded values are logged as expected."""
+ mock_logger = MagicMock()
+ expected = {
+ "activity_name": 'test_act',
+ "activity_type": "ci_cd_analytics",
+ "OS": platform.system(),
+ "OS_release": platform.release(),
+ "branch": "some_branch",
+ "git_hub_action_run_id": "gh_run_id",
+ "git_hub_workflow": "gh_wf"
+ }
+ expected.update(expected_val)
+ with patch('report_to_app_insights.get_telemetry_logger', return_value=mock_logger):
+ report_to_app_insights.main(
+ 'test_act', value, "gh_run_id", "gh_wf", 'my_action', "some_branch", junit_file=None)
+ mock_logger.info.assert_called_with('my_action', extra={'custom_dimensions': expected})
+
+ def test_log_junit_xml(self):
+ """Test that we are loading junit xml files as expected."""
+ content = (
+ '<?xml version="1.0" encoding="utf-8"?>'
+ '<testsuite name="pytest" tests="2">'
+ '<testcase classname="MyTestClass1" name="my_successful_test_method" time="4.2" />'
+ '<testcase classname="MyTestClass1" name="my_failed_test_method" time="0.1">'
+ '<failure message="fail :(">fail :(</failure>'
+ '</testcase>'
+ '</testsuite>'
+ )
+ mock_logger = MagicMock()
+ expected = {
+ "activity_name": 'test_act',
+ "activity_type": "ci_cd_analytics",
+ "OS": platform.system(),
+ "OS_release": platform.release(),
+ "MyTestClass1::my_successful_test_method": 4.2,
+ "branch": "some_branch",
+ "git_hub_action_run_id": "gh_run_id",
+ "git_hub_workflow": "gh_wf"
+ }
+ with tempfile.TemporaryDirectory() as d:
+ file_xml = os.path.join(d, "test-results.xml")
+ with open(file_xml, 'w') as f:
+ f.write(content)
+ with patch('report_to_app_insights.get_telemetry_logger', return_value=mock_logger):
+ report_to_app_insights.main(
+ 'test_act', -1, "gh_run_id", "gh_wf", 'my_action', "some_branch", junit_file=file_xml)
+
+ mock_logger.info.assert_called_with('my_action', extra={'custom_dimensions': expected})
diff --git a/src/promptflow-devkit/CHANGELOG.md b/src/promptflow-devkit/CHANGELOG.md
index ccd05c50964..81b7e7e9888 100644
--- a/src/promptflow-devkit/CHANGELOG.md
+++ b/src/promptflow-devkit/CHANGELOG.md
@@ -1,7 +1,8 @@
# promptflow-devkit package
## v1.14.0 (Upcoming)
-TODO
+### Improvements
+- Add `promptflow` to the Dockerfile when building a flow with `python_requirements_txt`, in case promptflow does not exist in the custom requirements.
## v1.13.0 (2024.06.28)
diff --git a/src/promptflow-devkit/promptflow/_proxy/_python_executor_proxy.py b/src/promptflow-devkit/promptflow/_proxy/_python_executor_proxy.py
index f6e244cc6f5..6466b5f576c 100644
--- a/src/promptflow-devkit/promptflow/_proxy/_python_executor_proxy.py
+++ b/src/promptflow-devkit/promptflow/_proxy/_python_executor_proxy.py
@@ -85,6 +85,14 @@ async def exec_aggregation_async(
) -> AggregationResult:
return self._flow_executor.exec_aggregation(batch_inputs, aggregation_inputs, run_id=run_id)
+ async def exec_line_async(
+ self,
+ inputs: Mapping[str, Any],
+ index: Optional[int] = None,
+ run_id: Optional[str] = None,
+ ) -> LineResult:
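+ # Delegate to the flow executor's native async line execution; the batch engine
+ # uses this path when batch_use_async is enabled.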
+ return await self._flow_executor.exec_line_async(inputs, index, run_id)
+
async def _exec_batch(
self,
batch_inputs: List[Mapping[str, Any]],
diff --git a/src/promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py b/src/promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py
index 2c7f4edf0dd..d12d99f3847 100644
--- a/src/promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py
+++ b/src/promptflow-devkit/promptflow/_sdk/_orchestrator/run_submitter.py
@@ -131,7 +131,7 @@ def _run_bulk(self, run: Run, stream=False, **kwargs):
with flow_overwrite_context(
flow_obj, tuning_node, variant, connections=run.connections, init_kwargs=run.init
) as flow:
- self._submit_bulk_run(flow=flow, run=run, local_storage=local_storage)
+ self._submit_bulk_run(flow=flow, run=run, local_storage=local_storage, **kwargs)
@classmethod
def _validate_inputs(cls, run: Run):
@@ -140,7 +140,7 @@ def _validate_inputs(cls, run: Run):
raise UserErrorException(message=str(error), error=error)
def _submit_bulk_run(
- self, flow: Union[Flow, FlexFlow, Prompty], run: Run, local_storage: LocalStorageOperations
+ self, flow: Union[Flow, FlexFlow, Prompty], run: Run, local_storage: LocalStorageOperations, **kwargs
) -> dict:
logger.info(f"Submitting run {run.name}, log path: {local_storage.logger.file_path}")
run_id = run.name
@@ -183,6 +183,7 @@ def _submit_bulk_run(
storage=local_storage,
log_path=local_storage.logger.file_path,
init_kwargs=run.init,
+ **kwargs,
)
batch_result = batch_engine.run(
input_dirs=input_dirs,
diff --git a/src/promptflow-devkit/promptflow/_sdk/data/docker/Dockerfile.jinja2 b/src/promptflow-devkit/promptflow/_sdk/data/docker/Dockerfile.jinja2
index 7e6b25c6189..b4bd747db85 100644
--- a/src/promptflow-devkit/promptflow/_sdk/data/docker/Dockerfile.jinja2
+++ b/src/promptflow-devkit/promptflow/_sdk/data/docker/Dockerfile.jinja2
@@ -28,6 +28,7 @@ RUN conda create -n {{env.conda_env_name}} python=3.9.16 pip=23.0.1 -q -y && \
conda run -n {{env.conda_env_name}} \
{% if env.python_requirements_txt %}
pip install -r /flow/{{env.python_requirements_txt}} && \
+ conda run -n {{env.conda_env_name}} pip install promptflow && \
{% else %}
{% if env.sdk_version %}
pip install promptflow=={{env.sdk_version}} \
diff --git a/src/promptflow-devkit/promptflow/batch/_batch_engine.py b/src/promptflow-devkit/promptflow/batch/_batch_engine.py
index 8333a43c7cc..04c5bdeba8f 100644
--- a/src/promptflow-devkit/promptflow/batch/_batch_engine.py
+++ b/src/promptflow-devkit/promptflow/batch/_batch_engine.py
@@ -151,6 +151,7 @@ def __init__(
self._storage = storage if storage else DefaultRunStorage(base_dir=self._working_dir)
self._kwargs = kwargs
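+ # batch_use_async is forwarded through the batch engine kwargs (e.g. by the evals
+ # ProxyClient); when set, lines run through the executor proxy's async path
+ # instead of the in-process _exec_batch shortcut.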
+ self._batch_use_async = kwargs.get("batch_use_async", False)
self._batch_timeout_sec = batch_timeout_sec or get_int_env_var("PF_BATCH_TIMEOUT_SEC")
self._line_timeout_sec = line_timeout_sec or get_int_env_var("PF_LINE_TIMEOUT_SEC", LINE_TIMEOUT_SEC)
self._worker_count = worker_count or get_int_env_var("PF_WORKER_COUNT")
@@ -472,7 +473,7 @@ async def _exec(
# execute lines
is_timeout = False
- if isinstance(self._executor_proxy, PythonExecutorProxy):
+ if not self._batch_use_async and isinstance(self._executor_proxy, PythonExecutorProxy):
results, is_timeout = await self._executor_proxy._exec_batch(
inputs_to_run,
output_dir,
@@ -656,6 +657,8 @@ def _check_eager_flow_and_language_from_yaml(self):
def _batch_timeout_expired(self) -> bool:
# Currently, local PythonExecutorProxy will handle the batch timeout by itself.
- if self._batch_timeout_sec is None or isinstance(self._executor_proxy, PythonExecutorProxy):
+ if self._batch_timeout_sec is None or (
+ not self._batch_use_async and isinstance(self._executor_proxy, PythonExecutorProxy)
+ ):
return False
return (datetime.utcnow() - self._start_time).total_seconds() > self._batch_timeout_sec
diff --git a/src/promptflow-evals/CHANGELOG.md b/src/promptflow-evals/CHANGELOG.md
index 18a44ecd269..2f6f62e7095 100644
--- a/src/promptflow-evals/CHANGELOG.md
+++ b/src/promptflow-evals/CHANGELOG.md
@@ -1,20 +1,21 @@
-# promptflow-evals package
+# Release History
-Please insert change log into "Next Release" ONLY.
-
-## Next release
-
-## 0.3.2
+## v0.3.2 (Upcoming)
+### Features Added
- Introduced `JailbreakAdversarialSimulator` for customers who need to run jailbreak and non-jailbreak adversarial simulations at the same time. More info in `/promptflow/evals/synthetic/README.md#jailbreak-simulator`.
-- The `AdversarialSimulator` responds with `category` of harm in the response.
-
-- Large simulation was causing a jinja exception, this has been fixed
+### Bugs Fixed
+- Large simulation was causing a jinja exception, this has been fixed.
+- Reduced chances of NaNs in quality evaluators.
+### Improvements
+- Converted built-in evaluators to async-based implementation, leveraging async batch run for performance improvement.
- Parity between evals and Simulator on signature, passing credentials.
+- The `AdversarialSimulator` responds with `category` of harm in the response.
-- Reduced chances of NaNs in GPT based evaluators.
+## v0.3.1 (2024-07-09)
+- This release contains minor bug fixes and improvements.
-## 0.0.1
-- Introduced package
+## v0.3.0 (2024-05-17)
+- Initial release of promptflow-evals package.
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
index 3e0b7a9c38a..9b810bf09c5 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
@@ -8,6 +8,7 @@
from promptflow.tracing._integrations._openai_injector import inject_openai_api, recover_openai_api
from ..._user_agent import USER_AGENT
+from .._utils import set_event_loop_policy
from .code_client import CodeClient
from .proxy_client import ProxyClient
@@ -25,6 +26,9 @@ def __enter__(self):
os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
os.environ[PF_FLOW_META_LOAD_IN_SUBPROCESS] = "false"
+ # For addressing the issue of asyncio event loop closed on Windows
+ set_event_loop_policy()
+
def __exit__(self, exc_type, exc_val, exc_tb):
if isinstance(self.client, CodeClient):
recover_openai_api()
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py
index 4c58b729483..b04ac32cb9a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/proxy_client.py
@@ -1,6 +1,7 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
+import inspect
import logging
import numpy as np
@@ -24,8 +25,18 @@ def __init__(self, pf_client: PFClient):
self._thread_pool = ThreadPoolExecutor(thread_name_prefix="evaluators_thread")
def run(self, flow, data, column_mapping=None, **kwargs):
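+ # If the evaluator exposes an async implementation via _to_async(), run that one
+ # so the batch engine can use its async line execution path.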
+ flow_to_run = flow
+ if hasattr(flow, "_to_async"):
+ flow_to_run = flow._to_async()
+
+ batch_use_async = self._should_batch_use_async(flow_to_run)
eval_future = self._thread_pool.submit(
- self._pf_client.run, flow, data=data, column_mapping=column_mapping, **kwargs
+ self._pf_client.run,
+ flow_to_run,
+ data=data,
+ column_mapping=column_mapping,
+ batch_use_async=batch_use_async,
+ **kwargs
)
return ProxyRun(run=eval_future)
@@ -38,3 +49,12 @@ def get_details(self, proxy_run, all_results=False):
def get_metrics(self, proxy_run):
run = proxy_run.run.result(timeout=BATCH_RUN_TIMEOUT)
return self._pf_client.get_metrics(run)
+
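+ # A flow is treated as async when it is itself a coroutine function or when its
+ # __call__ is a coroutine function (e.g. the async evaluators returned by _to_async()).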
+ @staticmethod
+ def _should_batch_use_async(flow):
+ if hasattr(flow, "__call__") and inspect.iscoroutinefunction(flow.__call__):
+ return True
+ elif inspect.iscoroutinefunction(flow):
+ return True
+ else:
+ return False
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_utils.py b/src/promptflow-evals/promptflow/evals/evaluate/_utils.py
index cc41848ed2a..5f8fd061b5e 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_utils.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_utils.py
@@ -14,7 +14,6 @@
from promptflow.evals._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, Prefixes
from promptflow.evals.evaluate._eval_run import EvalRun
-
LOGGER = logging.getLogger(__name__)
AZURE_WORKSPACE_REGEX_FORMAT = (
@@ -62,7 +61,11 @@ def _azure_pf_client_and_triad(trace_destination):
def _log_metrics_and_instance_results(
- metrics, instance_results, trace_destination, run, evaluation_name,
+ metrics,
+ instance_results,
+ trace_destination,
+ run,
+ evaluation_name,
) -> str:
if trace_destination is None:
LOGGER.error("Unable to log traces as trace destination was not defined.")
@@ -175,7 +178,7 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace
if match is not None:
pattern = match.group(1)
if pattern.startswith(pattern_prefix):
- map_from_key = pattern[len(pattern_prefix):]
+ map_from_key = pattern[len(pattern_prefix) :]
elif pattern.startswith(run_outputs_prefix):
# Target-generated columns always starts from .outputs.
map_from_key = f"{Prefixes._TGT_OUTPUTS}{pattern[len(run_outputs_prefix) :]}"
@@ -199,3 +202,13 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace
def _has_aggregator(evaluator):
return hasattr(evaluator, "__aggregate__")
+
+
+def set_event_loop_policy():
+ import asyncio
+ import platform
+
+ if platform.system().lower() == "windows":
+ # Reference: https://stackoverflow.com/questions/45600579/asyncio-event-loop-is-closed-when-getting-loop
+ # On Windows there seems to be a problem with the default EventLoopPolicy; use this snippet to work around it
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/_coherence/_coherence.py b/src/promptflow-evals/promptflow/evals/evaluators/_coherence/_coherence.py
index 0566b427af7..7b5aae042b8 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/_coherence/_coherence.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/_coherence/_coherence.py
@@ -47,7 +47,7 @@ def __init__(self, model_config: AzureOpenAIModelConfiguration):
model_config.api_version = "2024-02-15-preview"
prompty_model_config = {"configuration": model_config}
- prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-user-agent": USER_AGENT}}}) \
+ prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
current_dir = os.path.dirname(__file__)
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/_fluency/_fluency.py b/src/promptflow-evals/promptflow/evals/evaluators/_fluency/_fluency.py
index 93d1b061423..c5027f8b707 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/_fluency/_fluency.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/_fluency/_fluency.py
@@ -7,14 +7,48 @@
import numpy as np
-from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
+from promptflow._utils.async_utils import async_run_allowing_running_loop
+from promptflow.core import AsyncPrompty, AzureOpenAIModelConfiguration
+
try:
from ..._user_agent import USER_AGENT
except ImportError:
USER_AGENT = None
+class _AsyncFluencyEvaluator:
+ def __init__(self, model_config: AzureOpenAIModelConfiguration):
+ if model_config.api_version is None:
+ model_config.api_version = "2024-02-15-preview"
+
+ prompty_model_config = {"configuration": model_config}
+ prompty_model_config.update(
+ {"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}
+ ) if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
+ current_dir = os.path.dirname(__file__)
+ prompty_path = os.path.join(current_dir, "fluency.prompty")
+ self._flow = AsyncPrompty.load(source=prompty_path, model=prompty_model_config)
+
+ async def __call__(self, *, question: str, answer: str, **kwargs):
+ # Validate input parameters
+ question = str(question or "")
+ answer = str(answer or "")
+
+ if not (question.strip() and answer.strip()):
+ raise ValueError("Both 'question' and 'answer' must be non-empty strings.")
+
+ # Run the evaluation flow
+ llm_output = await self._flow(question=question, answer=answer)
+
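+ # Parse the first digit in the LLM output as the score; fall back to NaN if none is found.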
+ score = np.nan
+ if llm_output:
+ match = re.search(r"\d", llm_output)
+ if match:
+ score = float(match.group())
+
+ return {"gpt_fluency": float(score)}
+
+
class FluencyEvaluator:
"""
Initialize a fluency evaluator configured for a specific Azure OpenAI model.
@@ -41,17 +75,7 @@ class FluencyEvaluator:
"""
def __init__(self, model_config: AzureOpenAIModelConfiguration):
- # TODO: Remove this block once the bug is fixed
- # https://msdata.visualstudio.com/Vienna/_workitems/edit/3151324
- if model_config.api_version is None:
- model_config.api_version = "2024-02-15-preview"
-
- prompty_model_config = {"configuration": model_config}
- prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-user-agent": USER_AGENT}}}) \
- if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
- current_dir = os.path.dirname(__file__)
- prompty_path = os.path.join(current_dir, "fluency.prompty")
- self._flow = load_flow(source=prompty_path, model=prompty_model_config)
+ self._async_evaluator = _AsyncFluencyEvaluator(model_config)
def __call__(self, *, question: str, answer: str, **kwargs):
"""
@@ -64,20 +88,7 @@ def __call__(self, *, question: str, answer: str, **kwargs):
:return: The fluency score.
:rtype: dict
"""
- # Validate input parameters
- question = str(question or "")
- answer = str(answer or "")
+ return async_run_allowing_running_loop(self._async_evaluator, question=question, answer=answer, **kwargs)
- if not (question.strip() and answer.strip()):
- raise ValueError("Both 'question' and 'answer' must be non-empty strings.")
-
- # Run the evaluation flow
- llm_output = self._flow(question=question, answer=answer)
-
- score = np.nan
- if llm_output:
- match = re.search(r"\d", llm_output)
- if match:
- score = float(match.group())
-
- return {"gpt_fluency": float(score)}
+ def _to_async(self):
+ return self._async_evaluator
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/_groundedness/_groundedness.py b/src/promptflow-evals/promptflow/evals/evaluators/_groundedness/_groundedness.py
index 6eccd607814..b0aa3390c50 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/_groundedness/_groundedness.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/_groundedness/_groundedness.py
@@ -49,7 +49,7 @@ def __init__(self, model_config: AzureOpenAIModelConfiguration):
prompty_model_config = {"configuration": model_config}
- prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-user-agent": USER_AGENT}}}) \
+ prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
current_dir = os.path.dirname(__file__)
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/_relevance/_relevance.py b/src/promptflow-evals/promptflow/evals/evaluators/_relevance/_relevance.py
index 8e67f0e34ab..b8e4fef00d6 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/_relevance/_relevance.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/_relevance/_relevance.py
@@ -52,7 +52,7 @@ def __init__(self, model_config: AzureOpenAIModelConfiguration):
"configuration": model_config,
}
- prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-user-agent": USER_AGENT}}})\
+ prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}})\
if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
current_dir = os.path.dirname(__file__)
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/_similarity/_similarity.py b/src/promptflow-evals/promptflow/evals/evaluators/_similarity/_similarity.py
index f556eca4309..e0413a7a8d7 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/_similarity/_similarity.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/_similarity/_similarity.py
@@ -48,7 +48,7 @@ def __init__(self, model_config: AzureOpenAIModelConfiguration):
model_config.api_version = "2024-02-15-preview"
prompty_model_config = {"configuration": model_config}
- prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-user-agent": USER_AGENT}}}) \
+ prompty_model_config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) \
if USER_AGENT and isinstance(model_config, AzureOpenAIModelConfiguration) else None
current_dir = os.path.dirname(__file__)
prompty_path = os.path.join(current_dir, "similarity.prompty")
diff --git a/src/promptflow-evals/tests/evals/conftest.py b/src/promptflow-evals/tests/evals/conftest.py
index e184b334628..3566f8222e2 100644
--- a/src/promptflow-evals/tests/evals/conftest.py
+++ b/src/promptflow-evals/tests/evals/conftest.py
@@ -5,7 +5,6 @@
from unittest.mock import patch
import pytest
-
from pytest_mock import MockerFixture
from promptflow.client import PFClient
@@ -36,6 +35,7 @@ def is_record():
def is_replay():
return False
+
# Import of optional packages
AZURE_INSTALLED = True
try:
@@ -95,6 +95,8 @@ def model_config() -> dict:
raise ValueError(f"Connection '{conn_name}' not found in dev connections.")
model_config = AzureOpenAIModelConfiguration(**dev_connections[conn_name]["value"])
+ # Default to gpt-35-turbo for capacity reasons
+ model_config.azure_deployment = "gpt-35-turbo"
AzureOpenAIModelConfiguration.__repr__ = lambda self: ""
@@ -368,12 +370,13 @@ def pytest_collection_modifyitems(items):
parents = {}
for item in items:
# Check if parent contains 'localtest' marker and remove it.
- if any(mark.name == 'localtest' for mark in item.parent.own_markers) or id(item.parent) in parents:
+ if any(mark.name == "localtest" for mark in item.parent.own_markers) or id(item.parent) in parents:
if id(item.parent) not in parents:
item.parent.own_markers = [
- marker for marker in item.own_markers if getattr(marker, 'name', None) != 'localtest']
+ marker for marker in item.own_markers if getattr(marker, "name", None) != "localtest"
+ ]
parents[id(item.parent)] = item.parent
- if not item.get_closest_marker('azuretest'):
+ if not item.get_closest_marker("azuretest"):
# If item's parent was marked as 'localtest', mark the child as such, but not if
# it was marked as 'azuretest'.
item.add_marker(pytest.mark.localtest)
diff --git a/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py b/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
index f57c05e35ce..36116719932 100644
--- a/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
+++ b/src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
@@ -1,6 +1,7 @@
import json
import os
import pathlib
+import time
import numpy as np
import pandas as pd
@@ -8,7 +9,12 @@
import requests
from promptflow.evals.evaluate import evaluate
-from promptflow.evals.evaluators import ContentSafetyEvaluator, F1ScoreEvaluator, GroundednessEvaluator
+from promptflow.evals.evaluators import (
+ ContentSafetyEvaluator,
+ F1ScoreEvaluator,
+ FluencyEvaluator,
+ GroundednessEvaluator,
+)
@pytest.fixture
@@ -154,6 +160,32 @@ def test_evaluate_with_content_safety_evaluator(self, project_scope, data_file,
assert 0 <= metrics.get("content_safety.self_harm_defect_rate") <= 1
assert 0 <= metrics.get("content_safety.hate_unfairness_defect_rate") <= 1
+ @pytest.mark.performance_test
+ def test_evaluate_with_async_enabled_evaluator(self, model_config, data_file):
+ fluency_eval = FluencyEvaluator(model_config)
+
+ start_time = time.time()
+ result = evaluate(
+ data=data_file,
+ evaluators={
+ "fluency": fluency_eval,
+ },
+ )
+ end_time = time.time()
+ duration = end_time - start_time
+
+ row_result_df = pd.DataFrame(result["rows"])
+ metrics = result["metrics"]
+
+ # validate the results
+ assert result is not None
+ assert result["rows"] is not None
+ input_data = pd.read_json(data_file, lines=True)
+ assert row_result_df.shape[0] == len(input_data)
+ assert "outputs.fluency.gpt_fluency" in row_result_df.columns.to_list()
+ assert "fluency.gpt_fluency" in metrics.keys()
+ assert duration < 10, f"evaluate API call took too long: {duration} seconds"
+
@pytest.mark.parametrize(
"use_pf_client,function,column",
[
diff --git a/src/promptflow-evals/tests/evals/unittests/test_built_in_evaluator.py b/src/promptflow-evals/tests/evals/unittests/test_built_in_evaluator.py
index 29c747b2fed..ec756594808 100644
--- a/src/promptflow-evals/tests/evals/unittests/test_built_in_evaluator.py
+++ b/src/promptflow-evals/tests/evals/unittests/test_built_in_evaluator.py
@@ -5,12 +5,16 @@
from promptflow.evals.evaluators import FluencyEvaluator
+async def fluency_async_mock():
+ return "1"
+
+
@pytest.mark.usefixtures("mock_model_config")
@pytest.mark.unittest
class TestBuiltInEvaluators:
def test_fluency_evaluator(self, mock_model_config):
fluency_eval = FluencyEvaluator(model_config=mock_model_config)
- fluency_eval._flow = MagicMock(return_value="1")
+ fluency_eval._async_evaluator._flow = MagicMock(return_value=fluency_async_mock())
score = fluency_eval(question="What is the capital of Japan?", answer="The capital of Japan is Tokyo.")
@@ -19,7 +23,7 @@ def test_fluency_evaluator(self, mock_model_config):
def test_fluency_evaluator_non_string_inputs(self, mock_model_config):
fluency_eval = FluencyEvaluator(model_config=mock_model_config)
- fluency_eval._flow = MagicMock(return_value="1")
+ fluency_eval._async_evaluator._flow = MagicMock(return_value=fluency_async_mock())
score = fluency_eval(question={"foo": 1}, answer={"bar": "2"})
@@ -28,7 +32,7 @@ def test_fluency_evaluator_non_string_inputs(self, mock_model_config):
def test_fluency_evaluator_empty_string(self, mock_model_config):
fluency_eval = FluencyEvaluator(model_config=mock_model_config)
- fluency_eval._flow = MagicMock(return_value="1")
+ fluency_eval._async_evaluator._flow = MagicMock(return_value=fluency_async_mock())
with pytest.raises(ValueError) as exc_info:
fluency_eval(question="What is the capital of Japan?", answer=None)
diff --git a/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.bak b/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.bak
index 101db702d58..749b651238c 100644
--- a/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.bak
+++ b/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.bak
@@ -41,3 +41,10 @@
'a4c1141c2441edb1a62856177868a2242292a0be', (167936, 3388)
'3cd3028235ab4f02d7ee074fbc5667dd90d8a282', (171520, 3574)
'99493a54841e1a28fb054b71d2adc27e0be2ff5e', (175104, 3573)
+'50f3e636259f7dfe9c86d070b4c0752fdcc1cfdd', (178688, 4400)
+'cb45b5a6e3897d4a687e5f673a6924eebccb529c', (183296, 3456)
+'089d2dc2622d2035e182f44dfebfce832c32be91', (186880, 5044)
+'b74db0f7d27659cd5160bee77fd1402490fc0764', (192000, 3454)
+'9da70c55984adfd99de7d7d35452bb119706a14c', (195584, 3417)
+'70d94a59cf7aca95a8fe7faa2e8db14a05cf1773', (199168, 3438)
+'7771928ea1d8a376edd1ac6ab344d3d1855b015e', (202752, 3431)
diff --git a/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dat b/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dat
index 8d73df0cbf1..5ed8f052edf 100644
Binary files a/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dat and b/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dat differ
diff --git a/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dir b/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dir
index 101db702d58..749b651238c 100644
--- a/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dir
+++ b/src/promptflow-evals/tests/recordings/local/evals.node_cache.shelve.dir
@@ -41,3 +41,10 @@
'a4c1141c2441edb1a62856177868a2242292a0be', (167936, 3388)
'3cd3028235ab4f02d7ee074fbc5667dd90d8a282', (171520, 3574)
'99493a54841e1a28fb054b71d2adc27e0be2ff5e', (175104, 3573)
+'50f3e636259f7dfe9c86d070b4c0752fdcc1cfdd', (178688, 4400)
+'cb45b5a6e3897d4a687e5f673a6924eebccb529c', (183296, 3456)
+'089d2dc2622d2035e182f44dfebfce832c32be91', (186880, 5044)
+'b74db0f7d27659cd5160bee77fd1402490fc0764', (192000, 3454)
+'9da70c55984adfd99de7d7d35452bb119706a14c', (195584, 3417)
+'70d94a59cf7aca95a8fe7faa2e8db14a05cf1773', (199168, 3438)
+'7771928ea1d8a376edd1ac6ab344d3d1855b015e', (202752, 3431)
diff --git a/src/promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py b/src/promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
index 5faa7630830..628a6955514 100644
--- a/src/promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
+++ b/src/promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
@@ -70,7 +70,14 @@ def inject_headers(kwargs):
injected_headers = get_aoai_telemetry_headers()
original_headers = kwargs.get("headers" if IS_LEGACY_OPENAI else "extra_headers")
if original_headers and isinstance(original_headers, dict):
- injected_headers.update(original_headers)
+ for header in original_headers.keys():
+ if header in injected_headers:
+ # If the key already exists in injected_headers, concatenate the values with a space
+ injected_headers[header] = " ".join([injected_headers[header], original_headers[header]])
+ else:
+ # If the key does not exist in injected_headers, add it directly
+ injected_headers[header] = original_headers[header]
+
kwargs["headers" if IS_LEGACY_OPENAI else "extra_headers"] = injected_headers
if asyncio.iscoroutinefunction(f):
diff --git a/src/promptflow-tracing/tests/unittests/test_openai_injector.py b/src/promptflow-tracing/tests/unittests/test_openai_injector.py
index 61f6e684d39..8466da0375c 100644
--- a/src/promptflow-tracing/tests/unittests/test_openai_injector.py
+++ b/src/promptflow-tracing/tests/unittests/test_openai_injector.py
@@ -43,17 +43,19 @@ def f(**kwargs):
if IS_LEGACY_OPENAI:
headers = "headers"
- kwargs_1 = {"headers": {"a": 1, "b": 2}}
+ kwargs_1 = {"headers": {"a": 1, "b": 2, "x-ms-useragent": "user_agent_test"}}
kwargs_2 = {"headers": {"ms-azure-ai-promptflow-called-from": "aoai-tool"}}
else:
headers = "extra_headers"
- kwargs_1 = {"extra_headers": {"a": 1, "b": 2}}
+ kwargs_1 = {"extra_headers": {"a": 1, "b": 2, "x-ms-useragent": "user_agent_test"}}
kwargs_2 = {"extra_headers": {"ms-azure-ai-promptflow-called-from": "aoai-tool"}}
injected_headers = get_aoai_telemetry_headers()
+ user_agent = injected_headers.get("x-ms-useragent", None)
+ assert user_agent is not None
assert f(a=1, b=2) == {"a": 1, "b": 2, headers: injected_headers}
- merged_headers = {**injected_headers, "a": 1, "b": 2}
+ merged_headers = {**injected_headers, "a": 1, "b": 2, "x-ms-useragent": " ".join([user_agent, "user_agent_test"])}
assert f(**kwargs_1) == {headers: merged_headers}
aoai_tools_headers = injected_headers.copy()
@@ -70,17 +72,19 @@ async def f(**kwargs):
if IS_LEGACY_OPENAI:
headers = "headers"
- kwargs_1 = {"headers": {"a": 1, "b": 2}}
+ kwargs_1 = {"headers": {"a": 1, "b": 2, "x-ms-useragent": "user_agent_test"}}
kwargs_2 = {"headers": {"ms-azure-ai-promptflow-called-from": "aoai-tool"}}
else:
headers = "extra_headers"
- kwargs_1 = {"extra_headers": {"a": 1, "b": 2}}
+ kwargs_1 = {"extra_headers": {"a": 1, "b": 2, "x-ms-useragent": "user_agent_test"}}
kwargs_2 = {"extra_headers": {"ms-azure-ai-promptflow-called-from": "aoai-tool"}}
injected_headers = get_aoai_telemetry_headers()
+ user_agent = injected_headers.get("x-ms-useragent", None)
+ assert user_agent is not None
assert await f(a=1, b=2) == {"a": 1, "b": 2, headers: injected_headers}
- merged_headers = {**injected_headers, "a": 1, "b": 2}
+ merged_headers = {**injected_headers, "a": 1, "b": 2, "x-ms-useragent": " ".join([user_agent, "user_agent_test"])}
assert await f(**kwargs_1) == {headers: merged_headers}
aoai_tools_headers = injected_headers.copy()
diff --git a/src/promptflow/CHANGELOG.md b/src/promptflow/CHANGELOG.md
index e814429b2ac..d624c28dae7 100644
--- a/src/promptflow/CHANGELOG.md
+++ b/src/promptflow/CHANGELOG.md
@@ -1,7 +1,8 @@
# Release History
## v1.14.0 (Upcoming)
-TODO
+### Improvements
+- [promptflow-devkit] Add `promptflow` to the Dockerfile when building a flow with `python_requirements_txt`, in case promptflow does not exist in the custom requirements.
## v1.13.0 (2024.06.28)
diff --git a/src/promptflow/tests/executor/unittests/_utils/test_async_utils.py b/src/promptflow/tests/executor/unittests/_utils/test_async_utils.py
new file mode 100644
index 00000000000..ebcdf6a90b7
--- /dev/null
+++ b/src/promptflow/tests/executor/unittests/_utils/test_async_utils.py
@@ -0,0 +1,17 @@
+from unittest.mock import patch
+
+import pytest
+
+from promptflow._utils.async_utils import async_run_allowing_running_loop
+
+
+@pytest.mark.unittest
+class TestAsyncUtils:
+ @pytest.mark.parametrize("has_running_loop,num1,num2,expected_result", [(False, 1, 2, 3), (True, 3, 4, 7)])
+ def test_async_run_allowing_running_loop(self, has_running_loop, num1, num2, expected_result):
+ async def async_func_to_test(a, b):
+ return a + b
+
+ with patch("promptflow._utils.async_utils._has_running_loop", return_value=has_running_loop):
+ result = async_run_allowing_running_loop(async_func_to_test, num1, num2)
+ assert result == expected_result
diff --git a/src/promptflow/tests/test_configs/flows/export/flex_flow_build/Dockerfile b/src/promptflow/tests/test_configs/flows/export/flex_flow_build/Dockerfile
index 47cf3570de1..5cb9b85df42 100644
--- a/src/promptflow/tests/test_configs/flows/export/flex_flow_build/Dockerfile
+++ b/src/promptflow/tests/test_configs/flows/export/flex_flow_build/Dockerfile
@@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y runit gcc
RUN conda create -n promptflow-serve python=3.9.16 pip=23.0.1 -q -y && \
conda run -n promptflow-serve \
pip install -r /flow/requirements.txt && \
+ conda run -n promptflow-serve pip install promptflow && \
conda run -n promptflow-serve pip install keyrings.alt && \
conda run -n promptflow-serve pip install gunicorn==20.1.0 && \
conda run -n promptflow-serve pip install 'uvicorn>=0.27.0,<1.0.0' && \
diff --git a/src/promptflow/tests/test_configs/flows/export/linux/Dockerfile b/src/promptflow/tests/test_configs/flows/export/linux/Dockerfile
index cf7c9c6786c..85df18523a1 100644
--- a/src/promptflow/tests/test_configs/flows/export/linux/Dockerfile
+++ b/src/promptflow/tests/test_configs/flows/export/linux/Dockerfile
@@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y runit gcc
RUN conda create -n promptflow-serve python=3.9.16 pip=23.0.1 -q -y && \
conda run -n promptflow-serve \
pip install -r /flow/requirements_txt && \
+ conda run -n promptflow-serve pip install promptflow && \
conda run -n promptflow-serve pip install keyrings.alt && \
conda run -n promptflow-serve pip install gunicorn==20.1.0 && \
conda run -n promptflow-serve pip install 'uvicorn>=0.27.0,<1.0.0' && \