Merge branch 'main' into hancwang/fix-nan

microsoft · Jul 18, 2024 · be22a4f · be22a4f
2 parents 4971d7d + 7260bab
commit be22a4f
Show file tree

Hide file tree

Showing 34 changed files with 440 additions and 158 deletions.
diff --git a/.github/workflows/promptflow-evals-e2e-test-azure.yml b/.github/workflows/promptflow-evals-e2e-test-azure.yml
@@ -14,28 +14,11 @@ env:
   WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - uses: snok/install-poetry@v1
-    - name: build
-      run: poetry build
-      working-directory: ${{ env.WORKING_DIRECTORY }}
-    - uses: actions/upload-artifact@v4
-      with:
-        name: promptflow-evals
-        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
   test:
-    needs: build
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
-        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
-        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
-        # Add 3.9 back after we figure out the issue
-        python-version: ['3.8', '3.10', '3.11']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
       fail-fast: false
     # snok/install-poetry need this to support Windows
     defaults:
@@ -52,10 +35,6 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - uses: snok/install-poetry@v1
-      - uses: actions/download-artifact@v4
-        with:
-          name: promptflow-evals
-          path: ${{ env.WORKING_DIRECTORY }}
       - name: install test dependency group
         run: poetry install --only test
         working-directory: ${{ env.WORKING_DIRECTORY }}
@@ -67,10 +46,7 @@ jobs:
           poetry run pip install -e ../promptflow-tracing
           poetry run pip install -e ../promptflow-tools
           poetry run pip install -e ../promptflow-azure
-        working-directory: ${{ env.WORKING_DIRECTORY }}
-      - name: install promptflow-evals from wheel
-        # wildcard expansion (*) does not work in Windows, so leverage python to find and install
-        run: poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
+          poetry run pip install -e ../promptflow-evals
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install recording
         run: poetry run pip install -e ../promptflow-recording

diff --git a/.github/workflows/promptflow-evals-e2e-test-local.yml b/.github/workflows/promptflow-evals-e2e-test-local.yml
@@ -18,9 +18,6 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
-        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
-        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
-        # Add 3.9 back after we figure out the issue
         python-version: ['3.8', '3.9', '3.10', '3.11']
       fail-fast: false
     # snok/install-poetry need this to support Windows

diff --git a/.github/workflows/promptflow-evals-installation-test.yml b/.github/workflows/promptflow-evals-installation-test.yml
@@ -0,0 +1,61 @@
+# Builds the promptflow-evals wheel and measures how long it takes to install
+# it, both without extras and with the [azure] extra.
+name: promptflow-evals-installation-test
+
+# Runs on a daily schedule, on pull requests that touch the package or this
+# workflow file, and on manual dispatch.
+on:
+  schedule:
+    - cron: "40 10 * * *" # 2:40 PST every day
+  pull_request:
+    paths:
+      - src/promptflow-evals/**
+      - .github/workflows/promptflow-evals-installation-test.yml
+  workflow_dispatch:
+
+# Environment shared by all jobs; WORKING_DIRECTORY is the package root that
+# the build and test steps run in.
+env:
+  IS_IN_CI_PIPELINE: "true"
+  WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
+  PROMPT_FLOW_TEST_MODE: "live"
+
+jobs:
+  # Build the promptflow-evals wheel once and publish it as an artifact so the
+  # test job can download and install it.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: snok/install-poetry@v1
+      - name: build
+        run: poetry build
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+      - uses: actions/upload-artifact@v4
+        with:
+          name: promptflow-evals
+          path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
+
+  # Install the wheel produced by the build job on every OS / Python
+  # combination and report how long the installation takes.
+  test:
+    needs: build
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-13]
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+      fail-fast: false
+    # The installation script is a bash script, so force bash on every OS
+    # (including Windows).
+    defaults:
+      run:
+        shell: bash
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      # Select the interpreter for this matrix leg; without this step
+      # matrix.python-version is never applied and every leg runs the
+      # runner's default Python.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - uses: actions/download-artifact@v4
+        with:
+          name: promptflow-evals
+          path: ${{ env.WORKING_DIRECTORY }}
+      - name: install virtualenv
+        run: python -m pip install virtualenv
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+      # The expressions are quoted so that a value containing whitespace is
+      # still passed to the script as a single argument.
+      - name: install promptflow-evals from wheel without extras
+        id: install_promptflow_no_extras
+        run: |
+          bash ../../scripts/code_qa/calculate_install_time.sh -r "${{ github.run_id }}" -w "${{ github.workflow }}" -a "${{ github.action }}" -b "${{ github.ref }}" -l "300"
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+      - name: install promptflow-evals from wheel with azure extras
+        id: install_promptflow_with_extras
+        run: |
+          bash ../../scripts/code_qa/calculate_install_time.sh -r "${{ github.run_id }}" -w "${{ github.workflow }}" -a "${{ github.action }}" -b "${{ github.ref }}" -e "[azure]" -l "300"
+        working-directory: ${{ env.WORKING_DIRECTORY }}
diff --git a/...lows/promptflow-evals-regression-test.yml → ...ows/promptflow-evals-performance-test.yml b/...lows/promptflow-evals-regression-test.yml → ...ows/promptflow-evals-performance-test.yml
@@ -1,12 +1,12 @@
-name: promptflow-evals-regression-test
+name: promptflow-evals-performance-test
 
 on:
   schedule:
     - cron: "40 10 * * *" # 2:40 PST every day
   pull_request:
     paths:
       - src/promptflow-evals/**
-      - .github/workflows/promptflow-evals-regression-test.yml
+      - .github/workflows/promptflow-evals-performance-test.yml
   workflow_dispatch:
 
 env:
@@ -15,28 +15,11 @@ env:
   PROMPT_FLOW_TEST_MODE: "live"
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - uses: snok/install-poetry@v1
-    - name: build
-      run: poetry build
-      working-directory: ${{ env.WORKING_DIRECTORY }}
-    - uses: actions/upload-artifact@v4
-      with:
-        name: promptflow-evals
-        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
   test:
-    needs: build
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
-        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
-        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
-        # Add 3.9 back after we figure out the issue
-        python-version: ['3.8', '3.10', '3.11']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
       fail-fast: false
     # snok/install-poetry need this to support Windows
     defaults:
@@ -49,28 +32,20 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - uses: snok/install-poetry@v1
-      - uses: actions/download-artifact@v4
-        with:
-          name: promptflow-evals
-          path: ${{ env.WORKING_DIRECTORY }}
       - name: install test dependency group
         run: poetry install --only test
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install promptflow-evals from wheel
         id: install_promptflow
         run: |
           # Estimate the installation time.
-          export start_tm=`date +%s`
           poetry run pip install -e ../promptflow
           poetry run pip install -e ../promptflow-core
           poetry run pip install -e ../promptflow-devkit
           poetry run pip install -e ../promptflow-tracing
           poetry run pip install -e ../promptflow-tools
           poetry run pip install -e ../promptflow-azure
-          poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
-          export install_time=$((`date +%s` - ${start_tm}))
-          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity install_time_s --value $install_time --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
-          test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} minutes, the limit is ${TIME_LIMIT}."
+          poetry run pip install -e ../promptflow-evals
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install recording
         run: poetry run pip install -e ../promptflow-recording

diff --git a/.github/workflows/promptflow-evals-unit-test.yml b/.github/workflows/promptflow-evals-unit-test.yml
@@ -14,21 +14,7 @@ env:
   WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - uses: snok/install-poetry@v1
-    - name: build
-      run: poetry build
-      working-directory: ${{ env.WORKING_DIRECTORY }}
-    - uses: actions/upload-artifact@v4
-      with:
-        name: promptflow-evals
-        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
   test:
-    needs: build
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
@@ -45,28 +31,18 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - uses: snok/install-poetry@v1
-      - uses: actions/download-artifact@v4
-        with:
-          name: promptflow-evals
-          path: ${{ env.WORKING_DIRECTORY }}
       - name: install test dependency group
         run: poetry install --only test
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install promptflow packages in editable mode
         run: |
-          export TIME_LIMIT=5
-          export start_tm=`date +%s`
           poetry run pip install -e ../promptflow
           poetry run pip install -e ../promptflow-core
           poetry run pip install -e ../promptflow-devkit
           poetry run pip install -e ../promptflow-tracing
           poetry run pip install -e ../promptflow-tools
           poetry run pip install -e ../promptflow-azure
           poetry run pip install -e ../promptflow-evals
-          export install_time=$(((`date +%s` - ${start_tm})/60))
-          echo "The installation took ${install_time} minutes."
-          echo "The time limit for installation is ${TIME_LIMIT}"
-          test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} minutes, the limit is ${TIME_LIMIT}."
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install recording
         run: poetry run pip install -e ../promptflow-recording

diff --git a/scripts/code_qa/calculate_install_time.sh b/scripts/code_qa/calculate_install_time.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Print the usage text and terminate the script with exit code 1.
+# Arguments:
+#   $1 - optional description of the missing argument; when given it is
+#        reported before the usage text.
+print_usage(){
+  if [ $# -gt 0 ]; then
+      echo "Missing argument ${1}"
+  fi
+  echo "Usage:"
+  echo "$0 -r [github run id] -w [github workflow] -a [github action id] -b [github ref id] -e [optional extras] -f [should we fail?] -l [install time limit]"
+  echo "Extras should be written as they appear in pip, for example for promptflow-evals[azure], it will be [azure]"
+  echo "Flag -f does not require a parameter."
+  exit 1
+}
+
+run_id=""
+workflow=""
+action=""
+ref=""
+fail=0
+extras=""
+limit=""
+
+
+while getopts ":r:w:a:b:e:l:f" opt; do
+# Parse options
+  case $opt in
+    (r) run_id="$OPTARG";;
+    (w) workflow="$OPTARG";;
+    (a) action="$OPTARG";;
+    (b) ref="$OPTARG";;
+    (e) extras="$OPTARG";;
+    (f) ((fail++));;
+    (l) limit="$OPTARG";;
+    \?) print_usage;;
+  esac
+done
+
+for v in "run_id" "workflow" "action" "ref" "limit"; do
+    if [ -z ${!v} ]; then
+        print_usage "$v"
+    fi
+done
+
+# Create a throw-away virtual environment so the measurement starts from a
+# clean set of installed packages.
+ENV_DIR="test_pf_ev"
+python -m virtualenv "${ENV_DIR}"
+# Make activate command platform independent
+ACTIVATE="${ENV_DIR}/bin/activate"
+if [ ! -f "$ACTIVATE" ]; then
+  ACTIVATE="${ENV_DIR}/Scripts/activate"
+fi
+source "${ACTIVATE}"
+# Locate the wheel with a glob instead of parsing "ls" output; the first
+# match is used, and a missing wheel is reported explicitly.
+wheels=(promptflow_evals-*.whl)
+if [ ! -f "${wheels[0]}" ]; then
+  echo "No promptflow_evals wheel was found in $(pwd)."
+  deactivate
+  rm -rf "${ENV_DIR}"
+  exit 1
+fi
+pf_evals_wheel="${wheels[0]}"
+echo "The downloaded wheel file ${pf_evals_wheel}"
+# Estimate the installation time.
+packages=`python -m pip freeze | wc -l`
+start_tm=`date +%s`
+echo "python -m pip install \"./${pf_evals_wheel}${extras}\" --no-cache-dir"
+python -m pip install "./${pf_evals_wheel}${extras}" --no-cache-dir
+install_status=$?
+install_time=$((`date +%s` - ${start_tm}))
+packages_installed=$((`python -m pip freeze | wc -l` - packages))
+# A failed installation has no meaningful install time: clean up and
+# propagate the failure instead of reporting it as a measurement.
+if [ "${install_status}" -ne 0 ]; then
+  echo "The installation failed with exit code ${install_status}."
+  deactivate
+  rm -rf "${ENV_DIR}"
+  exit "${install_status}"
+fi
+# Log the install time
+python `dirname "$0"`/report_to_app_insights.py --activity "install_time_s" --value "{\"install_time_s\": ${install_time}, \"number_of_packages_installed\": ${packages_installed}}" --git-hub-action-run-id "${run_id}" --git-hub-workflow "${workflow}" --git-hub-action "${action}" --git-branch "${ref}"
+deactivate
+rm -rf "${ENV_DIR}"
+echo "Installed ${packages_installed} packages per ${install_time} seconds."
+if [ $fail -eq 0 ]; then
+    # Swallow the exit code 1 and just show the warning, understandable by
+    # github UI.
+    test "${install_time}" -le "${limit}" || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} seconds, the limit is ${limit}."
+else
+    test "${install_time}" -le "${limit}"
+fi
+# Return the exit code of the test command or of echo, i.e. 0.
+exit $?
diff --git a/scripts/code_qa/report_to_app_insights.py b/scripts/code_qa/report_to_app_insights.py
@@ -1,6 +1,7 @@
 from typing import Dict, Optional, Union
 
 import argparse
+import json
 import platform
 
 from promptflow._sdk._configuration import Configuration
@@ -25,12 +26,12 @@ def parse_junit_xml(fle: str) -> Dict[str, Dict[str, Union[float, str]]]:
 
         for child in test.childNodes:
             if child.nodeName == 'failure':
-                test_results['fail_message'] = child.attributes["message"].value
+                test_results[test_name]['fail_message'] = child.attributes["message"].value
     return test_results
 
 
 def main(activity_name: str,
-         value: float,
+         value: Union[float, str],
          run_id: str,
          workflow: str,
          action: str,
@@ -70,9 +71,15 @@ def main(activity_name: str,
     if junit_file:
         junit_dict = parse_junit_xml(junit_file)
         for k, v in junit_dict.items():
-            activity_info[k] = -1 if v["fail_message"] else v['time']
+            if v["fail_message"]:
+                # Do not log time together with fail message.
+                continue
+            activity_info[k] = v['time']
     else:
-        activity_info["value"] = value
+        if isinstance(value, str):
+            activity_info.update(json.loads(value))
+        else:
+            activity_info["value"] = value
 
     # write information to the application insights.
     logger.info(action, extra={"custom_dimensions": activity_info})
@@ -83,8 +90,11 @@ def main(activity_name: str,
         description="Log the value to application insights along with platform characteristics and run ID.")
     parser.add_argument('--activity', help='The activity to be logged.',
                         required=True)
-    parser.add_argument('--value', type=float, help='The value for activity.',
-                        required=False, default=-1)
+    parser.add_argument(
+        '--value',
+        help='The floating point value for activity or a set of values in key-value format.',
+        required=False,
+        default=-1)
     parser.add_argument('--junit-xml', help='The path to junit-xml file.',
                         dest="junit_xml", required=False, default=None)
     parser.add_argument('--git-hub-action-run-id', dest='run_id',