Split installation tests from other e2e tests. (#3546)

# Description In this PR we split the installation time tests from the unit and e2e tests. - Unit tests and e2e tests now run against an editable install - Removed the wheel building step from the tests - Added a new gate, which builds the wheel, installs it into a virtual environment and logs the installation time. - Added a test for logging, which, however, is not switched on in CI/CD. - Re-enabled tests on python 3.9 in CI/CD, as we do not depend on the wheel in tests anymore, i.e. we have a separate test for the creation of a functional wheel. Example query to application insights: ``` let date_of_run=datetime("2024-07-02"); customEvents | where timestamp > date_of_run | where customDimensions has 'installation-test' | extend parsed = parse_json(customDimensions) | project name, run_date=format_datetime(timestamp, 'yyyy-MM-dd'), metric = parsed['activity_name'], OS = strcat(parsed["OS"], "_", parsed["OS_release"]), python_version = tostring(parsed['python_version']), install_time_s=todecimal(parsed["install_time_s"]) | summarize performance = avg(install_time_s) by OS, name | render columnchart with( kind=unstacked ) ``` Result: ![image](https://github.com/user-attachments/assets/d3b28293-05d0-46cc-b4b7-678a6982a475) See work item 3342618 for the initial task and task 3339407 to enable tests on python 3.9. In this PR we install the wheel into a virtual environment, which does not have the issue mentioned in the bug. # All Promptflow Contribution checklist: - [x] **The pull request does not introduce [breaking changes].** - [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [x] **I confirm that all new dependencies are compatible with the MIT license.** - [X] **Create an issue and link to the pull request to get dedicated review from promptflow team. 
Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which have an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [x] Pull request includes test coverage for the included changes.
microsoft · Jul 16, 2024 · 93c211d · 93c211d
1 parent 696b129
commit 93c211d
Show file tree

Hide file tree

Showing 8 changed files with 223 additions and 88 deletions.
diff --git a/.github/workflows/promptflow-evals-e2e-test-azure.yml b/.github/workflows/promptflow-evals-e2e-test-azure.yml
@@ -14,28 +14,11 @@ env:
   WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - uses: snok/install-poetry@v1
-    - name: build
-      run: poetry build
-      working-directory: ${{ env.WORKING_DIRECTORY }}
-    - uses: actions/upload-artifact@v4
-      with:
-        name: promptflow-evals
-        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
   test:
-    needs: build
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
-        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
-        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
-        # Add 3.9 back after we figure out the issue
-        python-version: ['3.8', '3.10', '3.11']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
       fail-fast: false
     # snok/install-poetry need this to support Windows
     defaults:
@@ -52,10 +35,6 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - uses: snok/install-poetry@v1
-      - uses: actions/download-artifact@v4
-        with:
-          name: promptflow-evals
-          path: ${{ env.WORKING_DIRECTORY }}
       - name: install test dependency group
         run: poetry install --only test
         working-directory: ${{ env.WORKING_DIRECTORY }}
@@ -67,10 +46,7 @@ jobs:
           poetry run pip install -e ../promptflow-tracing
           poetry run pip install -e ../promptflow-tools
           poetry run pip install -e ../promptflow-azure
-        working-directory: ${{ env.WORKING_DIRECTORY }}
-      - name: install promptflow-evals from wheel
-        # wildcard expansion (*) does not work in Windows, so leverage python to find and install
-        run: poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
+          poetry run pip install -e ../promptflow-evals
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install recording
         run: poetry run pip install -e ../promptflow-recording

diff --git a/.github/workflows/promptflow-evals-e2e-test-local.yml b/.github/workflows/promptflow-evals-e2e-test-local.yml
@@ -18,9 +18,6 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
-        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
-        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
-        # Add 3.9 back after we figure out the issue
         python-version: ['3.8', '3.9', '3.10', '3.11']
       fail-fast: false
     # snok/install-poetry need this to support Windows

diff --git a/.github/workflows/promptflow-evals-installation-test.yml b/.github/workflows/promptflow-evals-installation-test.yml
@@ -0,0 +1,61 @@
+name: promptflow-evals-installation-test
+
+# Triggers: a daily schedule, pull requests that touch the promptflow-evals
+# sources or this workflow file, and manual dispatch.
+on:
+  schedule:
+    - cron: "40 10 * * *" # 10:40 UTC (2:40 PST) every day
+  pull_request:
+    paths:
+      - src/promptflow-evals/**
+      - .github/workflows/promptflow-evals-installation-test.yml
+  workflow_dispatch:
+
+# Environment shared by every job of this workflow.
+env:
+  IS_IN_CI_PIPELINE: "true"
+  # Directory of the package under test; used as working-directory by the steps.
+  WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
+  PROMPT_FLOW_TEST_MODE: "live"
+
+jobs:
+  # Build the promptflow-evals wheel once with poetry and upload it as the
+  # "promptflow-evals" artifact.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - uses: snok/install-poetry@v1
+    - name: build
+      run: poetry build
+      working-directory: ${{ env.WORKING_DIRECTORY }}
+    # Only the wheel is published; other files in dist/ are not uploaded.
+    - uses: actions/upload-artifact@v4
+      with:
+        name: promptflow-evals
+        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
+
+  # Install the wheel produced by the build job into a fresh virtual
+  # environment on every OS / Python combination and log the installation time,
+  # once without extras and once with the [azure] extras.
+  test:
+    needs: build
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-13]
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+      fail-fast: false
+    # Use bash on every OS (including Windows) so the same run commands work
+    # across the whole matrix.
+    defaults:
+      run:
+        shell: bash
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      # Select the interpreter requested by the matrix; without this step every
+      # leg would run on the runner's default Python and matrix.python-version
+      # would have no effect.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - uses: actions/download-artifact@v4
+        with:
+          name: promptflow-evals
+          path: ${{ env.WORKING_DIRECTORY }}
+      - name: install virtualenv
+        run: python -m pip install virtualenv
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+      # The expressions are quoted so that values containing spaces or shell
+      # metacharacters reach the script as single arguments.
+      - name: install promptflow-evals from wheel
+        id: install_promptflow_no_extras
+        run: |
+          bash ../../scripts/code_qa/calculate_install_time.sh -r "${{ github.run_id }}" -w "${{ github.workflow }}" -a "${{ github.action }}" -b "${{ github.ref }}" -l "300"
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+      - name: install promptflow-evals with azure extras from wheel
+        id: install_promptflow_with_extras
+        run: |
+          bash ../../scripts/code_qa/calculate_install_time.sh -r "${{ github.run_id }}" -w "${{ github.workflow }}" -a "${{ github.action }}" -b "${{ github.ref }}" -e "[azure]" -l "300"
+        working-directory: ${{ env.WORKING_DIRECTORY }}
diff --git a/...lows/promptflow-evals-regression-test.yml → ...ows/promptflow-evals-performance-test.yml b/...lows/promptflow-evals-regression-test.yml → ...ows/promptflow-evals-performance-test.yml
@@ -1,12 +1,12 @@
-name: promptflow-evals-regression-test
+name: promptflow-evals-performance-test
 
 on:
   schedule:
     - cron: "40 10 * * *" # 2:40 PST every day
   pull_request:
     paths:
       - src/promptflow-evals/**
-      - .github/workflows/promptflow-evals-regression-test.yml
+      - .github/workflows/promptflow-evals-performance-test.yml
   workflow_dispatch:
 
 env:
@@ -15,28 +15,11 @@ env:
   PROMPT_FLOW_TEST_MODE: "live"
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - uses: snok/install-poetry@v1
-    - name: build
-      run: poetry build
-      working-directory: ${{ env.WORKING_DIRECTORY }}
-    - uses: actions/upload-artifact@v4
-      with:
-        name: promptflow-evals
-        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
   test:
-    needs: build
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
-        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
-        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
-        # Add 3.9 back after we figure out the issue
-        python-version: ['3.8', '3.10', '3.11']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
       fail-fast: false
     # snok/install-poetry need this to support Windows
     defaults:
@@ -49,28 +32,20 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - uses: snok/install-poetry@v1
-      - uses: actions/download-artifact@v4
-        with:
-          name: promptflow-evals
-          path: ${{ env.WORKING_DIRECTORY }}
       - name: install test dependency group
         run: poetry install --only test
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install promptflow-evals from wheel
         id: install_promptflow
         run: |
           # Estimate the installation time.
-          export start_tm=`date +%s`
           poetry run pip install -e ../promptflow
           poetry run pip install -e ../promptflow-core
           poetry run pip install -e ../promptflow-devkit
           poetry run pip install -e ../promptflow-tracing
           poetry run pip install -e ../promptflow-tools
           poetry run pip install -e ../promptflow-azure
-          poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
-          export install_time=$((`date +%s` - ${start_tm}))
-          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity install_time_s --value $install_time --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
-          test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} minutes, the limit is ${TIME_LIMIT}."
+          poetry run pip install -e ../promptflow-evals
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install recording
         run: poetry run pip install -e ../promptflow-recording

diff --git a/.github/workflows/promptflow-evals-unit-test.yml b/.github/workflows/promptflow-evals-unit-test.yml
@@ -14,21 +14,7 @@ env:
   WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - uses: snok/install-poetry@v1
-    - name: build
-      run: poetry build
-      working-directory: ${{ env.WORKING_DIRECTORY }}
-    - uses: actions/upload-artifact@v4
-      with:
-        name: promptflow-evals
-        path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
-
   test:
-    needs: build
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-13]
@@ -45,28 +31,18 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - uses: snok/install-poetry@v1
-      - uses: actions/download-artifact@v4
-        with:
-          name: promptflow-evals
-          path: ${{ env.WORKING_DIRECTORY }}
       - name: install test dependency group
         run: poetry install --only test
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install promptflow packages in editable mode
         run: |
-          export TIME_LIMIT=5
-          export start_tm=`date +%s`
           poetry run pip install -e ../promptflow
           poetry run pip install -e ../promptflow-core
           poetry run pip install -e ../promptflow-devkit
           poetry run pip install -e ../promptflow-tracing
           poetry run pip install -e ../promptflow-tools
           poetry run pip install -e ../promptflow-azure
           poetry run pip install -e ../promptflow-evals
-          export install_time=$(((`date +%s` - ${start_tm})/60))
-          echo "The installation took ${install_time} minutes."
-          echo "The time limit for installation is ${TIME_LIMIT}"
-          test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} minutes, the limit is ${TIME_LIMIT}."
         working-directory: ${{ env.WORKING_DIRECTORY }}
       - name: install recording
         run: poetry run pip install -e ../promptflow-recording

diff --git a/scripts/code_qa/calculate_install_time.sh b/scripts/code_qa/calculate_install_time.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Print an optional "missing argument" notice followed by the usage text,
+# then terminate the script with exit code 1.
+# Arguments:
+#   $1 - (optional) name of the required argument that was not supplied.
+print_usage(){
+  if [ $# -gt 0 ]; then
+      echo "Missing argument ${1}"
+  fi
+  echo "Usage:"
+  echo "$0 -r [github run id] -w [github workflow] -a [github action id] -b [github ref id] -e [optional extras] -f [should we fail?] -l [install time limit, seconds]"
+  echo "Extras should be written as it appears in pip, for example for promptflow-evals[azure], it will be [azure]"
+  echo "Flag -f does not require parameter."
+  exit 1
+}
+
+# Defaults for the command line options. "fail" counts how many times the -f
+# flag was given; "extras" may legitimately stay empty.
+run_id=""
+workflow=""
+action=""
+ref=""
+fail=0
+extras=""
+limit=""
+
+
+# The leading ":" in the optstring switches getopts to silent error reporting:
+# an unknown option yields "?", an option with a missing value yields ":" and
+# the offending option letter is placed in OPTARG.
+while getopts ":r:w:a:b:e:l:f" opt; do
+# Parse options
+  case $opt in
+    (r) run_id="$OPTARG";;
+    (w) workflow="$OPTARG";;
+    (a) action="$OPTARG";;
+    (b) ref="$OPTARG";;
+    (e) extras="$OPTARG";;
+    (f) ((fail++));;
+    (l) limit="$OPTARG";;
+    # An option was given without its value; report it instead of silently
+    # ignoring it.
+    (:) print_usage "-${OPTARG}";;
+    \?) print_usage;;
+  esac
+done
+
+# Every variable named here must have been set from the command line.
+# ${!v} is bash indirect expansion: the value of the variable whose name is
+# stored in v.
+for v in "run_id" "workflow" "action" "ref" "limit"; do
+    # The expansion is quoted so that a value containing spaces is tested as a
+    # single word instead of making "[" fail with a syntax error.
+    if [ -z "${!v}" ]; then
+        print_usage "$v"
+    fi
+done
+
+# Create a throw-away virtual environment, install the downloaded wheel into
+# it, measure how long the installation takes and how many packages it pulls
+# in, report both to application insights and compare the time with the limit.
+ENV_DIR="test_pf_ev"
+python -m virtualenv "${ENV_DIR}"
+# Make activate command platform independent
+ACTIVATE="${ENV_DIR}/bin/activate"
+if [ ! -f "$ACTIVATE" ]; then
+  ACTIVATE="${ENV_DIR}/Scripts/activate"
+fi
+source "${ACTIVATE}"
+# Estimate the installation time.
+pf_evals_wheel=$(ls -1 promptflow_evals-*)
+echo "The downloaded wheel file ${pf_evals_wheel}"
+packages=$(python -m pip freeze | wc -l)
+start_tm=$(date +%s)
+echo "python -m pip install \"./${pf_evals_wheel}${extras}\" --no-cache-dir"
+python -m pip install "./${pf_evals_wheel}${extras}" --no-cache-dir
+# Remember the result of pip: a wheel that cannot be installed must fail the
+# gate no matter how quickly pip gave up.
+install_status=$?
+install_time=$(($(date +%s) - start_tm))
+packages_installed=$(($(python -m pip freeze | wc -l) - packages))
+if [ "${install_status}" -ne 0 ]; then
+    deactivate
+    rm -rf "${ENV_DIR}"
+    echo "::error::Installation of ${pf_evals_wheel}${extras} failed with exit code ${install_status}."
+    exit "${install_status}"
+fi
+# Log the install time
+python "$(dirname "$0")/report_to_app_insights.py" --activity "install_time_s" --value "{\"install_time_s\": ${install_time}, \"number_of_packages_installed\": ${packages_installed}}" --git-hub-action-run-id "${run_id}" --git-hub-workflow "${workflow}" --git-hub-action "${action}" --git-branch "${ref}"
+deactivate
+rm -rf "${ENV_DIR}"
+echo "Installed ${packages_installed} packages per ${install_time} seconds."
+if [ "${fail}" -eq 0 ]; then
+    # Swallow the exit code 1 and just show the warning, understandable by
+    # github UI.
+    test "${install_time}" -le "${limit}" || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} seconds, the limit is ${limit}."
+else
+    test "${install_time}" -le "${limit}"
+fi
+# Exit with the status of the last command above: 0 in warning mode (either
+# test or echo succeeded), the result of test when -f was given.
+exit $?
diff --git a/scripts/code_qa/report_to_app_insights.py b/scripts/code_qa/report_to_app_insights.py
@@ -1,6 +1,7 @@
 from typing import Dict, Optional, Union
 
 import argparse
+import json
 import platform
 
 from promptflow._sdk._configuration import Configuration
@@ -25,12 +26,12 @@ def parse_junit_xml(fle: str) -> Dict[str, Dict[str, Union[float, str]]]:
 
         for child in test.childNodes:
             if child.nodeName == 'failure':
-                test_results['fail_message'] = child.attributes["message"].value
+                test_results[test_name]['fail_message'] = child.attributes["message"].value
     return test_results
 
 
 def main(activity_name: str,
-         value: float,
+         value: Union[float, str],
          run_id: str,
          workflow: str,
          action: str,
@@ -70,9 +71,15 @@ def main(activity_name: str,
     if junit_file:
         junit_dict = parse_junit_xml(junit_file)
         for k, v in junit_dict.items():
-            activity_info[k] = -1 if v["fail_message"] else v['time']
+            if v["fail_message"]:
+                # Do not log time together with fail message.
+                continue
+            activity_info[k] = v['time']
     else:
-        activity_info["value"] = value
+        if isinstance(value, str):
+            activity_info.update(json.loads(value))
+        else:
+            activity_info["value"] = value
 
     # write information to the application insights.
     logger.info(action, extra={"custom_dimensions": activity_info})
@@ -83,8 +90,11 @@ def main(activity_name: str,
         description="Log the value to application insights along with platform characteristics and run ID.")
     parser.add_argument('--activity', help='The activity to be logged.',
                         required=True)
-    parser.add_argument('--value', type=float, help='The value for activity.',
-                        required=False, default=-1)
+    parser.add_argument(
+        '--value',
+        help='The floating point value for activity or a set of values in key-value format.',
+        required=False,
+        default=-1)
     parser.add_argument('--junit-xml', help='The path to junit-xml file.',
                         dest="junit_xml", required=False, default=None)
     parser.add_argument('--git-hub-action-run-id', dest='run_id',