Skip to content

Add CRAG benchmark

Add CRAG benchmark #97

Workflow file for this run

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Model Test on CPU
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- .github/workflows/model_test_cpu.yml
- evals/evaluation/**
- evals/metrics/**
- setup.py
workflow_dispatch:
# If there is a new commit, the previous jobs will be canceled
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
permissions: write-all
env:
OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
SCRIPT_PATH: /GenAIEval/.github/workflows/scripts
DOCKER_NAME: "genaieval"
DOCKER_TAG: "latest"
CONTAINER_NAME: "modelTest"
jobs:
Evaluation-Workflow:
runs-on: aise-cluster-cpu
strategy:
matrix:
include:
- modelName: "opt-125m"
datasets: "lambada_openai"
device: "cpu"
tasks: "text-generation"
fail-fast: true
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout out Repo
uses: actions/checkout@v4
with:
submodules: "recursive"
fetch-tags: true
# We need this because GitHub needs to clone the branch to pipeline
- name: Docker Build
run: |
docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} .
- name: Docker Run
run: |
if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}'$) ]]; then
docker stop ${{ env.CONTAINER_NAME }}
docker rm -vf ${{ env.CONTAINER_NAME }} || true
fi
docker run -dit --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} -v /dev/shm:/dev/shm \
-v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }}
- name: Binary build
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install"
- name: Evaluation
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval/.github/workflows/scripts/models \
&& bash -x model_test.sh --model=${{ matrix.modelName }} --device=${{ matrix.device }} --datasets=${{ matrix.datasets }} --tasks=${{ matrix.tasks }}"
- name: Collect Log
run: |
docker exec ${{ env.CONTAINER_NAME }} \
bash -c "cd /GenAIEval/.github/workflows/scripts/models \
&& bash -x collect_log.sh --model=${{ matrix.modelName }} \
--device=${{ matrix.device }} \
--datasets=${{ matrix.datasets }} \
--tasks=${{ matrix.tasks }}"
- name: Publish pipeline artifact
uses: actions/upload-artifact@v4
if: ${{ !cancelled() }}
with:
name: ${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}
path: |
${{ github.workspace }}/${{ matrix.device }}-${{ matrix.tasks }}-${{ matrix.modelName }}-${{ matrix.datasets }}.log
${{ github.workspace }}/summary.log
if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn`
retention-days: 60 # 1 <= retention-days <= 90
Genreate-Report:
runs-on: ubuntu-latest
needs: [Evaluation-Workflow]
steps:
- name: Checkout out Repo
uses: actions/checkout@v4
- name: Download Summary Log
uses: actions/download-artifact@v4
with:
path: ${{ env.OUT_SCRIPT_PATH }}/log
- name: Display structure of downloaded files
run: ls -R
- name: Analysis Summary
run: |
cd ${{ env.OUT_SCRIPT_PATH }}
ls -R
- name: Download Reference Artifact
id: download-artifact
uses: dawidd6/[email protected]
with:
workflow: model_test_cpu.yml
name: FinalReport
run_id: ${{ vars.ModelTest_CPU_REF_ID }}
path: ${{ env.OUT_SCRIPT_PATH }}
name_is_regexp: true
repo: ${{ github.repository }}
check_artifacts: false
search_artifacts: false
skip_unpack: false
if_no_artifact_found: warn
- name: Display structure of downloaded files
run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R
- name: Generate report
run: |
echo "------ Generating final report.html ------"
cd ${{ env.OUT_SCRIPT_PATH }}
mkdir -p generated
/usr/bin/bash -x generate_report.sh
env:
RUN_DISPLAY_URL: https://https://github.com/opea-project/GenAIEval/actions/runs/${{ github.run_id }}
BUILD_NUMBER: ${{ github.run_id }}
JOB_STATUS: succeed
- name: Publish Report
uses: actions/upload-artifact@v4
if: ${{ !cancelled() }}
with:
name: FinalReport
path: ${{ env.OUT_SCRIPT_PATH }}/generated
- name: Specify performance regression
if: ${{ !cancelled() }}
run: |
if [ ${{ env.is_perf_reg }} == 'true' ]; then
echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
exit 1
fi