From df58fe51f57006493e0da378dd36a2ae94aadd46 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Fri, 20 Sep 2024 10:51:32 +0800 Subject: [PATCH] Add hyperlinks and paths validation. (#132) Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 123 ++++++++++++++++++ README.md | 2 +- doc/platform-optimization/README.md | 2 +- evals/evaluation/autorag/evaluation/README.md | 4 +- evals/evaluation/rag_eval/README.md | 2 +- examples/AudioQnA/README.md | 48 +++++++ 6 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/pr-path-detection.yml create mode 100644 examples/AudioQnA/README.md diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml new file mode 100644 index 00000000..2bfb3969 --- /dev/null +++ b/.github/workflows/pr-path-detection.yml @@ -0,0 +1,123 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check Paths and Hyperlinks + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] + +jobs: + check-the-validity-of-hyperlinks-in-README: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo GenAIEval + uses: actions/checkout@v4 + + - name: Check the Validity of Hyperlinks + run: | + cd ${{github.workspace}} + fail="FALSE" + url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'GenAIEval/blob/main') + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo "Invalid link from ${{github.workspace}}/$path: $url" + fail="TRUE" + fi + fi + done + fi + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All hyperlinks are valid." 
+ fi + shell: bash + + check-the-validity-of-relative-path: + runs-on: ubuntu-latest + steps: + - name: Clean up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo GenAIEval + uses: actions/checkout@v4 + + - name: Checking Relative Path Validity + run: | + cd ${{github.workspace}} + fail="FALSE" + repo_name=${{ github.event.pull_request.head.repo.full_name }} + if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then + owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1) + branch="https://github.com/$owner/GenAIEval/tree/${{ github.event.pull_request.head.ref }}" + else + branch="https://github.com/opea-project/GenAIEval/blob/${{ github.event.pull_request.head.ref }}" + fi + link_head="https://github.com/opea-project/GenAIEval/blob/main" + png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http') + if [ -n "$png_lines" ]; then + for png_line in $png_lines; do + refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) + png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) + if [[ "${png_path:0:1}" == "/" ]]; then + check_path=${{github.workspace}}$png_path + elif [[ "${png_path:0:1}" == "#" ]]; then + check_path=${{github.workspace}}/$refer_path$png_path + else + check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path + fi + real_path=$(realpath $check_path) + if [ $? -ne 0 ]; then + echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist" + fail="TRUE" + else + url=$link_head$(echo "$real_path" | sed 's|.*/GenAIEval||') + response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}" + url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIEval||') + response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path" + fail="TRUE" + fi + else + echo "Check branch ${{ github.event.pull_request.head.ref }} successfully." + fi + fi + fi + fi + done + fi + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All hyperlinks are valid." + fi + shell: bash diff --git a/README.md b/README.md index 8734f83a..3d6b6d6e 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ results = evaluate(args) #### remote service usage -1. setup a separate server with [GenAIComps](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/lm-eval) +1. setup a separate server with [GenAIComps](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/utils/lm-eval) ``` # build cpu docker diff --git a/doc/platform-optimization/README.md b/doc/platform-optimization/README.md index ae74765d..8b98a21c 100644 --- a/doc/platform-optimization/README.md +++ b/doc/platform-optimization/README.md @@ -98,7 +98,7 @@ Let us consider isolating AI inference and reranking containers in application's Gaudi accelerated pipeline. 
 In the
-[manifest](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/kubernetes/manifests/gaudi/chatqna.yaml)
+[manifest](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml)
 there are "tgi", "tei" and "teirerank" containers in
 "chatqna-tgi" and "chatqna-tei" and "chatqna-teirerank"
 deployments that will need a lot of CPUs. They implement
 text-generation-interface and
diff --git a/evals/evaluation/autorag/evaluation/README.md b/evals/evaluation/autorag/evaluation/README.md
index 8068d58b..99a623d1 100644
--- a/evals/evaluation/autorag/evaluation/README.md
+++ b/evals/evaluation/autorag/evaluation/README.md
@@ -1,6 +1,6 @@
 # AutoRAG to evaluate the RAG system performance
 
-AutoRAG is help to end-to-end evaluate the performance of the whole system. Currently, we support to evaluate the performance from 4 perspectives, answer_relevancy, faithfulness, context_recall, context_precision. Before using this service, the use should firstly prepare the groundtruth dataset in the [standard format](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/ground_truth.jsonl). We also provide a [script](https://github.com/opea-project/GenAIEval/blob/main/evals/evaluation/autorag/data_generation/gen_eval_dataset.py) to automatically generate the groundtruth query and answer.
+AutoRAG helps to evaluate the end-to-end performance of the whole system. Currently, we support evaluating the performance from 4 perspectives: answer_relevancy, faithfulness, context_recall, and context_precision. Before using this service, the user should first prepare the groundtruth dataset in the [standard format](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/ragas/ground_truth.jsonl). We also provide a [script](https://github.com/opea-project/GenAIEval/blob/main/evals/evaluation/autorag/data_generation/gen_eval_dataset.py) to automatically generate the groundtruth query and answer.
 
 ## Service preparation
 The evaluation for the RAG system is based on the set up of the RAG services. Please follow [the steps](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/README.md) to set up your RAG services.
@@ -12,7 +12,7 @@ At this moment, we provide a solution that test the single group of parameters a
 python -u ragas_evaluation_benchmark.py --ground_truth_file ground_truth.jsonl --search_type mmr --k 1 --fetch_k 5 --score_threshold 0.3 --top_n 1 --temperature 0.01 --top_k 5 --top_p 0.95 --repetition_penalty 1.1 --use_openai_key True
 ```
 
-For evaluating multiple groups of parameters, please use [this script](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/run_rag_benchmark.py).
+For evaluating multiple groups of parameters, please use [this script](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/ragas/run_rag_benchmark.py).
 ```bash
 python -u run_rag_benchmark.py --config config.yaml
 ```
diff --git a/evals/evaluation/rag_eval/README.md b/evals/evaluation/rag_eval/README.md
index 59f7dd2f..1186464a 100644
--- a/evals/evaluation/rag_eval/README.md
+++ b/evals/evaluation/rag_eval/README.md
@@ -7,7 +7,7 @@
 - [Prerequisites](#prerequisites)
 - [MultiHop (English dataset)](#multihop)
   - [Launch Service of RAG System](#launch-service-of-rag-system)
-  - [Launch Service of LLM-as-a-Judge](launch-service-of-llm)
+  - [Launch Service of LLM-as-a-Judge](#launch-service-of-llm-as-a-judge)
   - [Prepare Dataset](#prepare-dataset)
   - [Evaluation](#evaluation)
 - [CRUD (Chinese dataset)](#crud)
diff --git a/examples/AudioQnA/README.md b/examples/AudioQnA/README.md
new file mode 100644
index 00000000..45290620
--- /dev/null
+++ b/examples/AudioQnA/README.md
@@ -0,0 +1,48 @@
+# AudioQnA Accuracy Evaluation
+
+## Dataset
+
+
+We evaluate ASR accuracy on the test set of the LibriSpeech [dataset](https://huggingface.co/datasets/andreagasparini/librispeech_test_only), which contains 2620 records of audio and text.
+
+## Metrics
+
+We evaluate the WER (Word Error Rate) metric of the ASR microservice.
+
+## Evaluation
+
+### Launch ASR microservice
+
+Launch the ASR microservice with the following commands. For more details, please refer to the [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).
+
+```bash
+git clone https://github.com/opea-project/GenAIComps
+cd GenAIComps
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
+# change the model to evaluate by editing the --model_name_or_path value
+docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny"
+```
+
+### Evaluate
+
+Install dependencies:
+
+```
+pip install -r requirements.txt
+```
+
+Evaluate the ASR accuracy:
+```bash
+# validate the offline model
+# python offline_evaluate.py
+# validate the online ASR microservice accuracy
+python online_evaluate.py
+```
+
+### Performance Results
+Here are the tested results for your reference:
+| Model | WER (%) |
+| --- | ---- |
+| whisper-large-v2 | 2.87 |
+| whisper-large | 2.7 |
+| whisper-medium | 3.45 |
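
Note on the WER figures in the new AudioQnA README above: WER is the word-level edit distance (substitutions + deletions + insertions) divided by the number of reference words, usually reported as a percentage. The snippet below is a minimal sketch of that computation using the `jiwer` package with placeholder transcripts; the package choice and the sample strings are assumptions for illustration and are not part of `online_evaluate.py` or `offline_evaluate.py` in this patch.

```python
# Minimal WER sketch (assumptions: `jiwer` is installed; the transcripts below are
# placeholders, not real outputs of the whisper ASR microservice).
import jiwer

references = [
    "he hoped there would be stew for dinner",
    "stuff it into you his belly counselled him",
]
hypotheses = [
    "he hoped there would be stew for dinner",
    "stuff it into you his belly counseled him",
]

# WER = (substitutions + deletions + insertions) / number of reference words
wer = jiwer.wer(references, hypotheses)
print(f"WER: {wer * 100:.2f}%")
```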