From ec505edef419982ccdd5e3ecd4cbecd522cf72db Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Mon, 24 Jun 2024 14:10:39 +0800 Subject: [PATCH] Fix coverage command (#33) Signed-off-by: Sun, Xuehao Co-authored-by: chensuyue --- .github/workflows/model_test_cpu.yml | 2 +- .github/workflows/model_test_hpu.yml | 2 +- .github/workflows/scripts/install_evals.sh | 12 ++++++++++++ .github/workflows/scripts/unittest/calc_coverage.sh | 6 +++--- .github/workflows/scripts/unittest/unittest.sh | 4 ++-- .github/workflows/unittest.yml | 3 ++- .../lm_eval/models/huggingface.py | 2 +- tests/test_lm_eval.py | 5 +++-- 8 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/scripts/install_evals.sh diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml index 3fe43edf..ce43a9e3 100644 --- a/.github/workflows/model_test_cpu.yml +++ b/.github/workflows/model_test_cpu.yml @@ -34,7 +34,7 @@ jobs: matrix: include: - modelName: "opt-125m" - datasets: "piqa" + datasets: "lambada_openai" device: "cpu" tasks: "text-generation" fail-fast: true diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml index 4914a68d..613372ef 100644 --- a/.github/workflows/model_test_hpu.yml +++ b/.github/workflows/model_test_hpu.yml @@ -34,7 +34,7 @@ jobs: matrix: include: - modelName: "opt-125m" - datasets: "piqa" + datasets: "lambada_openai" device: "hpu" tasks: "text-generation" fail-fast: true diff --git a/.github/workflows/scripts/install_evals.sh b/.github/workflows/scripts/install_evals.sh new file mode 100644 index 00000000..56c0eb43 --- /dev/null +++ b/.github/workflows/scripts/install_evals.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +echo -e "\n Install GenAIEval ... " +cd /GenAIEval +python -m pip install --no-cache-dir -r requirements.txt +python setup.py bdist_wheel +pip install dist/opea_eval*.whl + +pip list diff --git a/.github/workflows/scripts/unittest/calc_coverage.sh b/.github/workflows/scripts/unittest/calc_coverage.sh index 16cd09e9..e2e1cc23 100644 --- a/.github/workflows/scripts/unittest/calc_coverage.sh +++ b/.github/workflows/scripts/unittest/calc_coverage.sh @@ -3,11 +3,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -source ../../change_color +source ../change_color LOG_DIR=$1 coverage_compare="${LOG_DIR}/coverage_compare.html" -coverage_log_pr="${LOG_DIR}/UnitTestPR-test/coverage_pr" -coverage_log_base="${LOG_DIR}/UnitTestbaseline/coverage_base" +coverage_log_pr="${LOG_DIR}/UnitTestPR-test/pr" +coverage_log_base="${LOG_DIR}/UnitTestbaseline/base" function get_coverage_data() { # Input argument diff --git a/.github/workflows/scripts/unittest/unittest.sh b/.github/workflows/scripts/unittest/unittest.sh index fab9145f..e1076565 100644 --- a/.github/workflows/scripts/unittest/unittest.sh +++ b/.github/workflows/scripts/unittest/unittest.sh @@ -27,8 +27,8 @@ function pytest() { ut_log_name="${LOG_DIR}/unit_test_$1.log" export GLOG_minloglevel=2 - genaieval_path=$(python -c 'import GenAIEval; import os; print(os.path.dirname(GenAIEval.__file__))') - find . -name "test*.py" | sed 's,\.\/,coverage run --source='"${genaieval_path}"' --append ,g' | sed 's/$/ --verbose/' >run.sh + genaieval_path=$(python3 -c 'import evals; print(evals.__path__[0])') + find . -name "test*.py" | sed "s,\.\/,coverage run --source=\"${genaieval_path}\" --append ,g" | sed 's/$/ --verbose/' >run.sh coverage erase # run UT diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 354fc2f1..ca76f308 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -13,6 +13,7 @@ on: - GenAIEval/** - setup.py - tests/** + - .github/workflows/scripts/unittest/** workflow_dispatch: # If there is a new commit, the previous jobs will be canceled @@ -61,7 +62,7 @@ jobs: - name: Install Dependencies run: | - docker exec ${{ env.CONTAINER_NAME }} bash -c "cd /GenAIEval && pip install -r requirements.txt && python setup.py install" + docker exec ${{ env.CONTAINER_NAME }} bash -c "bash /GenAIEval/.github/workflows/scripts/install_evals.sh" - name: Run UT run: | diff --git a/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py index 1631672c..0d8c595b 100644 --- a/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py +++ b/evals/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py @@ -88,7 +88,7 @@ def __init__( dtype: Optional[Union[str, torch.dtype]] = "auto", batch_size: Optional[Union[int, str]] = 1, max_batch_size: Optional[int] = 64, - trust_remote_code: Optional[bool] = False, + trust_remote_code: Optional[bool] = True, use_fast_tokenizer: Optional[bool] = True, add_bos_token: Optional[bool] = False, prefix_token_id: Optional[int] = None, diff --git a/tests/test_lm_eval.py b/tests/test_lm_eval.py index 552f33ed..4fb23e70 100644 --- a/tests/test_lm_eval.py +++ b/tests/test_lm_eval.py @@ -21,13 +21,14 @@ def test_lm_eval(self): model="hf", user_model=user_model, tokenizer=tokenizer, - tasks="piqa", + tasks="lambada_openai", device="cpu", batch_size=1, limit=5, + trust_remote_code=True, ) results = evaluate(args) - self.assertEqual(results["results"]["piqa"]["acc,none"], 0.6) + self.assertEqual(results["results"]["lambada_openai"]["acc,none"], 0.6) if __name__ == "__main__":