From bfc29fec2755897bafae6a65756a48e93b0a03ac Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Tue, 15 Oct 2024 10:32:08 -0400
Subject: [PATCH] Rename to llm-eval-test

---
 README.md                                              | 10 +++++-----
 {perf_llm_eval => llm_eval_test}/__main__.py           |  6 +++---
 .../benchmarks/catalog/cards/mmlu_pro/all.json         |  0
 .../benchmarks/catalog/cards/mmlu_pro/biology.json     |  0
 .../benchmarks/catalog/cards/mmlu_pro/business.json    |  0
 .../benchmarks/catalog/cards/mmlu_pro/chemistry.json   |  0
 .../catalog/cards/mmlu_pro/computer_science.json       |  0
 .../benchmarks/catalog/cards/mmlu_pro/economics.json   |  0
 .../benchmarks/catalog/cards/mmlu_pro/engineering.json |  0
 .../benchmarks/catalog/cards/mmlu_pro/health.json      |  0
 .../benchmarks/catalog/cards/mmlu_pro/history.json     |  0
 .../benchmarks/catalog/cards/mmlu_pro/law.json         |  0
 .../benchmarks/catalog/cards/mmlu_pro/math.json        |  0
 .../benchmarks/catalog/cards/mmlu_pro/other.json       |  0
 .../benchmarks/catalog/cards/mmlu_pro/philosophy.json  |  0
 .../benchmarks/catalog/cards/mmlu_pro/physics.json     |  0
 .../benchmarks/catalog/cards/mmlu_pro/psychology.json  |  0
 .../benchmarks/catalog/metrics/accuracy.json           |  0
 .../benchmarks/catalog/processors/first_character.json |  0
 .../catalog/tasks/qa/multiple_choice/with_topic.json   |  0
 .../qa/multiple_choice/with_topic/lm_eval_harness.json |  0
 .../benchmarks/tasks/_mmlu_pro.yaml                    |  0
 .../benchmarks/tasks/mmlu_pro_all.yaml                 |  0
 .../benchmarks/tasks/mmlu_pro_biology.yaml             |  0
 .../benchmarks/tasks/mmlu_pro_business.yaml            |  0
 .../benchmarks/tasks/mmlu_pro_chemistry.yaml           |  0
 .../benchmarks/tasks/mmlu_pro_computer_science.yaml    |  0
 .../benchmarks/tasks/mmlu_pro_economics.yaml           |  0
 .../benchmarks/tasks/mmlu_pro_engineering.yaml         |  0
 .../benchmarks/tasks/mmlu_pro_health.yaml              |  0
 .../benchmarks/tasks/mmlu_pro_history.yaml             |  0
 .../benchmarks/tasks/mmlu_pro_law.yaml                 |  0
 .../benchmarks/tasks/mmlu_pro_math.yaml                |  0
 .../benchmarks/tasks/mmlu_pro_other.yaml               |  0
 .../benchmarks/tasks/mmlu_pro_philosophy.yaml          |  0
 .../benchmarks/tasks/mmlu_pro_physics.yaml             |  0
 .../benchmarks/tasks/mmlu_pro_psychology.yaml          |  0
 .../benchmarks/tasks/task.py                           |  0
 .../benchmarks/tasks/unitxt                            |  0
 {perf_llm_eval => llm_eval_test}/lm_eval_wrapper.py    |  2 +-
 {perf_llm_eval => llm_eval_test}/parser.py             |  0
 .../wrappers/unitxt/data/data.py                       |  0
 .../wrappers/unitxt/metric/metric.py                   |  0
 pyproject.toml                                         |  4 ++--
 44 files changed, 11 insertions(+), 11 deletions(-)
 rename {perf_llm_eval => llm_eval_test}/__main__.py (93%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/all.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/biology.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/business.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/chemistry.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/computer_science.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/economics.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/engineering.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/health.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/history.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/law.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/math.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/other.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/philosophy.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/physics.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/psychology.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/metrics/accuracy.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/processors/first_character.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/_mmlu_pro.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_all.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_biology.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_business.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_chemistry.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_computer_science.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_economics.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_engineering.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_health.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_history.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_law.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_math.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_other.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_philosophy.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_physics.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_psychology.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/task.py (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/unitxt (100%)
 rename {perf_llm_eval => llm_eval_test}/lm_eval_wrapper.py (97%)
 rename {perf_llm_eval => llm_eval_test}/parser.py (100%)
 rename {perf_llm_eval => llm_eval_test}/wrappers/unitxt/data/data.py (100%)
 rename {perf_llm_eval => llm_eval_test}/wrappers/unitxt/metric/metric.py (100%)

diff --git a/README.md b/README.md
index 525abd3..cc402db 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# perf-llm-eval
+# llm-eval-test
 
 A wrapper around [lm-eval-harness](https://github.com/EleutherAI/lm-evaluation-harness) and [Unitxt](https://github.com/IBM/unitxt) designed for evaluation of a local inference endpoint.
 
@@ -25,16 +25,16 @@
 python -m venv venv
 source venv/bin/activate
 
 # Install the package
-pip install git+https://github.com/sjmonson/perf-llm-eval.git
+pip install git+https://github.com/sjmonson/llm-eval-test.git
 
 # View run options
-perf-llm-eval run --help
+llm-eval-test run --help
 ```
 
 ## Usage
 
 ```
-usage: perf-llm-eval run [-h] [--catalog_path PATH] [--tasks_path PATH] [-v | -q] -H ENDPOINT -m MODEL -t TASKS -d PATH [-b INT] [-o OUTPUT]
+usage: llm-eval-test run [-h] [--catalog_path PATH] [--tasks_path PATH] [-v | -q] -H ENDPOINT -m MODEL -t TASKS -d PATH [-b INT] [-o OUTPUT]
 
 Run tasks
@@ -74,5 +74,5 @@ huggingface-cli download $DATASET --repo-type dataset --local-dir $DATASETS_DIR/
 # Run the benchmark
 ENDPOINT=http://127.0.0.1:8000/v1/completions # An OpenAI API-compatable completions endpoint
 MODEL_NAME=meta-llama/Llama-3.1-8B # Name of the model hosted on the inference server
-perf-llm-eval run --endpoint $ENDPOINT --model $MODEL_NAME --datasets $DATASETS_DIR --tasks mmlu_pro
+llm-eval-test run --endpoint $ENDPOINT --model $MODEL_NAME --datasets $DATASETS_DIR --tasks mmlu_pro
 ```
diff --git a/perf_llm_eval/__main__.py b/llm_eval_test/__main__.py
similarity index 93%
rename from perf_llm_eval/__main__.py
rename to llm_eval_test/__main__.py
index 798f01d..9cdf123 100755
--- a/perf_llm_eval/__main__.py
+++ b/llm_eval_test/__main__.py
@@ -4,9 +4,9 @@
 import logging
 import tempfile
 
-from perf_llm_eval.parser import setup_parser
+from llm_eval_test.parser import setup_parser
 
-logger = logging.getLogger("perf-llm-eval")
+logger = logging.getLogger("llm-eval-test")
 
 
 def eval_cli():
@@ -35,7 +35,7 @@ def eval_cli():
     os.environ["UNITXT_ARTIFACTORIES"] = args.catalog_path
 
     # Late import to avoid slow cli
-    from perf_llm_eval.lm_eval_wrapper import LMEvalWrapper
+    from llm_eval_test.lm_eval_wrapper import LMEvalWrapper
 
     if args.command == 'list':
         LMEvalWrapper.list_tasks(args.tasks_path)
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/all.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/all.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/all.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/all.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/biology.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/biology.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/biology.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/biology.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/business.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/business.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/business.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/business.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/chemistry.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/chemistry.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/chemistry.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/chemistry.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/computer_science.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/computer_science.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/computer_science.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/computer_science.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/economics.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/economics.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/economics.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/economics.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/engineering.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/engineering.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/engineering.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/engineering.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/health.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/health.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/health.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/health.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/history.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/history.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/history.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/history.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/law.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/law.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/law.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/law.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/math.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/math.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/math.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/math.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/other.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/other.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/other.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/other.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/philosophy.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/philosophy.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/philosophy.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/philosophy.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/physics.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/physics.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/physics.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/physics.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/psychology.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/psychology.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/psychology.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/psychology.json
diff --git a/perf_llm_eval/benchmarks/catalog/metrics/accuracy.json b/llm_eval_test/benchmarks/catalog/metrics/accuracy.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/metrics/accuracy.json
rename to llm_eval_test/benchmarks/catalog/metrics/accuracy.json
diff --git a/perf_llm_eval/benchmarks/catalog/processors/first_character.json b/llm_eval_test/benchmarks/catalog/processors/first_character.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/processors/first_character.json
rename to llm_eval_test/benchmarks/catalog/processors/first_character.json
diff --git a/perf_llm_eval/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json b/llm_eval_test/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json
rename to llm_eval_test/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json
diff --git a/perf_llm_eval/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json b/llm_eval_test/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json
rename to llm_eval_test/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json
diff --git a/perf_llm_eval/benchmarks/tasks/_mmlu_pro.yaml b/llm_eval_test/benchmarks/tasks/_mmlu_pro.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/_mmlu_pro.yaml
rename to llm_eval_test/benchmarks/tasks/_mmlu_pro.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_all.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_all.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_all.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_all.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_biology.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_biology.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_biology.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_biology.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_business.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_business.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_business.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_business.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_chemistry.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_chemistry.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_chemistry.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_chemistry.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_computer_science.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_computer_science.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_computer_science.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_computer_science.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_economics.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_economics.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_economics.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_economics.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_engineering.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_engineering.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_engineering.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_engineering.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_health.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_health.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_health.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_health.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_history.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_history.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_history.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_history.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_law.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_law.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_law.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_law.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_math.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_math.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_math.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_math.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_other.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_other.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_other.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_other.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_philosophy.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_philosophy.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_philosophy.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_philosophy.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_physics.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_physics.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_physics.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_physics.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_psychology.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_psychology.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_psychology.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_psychology.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/task.py b/llm_eval_test/benchmarks/tasks/task.py
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/task.py
rename to llm_eval_test/benchmarks/tasks/task.py
diff --git a/perf_llm_eval/benchmarks/tasks/unitxt b/llm_eval_test/benchmarks/tasks/unitxt
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/unitxt
rename to llm_eval_test/benchmarks/tasks/unitxt
diff --git a/perf_llm_eval/lm_eval_wrapper.py b/llm_eval_test/lm_eval_wrapper.py
similarity index 97%
rename from perf_llm_eval/lm_eval_wrapper.py
rename to llm_eval_test/lm_eval_wrapper.py
index 0cdedef..9f8ee89 100644
--- a/perf_llm_eval/lm_eval_wrapper.py
+++ b/llm_eval_test/lm_eval_wrapper.py
@@ -8,7 +8,7 @@ from lm_eval.tasks import TaskManager  # type: ignore
 from lm_eval.utils import handle_non_serializable, make_table
 
-logger = logging.getLogger("perf-llm-eval")
+logger = logging.getLogger("llm-eval-test")
 
 
 class LMEvalWrapper(object):
     @staticmethod
diff --git a/perf_llm_eval/parser.py b/llm_eval_test/parser.py
similarity index 100%
rename from perf_llm_eval/parser.py
rename to llm_eval_test/parser.py
diff --git a/perf_llm_eval/wrappers/unitxt/data/data.py b/llm_eval_test/wrappers/unitxt/data/data.py
similarity index 100%
rename from perf_llm_eval/wrappers/unitxt/data/data.py
rename to llm_eval_test/wrappers/unitxt/data/data.py
diff --git a/perf_llm_eval/wrappers/unitxt/metric/metric.py b/llm_eval_test/wrappers/unitxt/metric/metric.py
similarity index 100%
rename from perf_llm_eval/wrappers/unitxt/metric/metric.py
rename to llm_eval_test/wrappers/unitxt/metric/metric.py
diff --git a/pyproject.toml b/pyproject.toml
index 76d917b..ea7c4e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "perf-llm-eval"
+name = "llm-eval-test"
 version = "0.1.0"
 description = ""
 authors = ["Samuel Monson "]
@@ -12,7 +12,7 @@
 unitxt = "^1.13.1"
 
 [tool.poetry.scripts]
-perf-llm-eval = "perf_llm_eval.__main__:eval_cli"
+llm-eval-test = "llm_eval_test.__main__:eval_cli"
 
 [build-system]
 requires = ["poetry-core"]