From bfc29fec2755897bafae6a65756a48e93b0a03ac Mon Sep 17 00:00:00 2001
From: Samuel Monson
Date: Tue, 15 Oct 2024 10:32:08 -0400
Subject: [PATCH] Rename to llm-eval-test

---
 README.md                                              | 10 +++++-----
 {perf_llm_eval => llm_eval_test}/__main__.py           |  6 +++---
 .../benchmarks/catalog/cards/mmlu_pro/all.json         |  0
 .../benchmarks/catalog/cards/mmlu_pro/biology.json     |  0
 .../benchmarks/catalog/cards/mmlu_pro/business.json    |  0
 .../benchmarks/catalog/cards/mmlu_pro/chemistry.json   |  0
 .../catalog/cards/mmlu_pro/computer_science.json       |  0
 .../benchmarks/catalog/cards/mmlu_pro/economics.json   |  0
 .../benchmarks/catalog/cards/mmlu_pro/engineering.json |  0
 .../benchmarks/catalog/cards/mmlu_pro/health.json      |  0
 .../benchmarks/catalog/cards/mmlu_pro/history.json     |  0
 .../benchmarks/catalog/cards/mmlu_pro/law.json         |  0
 .../benchmarks/catalog/cards/mmlu_pro/math.json        |  0
 .../benchmarks/catalog/cards/mmlu_pro/other.json       |  0
 .../benchmarks/catalog/cards/mmlu_pro/philosophy.json  |  0
 .../benchmarks/catalog/cards/mmlu_pro/physics.json     |  0
 .../benchmarks/catalog/cards/mmlu_pro/psychology.json  |  0
 .../benchmarks/catalog/metrics/accuracy.json           |  0
 .../benchmarks/catalog/processors/first_character.json |  0
 .../catalog/tasks/qa/multiple_choice/with_topic.json   |  0
 .../qa/multiple_choice/with_topic/lm_eval_harness.json |  0
 .../benchmarks/tasks/_mmlu_pro.yaml                    |  0
 .../benchmarks/tasks/mmlu_pro_all.yaml                 |  0
 .../benchmarks/tasks/mmlu_pro_biology.yaml             |  0
 .../benchmarks/tasks/mmlu_pro_business.yaml            |  0
 .../benchmarks/tasks/mmlu_pro_chemistry.yaml           |  0
 .../benchmarks/tasks/mmlu_pro_computer_science.yaml    |  0
 .../benchmarks/tasks/mmlu_pro_economics.yaml           |  0
 .../benchmarks/tasks/mmlu_pro_engineering.yaml         |  0
 .../benchmarks/tasks/mmlu_pro_health.yaml              |  0
 .../benchmarks/tasks/mmlu_pro_history.yaml             |  0
 .../benchmarks/tasks/mmlu_pro_law.yaml                 |  0
 .../benchmarks/tasks/mmlu_pro_math.yaml                |  0
 .../benchmarks/tasks/mmlu_pro_other.yaml               |  0
 .../benchmarks/tasks/mmlu_pro_philosophy.yaml          |  0
 .../benchmarks/tasks/mmlu_pro_physics.yaml             |  0
 .../benchmarks/tasks/mmlu_pro_psychology.yaml          |  0
 .../benchmarks/tasks/task.py                           |  0
 .../benchmarks/tasks/unitxt                            |  0
 {perf_llm_eval => llm_eval_test}/lm_eval_wrapper.py    |  2 +-
 {perf_llm_eval => llm_eval_test}/parser.py             |  0
 .../wrappers/unitxt/data/data.py                       |  0
 .../wrappers/unitxt/metric/metric.py                   |  0
 pyproject.toml                                         |  4 ++--
 44 files changed, 11 insertions(+), 11 deletions(-)
 rename {perf_llm_eval => llm_eval_test}/__main__.py (93%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/all.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/biology.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/business.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/chemistry.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/computer_science.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/economics.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/engineering.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/health.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/history.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/law.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/math.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/other.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/philosophy.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/physics.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/cards/mmlu_pro/psychology.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/metrics/accuracy.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/processors/first_character.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/_mmlu_pro.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_all.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_biology.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_business.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_chemistry.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_computer_science.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_economics.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_engineering.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_health.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_history.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_law.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_math.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_other.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_philosophy.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_physics.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/mmlu_pro_psychology.yaml (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/task.py (100%)
 rename {perf_llm_eval => llm_eval_test}/benchmarks/tasks/unitxt (100%)
 rename {perf_llm_eval => llm_eval_test}/lm_eval_wrapper.py (97%)
 rename {perf_llm_eval => llm_eval_test}/parser.py (100%)
 rename {perf_llm_eval => llm_eval_test}/wrappers/unitxt/data/data.py (100%)
 rename {perf_llm_eval => llm_eval_test}/wrappers/unitxt/metric/metric.py (100%)

diff --git a/README.md b/README.md
index 525abd3..cc402db 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# perf-llm-eval
+# llm-eval-test
 
 A wrapper around [lm-eval-harness](https://github.com/EleutherAI/lm-evaluation-harness) and [Unitxt](https://github.com/IBM/unitxt) designed for evaluation of a local inference endpoint.
 
@@ -25,16 +25,16 @@
 python -m venv venv
 source venv/bin/activate
 
 # Install the package
-pip install git+https://github.com/sjmonson/perf-llm-eval.git
+pip install git+https://github.com/sjmonson/llm-eval-test.git
 
 # View run options
-perf-llm-eval run --help
+llm-eval-test run --help
 ```
 
 ## Usage
 
 ```
-usage: perf-llm-eval run [-h] [--catalog_path PATH] [--tasks_path PATH] [-v | -q] -H ENDPOINT -m MODEL -t TASKS -d PATH [-b INT] [-o OUTPUT]
+usage: llm-eval-test run [-h] [--catalog_path PATH] [--tasks_path PATH] [-v | -q] -H ENDPOINT -m MODEL -t TASKS -d PATH [-b INT] [-o OUTPUT]
 
 Run tasks
@@ -74,5 +74,5 @@ huggingface-cli download $DATASET --repo-type dataset --local-dir $DATASETS_DIR/
 # Run the benchmark
 ENDPOINT=http://127.0.0.1:8000/v1/completions # An OpenAI API-compatable completions endpoint
 MODEL_NAME=meta-llama/Llama-3.1-8B # Name of the model hosted on the inference server
-perf-llm-eval run --endpoint $ENDPOINT --model $MODEL_NAME --datasets $DATASETS_DIR --tasks mmlu_pro
+llm-eval-test run --endpoint $ENDPOINT --model $MODEL_NAME --datasets $DATASETS_DIR --tasks mmlu_pro
 ```
diff --git a/perf_llm_eval/__main__.py b/llm_eval_test/__main__.py
similarity index 93%
rename from perf_llm_eval/__main__.py
rename to llm_eval_test/__main__.py
index 798f01d..9cdf123 100755
--- a/perf_llm_eval/__main__.py
+++ b/llm_eval_test/__main__.py
@@ -4,9 +4,9 @@
 import logging
 import tempfile
 
-from perf_llm_eval.parser import setup_parser
+from llm_eval_test.parser import setup_parser
 
-logger = logging.getLogger("perf-llm-eval")
+logger = logging.getLogger("llm-eval-test")
 
 
 def eval_cli():
@@ -35,7 +35,7 @@ def eval_cli():
     os.environ["UNITXT_ARTIFACTORIES"] = args.catalog_path
 
     # Late import to avoid slow cli
-    from perf_llm_eval.lm_eval_wrapper import LMEvalWrapper
+    from llm_eval_test.lm_eval_wrapper import LMEvalWrapper
 
     if args.command == 'list':
         LMEvalWrapper.list_tasks(args.tasks_path)
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/all.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/all.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/all.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/all.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/biology.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/biology.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/biology.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/biology.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/business.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/business.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/business.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/business.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/chemistry.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/chemistry.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/chemistry.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/chemistry.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/computer_science.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/computer_science.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/computer_science.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/computer_science.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/economics.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/economics.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/economics.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/economics.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/engineering.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/engineering.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/engineering.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/engineering.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/health.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/health.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/health.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/health.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/history.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/history.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/history.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/history.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/law.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/law.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/law.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/law.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/math.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/math.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/math.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/math.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/other.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/other.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/other.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/other.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/philosophy.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/philosophy.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/philosophy.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/philosophy.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/physics.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/physics.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/physics.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/physics.json
diff --git a/perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/psychology.json b/llm_eval_test/benchmarks/catalog/cards/mmlu_pro/psychology.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/cards/mmlu_pro/psychology.json
rename to llm_eval_test/benchmarks/catalog/cards/mmlu_pro/psychology.json
diff --git a/perf_llm_eval/benchmarks/catalog/metrics/accuracy.json b/llm_eval_test/benchmarks/catalog/metrics/accuracy.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/metrics/accuracy.json
rename to llm_eval_test/benchmarks/catalog/metrics/accuracy.json
diff --git a/perf_llm_eval/benchmarks/catalog/processors/first_character.json b/llm_eval_test/benchmarks/catalog/processors/first_character.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/processors/first_character.json
rename to llm_eval_test/benchmarks/catalog/processors/first_character.json
diff --git a/perf_llm_eval/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json b/llm_eval_test/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json
rename to llm_eval_test/benchmarks/catalog/tasks/qa/multiple_choice/with_topic.json
diff --git a/perf_llm_eval/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json b/llm_eval_test/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json
similarity index 100%
rename from perf_llm_eval/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json
rename to llm_eval_test/benchmarks/catalog/templates/qa/multiple_choice/with_topic/lm_eval_harness.json
diff --git a/perf_llm_eval/benchmarks/tasks/_mmlu_pro.yaml b/llm_eval_test/benchmarks/tasks/_mmlu_pro.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/_mmlu_pro.yaml
rename to llm_eval_test/benchmarks/tasks/_mmlu_pro.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_all.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_all.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_all.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_all.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_biology.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_biology.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_biology.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_biology.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_business.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_business.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_business.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_business.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_chemistry.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_chemistry.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_chemistry.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_chemistry.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_computer_science.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_computer_science.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_computer_science.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_computer_science.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_economics.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_economics.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_economics.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_economics.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_engineering.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_engineering.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_engineering.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_engineering.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_health.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_health.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_health.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_health.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_history.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_history.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_history.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_history.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_law.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_law.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_law.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_law.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_math.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_math.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_math.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_math.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_other.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_other.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_other.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_other.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_philosophy.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_philosophy.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_philosophy.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_philosophy.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_physics.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_physics.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_physics.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_physics.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/mmlu_pro_psychology.yaml b/llm_eval_test/benchmarks/tasks/mmlu_pro_psychology.yaml
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/mmlu_pro_psychology.yaml
rename to llm_eval_test/benchmarks/tasks/mmlu_pro_psychology.yaml
diff --git a/perf_llm_eval/benchmarks/tasks/task.py b/llm_eval_test/benchmarks/tasks/task.py
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/task.py
rename to llm_eval_test/benchmarks/tasks/task.py
diff --git a/perf_llm_eval/benchmarks/tasks/unitxt b/llm_eval_test/benchmarks/tasks/unitxt
similarity index 100%
rename from perf_llm_eval/benchmarks/tasks/unitxt
rename to llm_eval_test/benchmarks/tasks/unitxt
diff --git a/perf_llm_eval/lm_eval_wrapper.py b/llm_eval_test/lm_eval_wrapper.py
similarity index 97%
rename from perf_llm_eval/lm_eval_wrapper.py
rename to llm_eval_test/lm_eval_wrapper.py
index 0cdedef..9f8ee89 100644
--- a/perf_llm_eval/lm_eval_wrapper.py
+++ b/llm_eval_test/lm_eval_wrapper.py
@@ -8,7 +8,7 @@ from lm_eval.tasks import TaskManager  # type: ignore
 from lm_eval.utils import handle_non_serializable, make_table
 
-logger = logging.getLogger("perf-llm-eval")
+logger = logging.getLogger("llm-eval-test")
 
 
 class LMEvalWrapper(object):
     @staticmethod
diff --git a/perf_llm_eval/parser.py b/llm_eval_test/parser.py
similarity index 100%
rename from perf_llm_eval/parser.py
rename to llm_eval_test/parser.py
diff --git a/perf_llm_eval/wrappers/unitxt/data/data.py b/llm_eval_test/wrappers/unitxt/data/data.py
similarity index 100%
rename from perf_llm_eval/wrappers/unitxt/data/data.py
rename to llm_eval_test/wrappers/unitxt/data/data.py
diff --git a/perf_llm_eval/wrappers/unitxt/metric/metric.py b/llm_eval_test/wrappers/unitxt/metric/metric.py
similarity index 100%
rename from perf_llm_eval/wrappers/unitxt/metric/metric.py
rename to llm_eval_test/wrappers/unitxt/metric/metric.py
diff --git a/pyproject.toml b/pyproject.toml
index 76d917b..ea7c4e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "perf-llm-eval"
+name = "llm-eval-test"
 version = "0.1.0"
 description = ""
 authors = ["Samuel Monson "]
@@ -12,7 +12,7 @@
 unitxt = "^1.13.1"
 
 [tool.poetry.scripts]
-perf-llm-eval = "perf_llm_eval.__main__:eval_cli"
+llm-eval-test = "llm_eval_test.__main__:eval_cli"
 
 [build-system]
 requires = ["poetry-core"]