Clean up and use average stats

Signed-off-by: Jack Luar <[email protected]>
The-OpenROAD-Project · Nov 10, 2024 · b3f05ef · b3f05ef · luarss · Nov 10, 2024
1 parent 61d54ca
commit b3f05ef
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/evaluation/auto_evaluation/dataset/preprocess.py b/evaluation/auto_evaluation/dataset/preprocess.py
@@ -48,8 +48,12 @@ def read_deepeval_cache():
                 metric["metric_data"]["success"]
             )
 
-    print("Metric Scores: ", metric_scores)
-    print("Metric Passes: ", metric_passes)
+    print("Average Metric Scores: ")
+    for key, value in metric_scores.items():
+        print(key, sum(value) / len(value))
+    print("Metric Passrates: ")
+    for key, value in metric_passes.items():
+        print(key, value.count(True) / len(value))
 
 
 if __name__ == "__main__":

diff --git a/evaluation/auto_evaluation/src/models/vertex_ai.py b/evaluation/auto_evaluation/src/models/vertex_ai.py
@@ -6,8 +6,6 @@
 import instructor
 
 from typing import Any
-
-# from langchain_google_vertexai import ChatVertexAI, HarmBlockThreshold, HarmCategory
 from vertexai.generative_models import GenerativeModel, HarmBlockThreshold, HarmCategory  # type: ignore
 from deepeval.models.base_model import DeepEvalBaseLLM
 from pydantic import BaseModel