Fix model definitions on evaluators
rogeriochaves committed Jul 29, 2024
1 parent 3a2d442 commit 4a223b1
Showing 3 changed files with 129 additions and 83 deletions.
evaluators/haystack/langevals_haystack/faithfulness.py (15 changes: 2 additions & 13 deletions)
@@ -3,18 +3,14 @@
# Haystack telemetry breaks for AWS lambdas because it tries to write to home folder which is read-only
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "false"

from typing import Literal
from langevals_core.base_evaluator import (
    BaseEvaluator,
    EvaluatorEntry,
    EvaluationResult,
    EvaluationResultSkipped,
    EvaluatorSettings,
    SingleEvaluationResult,
    Money,
-    LLMEvaluatorSettings
+    LLMEvaluatorSettings,
)
from pydantic import BaseModel, Field
from haystack.components.evaluators import FaithfulnessEvaluator

from langevals_haystack.lib.common import (
Expand All @@ -30,14 +26,7 @@ class HaystackFaithfulnessEntry(EvaluatorEntry):


class HaystackFaithfulnessSettings(LLMEvaluatorSettings):
-    model: str = Field(
-        default="azure/gpt-35-turbo-1106",
-        description="The model to use for evaluation.",
-    )
-    max_tokens: int = Field(
-        default=2048,
-        description="The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.",
-    )
+    pass


class HaystackFaithfulnessResult(EvaluationResult):
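The net effect of the faithfulness.py change is that HaystackFaithfulnessSettings no longer pins its own model and max_tokens fields and instead relies on whatever LLMEvaluatorSettings defines in langevals_core. A minimal sketch of how the slimmed-down class might behave is below; the stand-in base class and its field names and defaults are assumptions for illustration, since the real LLMEvaluatorSettings definition is not part of this diff.

from pydantic import BaseModel, Field


class LLMEvaluatorSettings(BaseModel):
    # Stand-in for the langevals_core base class; the field names and defaults
    # below are illustrative guesses, not taken from this commit.
    model: str = Field(default="openai/gpt-3.5-turbo", description="Model used for evaluation.")
    max_tokens: int = Field(default=2048, description="Entries above this token count are skipped.")


class HaystackFaithfulnessSettings(LLMEvaluatorSettings):
    # After this commit the subclass adds nothing of its own: it only inherits
    # the shared LLM settings from the base class.
    pass


settings = HaystackFaithfulnessSettings(model="azure/gpt-4o", max_tokens=1024)
print(settings.model)       # azure/gpt-4o
print(settings.max_tokens)  # 1024

Centralizing these fields on the base class presumably keeps defaults consistent across all LLM-based evaluators instead of each one pinning its own (here, the removed azure/gpt-35-turbo-1106 default).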
evaluators/ragas/langevals_ragas/lib/common.py (30 changes: 15 additions & 15 deletions)
@@ -6,12 +6,12 @@
from langevals_core.base_evaluator import (
    BaseEvaluator,
    EvaluationResult,
-    LLMEvaluatorSettings,
+    EvaluatorSettings,
    Money,
    EvaluationResultSkipped,
-    EvaluatorEntry
+    EvaluatorEntry,
)
-from pydantic import BaseModel, Field
+from pydantic import Field
from ragas import evaluate
from ragas.metrics.base import Metric
from ragas.llms import LangchainLLMWrapper
@@ -38,34 +38,34 @@
from tqdm.notebook import tqdm as tqdm_notebook
from functools import partialmethod

import json
import re
from typing import List, Optional
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, Faithfulness
from ragas.llms import LangchainLLMWrapper
from ragas.llms.prompt import PromptValue
from langchain_core.callbacks import Callbacks
-from pydantic import BaseModel, Field
import litellm
from langchain.schema.output import LLMResult
from langchain_core.outputs.generation import Generation
+from pydantic import Field
from langevals_core.utils import calculate_total_tokens

env_vars = []


-class RagasSettings(LLMEvaluatorSettings):
-    model: str = Field(
-        default="azure/gpt-35-turbo-16k",
+class RagasSettings(EvaluatorSettings):
+    model: Literal[
+        "openai/gpt-3.5-turbo-16k",
+        "openai/gpt-4o",
+        "openai/gpt-4o-mini",
+        "azure/gpt-35-turbo-16k",
+        "azure/gpt-4o",
+        "anthropic/claude-3-5-sonnet-20240620",
+    ] = Field(
+        default="openai/gpt-3.5-turbo-16k",
        description="The model to use for evaluation.",
    )
    embeddings_model: Literal[
        "openai/text-embedding-ada-002",
        "azure/text-embedding-ada-002",
    ] = Field(
-        default="azure/text-embedding-ada-002",
+        default="openai/text-embedding-ada-002",
        description="The model to use for embeddings.",
    )
    max_tokens: int = Field(
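For the ragas settings, the practical consequence of moving model from a free-form str to a Literal is that pydantic now rejects unsupported model names at validation time rather than failing later inside the evaluator. A small sketch of that behavior, using only the field definitions visible in the diff plus a stand-in EvaluatorSettings base class and an assumed max_tokens default (the hunk is truncated before the real value):

from typing import Literal

from pydantic import BaseModel, Field, ValidationError


class EvaluatorSettings(BaseModel):
    # Stand-in for the langevals_core base class, which is not shown in this diff.
    pass


class RagasSettings(EvaluatorSettings):
    # Mirrors the field definitions added in this commit.
    model: Literal[
        "openai/gpt-3.5-turbo-16k",
        "openai/gpt-4o",
        "openai/gpt-4o-mini",
        "azure/gpt-35-turbo-16k",
        "azure/gpt-4o",
        "anthropic/claude-3-5-sonnet-20240620",
    ] = Field(default="openai/gpt-3.5-turbo-16k", description="The model to use for evaluation.")
    embeddings_model: Literal[
        "openai/text-embedding-ada-002",
        "azure/text-embedding-ada-002",
    ] = Field(default="openai/text-embedding-ada-002", description="The model to use for embeddings.")
    max_tokens: int = Field(default=2048, description="Assumed default; the diff cuts off before the real value.")


settings = RagasSettings(model="azure/gpt-4o")
print(settings.model)             # azure/gpt-4o
print(settings.embeddings_model)  # openai/text-embedding-ada-002

try:
    RagasSettings(model="azure/gpt-35-turbo-1106")  # not in the allowed Literal list
except ValidationError as err:
    print(err)  # pydantic reports the value is not a permitted literal

A Literal field also lets a UI or schema consumer enumerate the supported models straight from the pydantic JSON schema, which is likely part of why the free-form str was dropped.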
