Enable AutoRAG to automatically generate the evaluation dataset and evaluate the RAG system #36

Merged · 20 commits · Jun 28, 2024
Changes from 7 commits
25 changes: 25 additions & 0 deletions evals/benchmark/config.yaml
@@ -0,0 +1,25 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ground_truth_file: ./ground_truth.jsonl
use_openai_key: false
search_type: [similarity, mmr]
k: [1]
fetch_k: [5]
score_threshold: [0.3]
top_n: [1]
temperature: [0.01]
top_k: [1, 3, 5]
top_p: [0.1]
repetition_penalty: [1.0]
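
Each list-valued field in this config is one dimension of a parameter sweep: the runner below (run_rag_benchmark.py) loops over the Cartesian product of all the lists, so this particular file expands to 2 (search_type) × 3 (top_k) = 6 benchmark runs. A minimal sketch for previewing the grid size before launching a sweep, assuming the file above is saved as ./config.yaml:

# Sketch: preview how many runs a sweep config expands to (the path is illustrative).
from itertools import product

import yaml

with open("./config.yaml") as f:
    cfg = yaml.safe_load(f)

# Only list-valued fields are sweep dimensions; scalar fields apply to every run.
grid = {key: val for key, val in cfg.items() if isinstance(val, list)}
print(len(list(product(*grid.values()))), "runs over", list(grid))  # 6 runs for the file above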
102 changes: 102 additions & 0 deletions evals/benchmark/ragas_benchmark.sh
@@ -0,0 +1,102 @@
#!/bin/bash
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -x

function main {

    init_params "$@"
    run_benchmark

}

# init params
function init_params {
    # default hyperparameters, each overridable via --flag=value
    search_type="similarity"
    k=1
    fetch_k=5
    score_threshold=0.3
    top_n=1
    max_chunk_size=256
    temperature=0.01
    top_k=1
    top_p=0.1
    repetition_penalty=1.0

    for var in "$@"
    do
        case $var in
            --ground_truth_file=*)
                ground_truth_file=$(echo ${var} |cut -f2 -d=)
            ;;
            --input_path=*)
                input_path=$(echo ${var} |cut -f2 -d=)
            ;;
            --use_openai_key=*)
                use_openai_key=$(echo ${var} |cut -f2 -d=)
            ;;
            --search_type=*)
                search_type=$(echo ${var} |cut -f2 -d=)
            ;;
            --k=*)
                k=$(echo ${var} |cut -f2 -d=)
            ;;
            --fetch_k=*)
                fetch_k=$(echo ${var} |cut -f2 -d=)
            ;;
            --score_threshold=*)
                score_threshold=$(echo ${var} |cut -f2 -d=)
            ;;
            --top_n=*)
                top_n=$(echo ${var} |cut -f2 -d=)
            ;;
            --temperature=*)
                temperature=$(echo ${var} |cut -f2 -d=)
            ;;
            --top_k=*)
                top_k=$(echo ${var} |cut -f2 -d=)
            ;;
            --top_p=*)
                top_p=$(echo ${var} |cut -f2 -d=)
            ;;
            --repetition_penalty=*)
                repetition_penalty=$(echo ${var} |cut -f2 -d=)
            ;;
        esac
    done

}

# run_benchmark
function run_benchmark {

    if [[ ${use_openai_key} == True ]]; then
        use_openai_key="--use_openai_key"
    else
        use_openai_key=""
    fi

    python -u ../evaluation/autorag/evaluation/ragas_evaluation_benchmark.py \
        --ground_truth_file ${ground_truth_file} \
        --input_path ${input_path} \
        ${use_openai_key} \
        --search_type ${search_type} \
        --k ${k} \
        --fetch_k ${fetch_k} \
        --score_threshold ${score_threshold} \
        --top_n ${top_n} \
        --temperature ${temperature} \
        --top_k ${top_k} \
        --top_p ${top_p} \
        --repetition_penalty ${repetition_penalty}
}

main "$@"
82 changes: 82 additions & 0 deletions evals/benchmark/run_rag_benchmark.py
@@ -0,0 +1,82 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import subprocess

import yaml


def read_yaml_file(file_path):
    with open(file_path, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)


if __name__ == "__main__":
    if os.path.exists("result_ragas.jsonl"):
        os.remove("result_ragas.jsonl")
    script_path = "ragas_benchmark.sh"

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str)
    args = parser.parse_args()

    data = read_yaml_file(args.config)
    # normalize every value (list entries included) to str so it can be passed on the CLI
    data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()}

    ground_truth_file = data["ground_truth_file"]
    use_openai_key = data["use_openai_key"]
    search_types = data["search_type"]
    ks = data["k"]
    fetch_ks = data["fetch_k"]
    score_thresholds = data["score_threshold"]
    top_ns = data["top_n"]
    temperatures = data["temperature"]
    top_ks = data["top_k"]
    top_ps = data["top_p"]
    repetition_penalties = data["repetition_penalty"]

    # sweep the full Cartesian product of the configured parameter lists
    for search_type in search_types:
        for k in ks:
            for fetch_k in fetch_ks:
                for score_threshold in score_thresholds:
                    for top_n in top_ns:
                        for temperature in temperatures:
                            for top_k in top_ks:
                                for top_p in top_ps:
                                    for repetition_penalty in repetition_penalties:
                                        subprocess.run(
                                            [
                                                "bash",
                                                script_path,
                                                "--ground_truth_file=" + ground_truth_file,
                                                "--use_openai_key=" + use_openai_key,
                                                "--search_type=" + search_type,
                                                "--k=" + k,
                                                "--fetch_k=" + fetch_k,
                                                "--score_threshold=" + score_threshold,
                                                "--top_n=" + top_n,
                                                "--temperature=" + temperature,
                                                "--top_k=" + top_k,
                                                "--top_p=" + top_p,
                                                "--repetition_penalty=" + repetition_penalty,
                                            ],
                                            stdout=subprocess.DEVNULL,
                                            stderr=subprocess.DEVNULL,
                                        )
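
The runner deletes result_ragas.jsonl up front, which implies each invocation of the evaluation script appends one record to that file. A minimal sketch for inspecting the accumulated records after a sweep; the record schema is defined by the evaluation script, so treat the fields as unknown:

# Sketch: iterate over the benchmark records accumulated during a sweep.
import jsonlines

with jsonlines.open("result_ragas.jsonl") as reader:
    for record in reader:  # one JSON object per benchmark run (schema set by the eval script)
        print(record)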
76 changes: 76 additions & 0 deletions evals/evaluation/autorag/data_generation/gen_answer_dataset.py
@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

import jsonlines
import torch
from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401

from .prompt_dict import TRUTHGENERATE_PROMPT


def load_documents(document_file_jsonl_path):
    document_list = []
    with open(document_file_jsonl_path) as file:
        for stu in jsonlines.Reader(file):
            passages = [stu["query"], stu["pos"][0]]
            document_list.append(passages)
    return document_list


def answer_generate(llm, base_dir, file_json_path, generation_config):
    documents = load_documents(base_dir)

    try:
        if isinstance(llm, str):
            # a string llm is treated as a model id or local path to load
            use_endpoint = False
            tokenizer = AutoTokenizer.from_pretrained(llm)
            llm = AutoModelForCausalLM.from_pretrained(llm, device_map="auto", torch_dtype=torch.float16)
            llm.eval()
        else:
            # anything else is treated as an already-constructed endpoint client
            use_endpoint = True
    except Exception:
        print("Please check the llm setting!")
        raise

    # each entry of documents is a [question, context] pair (see load_documents)
    for question, context in documents:
        if context and question:
            prompt = TRUTHGENERATE_PROMPT.format(question=question, context=context)
            if not use_endpoint:
                with torch.no_grad():
                    model_input = tokenizer(prompt, return_tensors="pt").to(llm.device)
                    res = llm.generate(**model_input, generation_config=generation_config)[0]
                    res = tokenizer.decode(res, skip_special_tokens=True)
            else:
                res = llm.invoke(prompt)

            # keep only the text after the marker, then strip the marker and separators
            res = res[res.find("Generated ground_truth:") :]
            res = re.sub("Generated ground_truth:", "", res)
            res = re.sub("---", "", res)

            result_str = res.replace("#", " ").replace(r"\t", " ").replace("\n", " ").strip()

            if result_str and not result_str.isspace():
                data = {
                    "question": question,
                    "context": [context],
                    "ground_truth": result_str,
                }
                with jsonlines.open(file_json_path, "a") as file_json:
                    file_json.write(data)
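
For reference, load_documents expects each input line to carry a "query" string and a non-empty "pos" list of passages, e.g. {"query": "What is AutoRAG?", "pos": ["AutoRAG automates RAG evaluation ..."]}. A minimal usage sketch, assuming a local model path, illustrative file names, and a stock transformers GenerationConfig (all assumptions, not part of this PR):

# Sketch: drive answer_generate end to end (paths and model id are illustrative).
from transformers import GenerationConfig

# Import path mirrors the file location in this PR; the actual package layout may differ.
from evals.evaluation.autorag.data_generation.gen_answer_dataset import answer_generate

answer_generate(
    llm="Qwen/Qwen2-7B-Instruct",           # model id or local path (assumption)
    base_dir="./candidate_context.jsonl",   # JSONL of {"query": ..., "pos": [...]} lines
    file_json_path="./ground_truth.jsonl",  # appended with question/context/ground_truth records
    generation_config=GenerationConfig(max_new_tokens=256),
)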