-
Notifications
You must be signed in to change notification settings - Fork 103
/
Copy pathmain.py
85 lines (69 loc) · 3.59 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import openai
import random
# Initialize the OpenAI API key from the environment and fail fast with a
# clear error before any API call is attempted.
openai.api_key = os.getenv("OPENAI_API_KEY")  # Alternative: Use environment variable
if openai.api_key is None:
    # RuntimeError instead of bare Exception: callers can catch it specifically,
    # and raising a generic Exception is a Python anti-pattern.
    raise RuntimeError("No OpenAI API key found. Please set it as an environment variable or in main.py")
# Function to generate queries using OpenAI's ChatGPT
def generate_queries_chatgpt(original_query):
    """Ask the chat model for several search queries derived from *original_query*.

    Returns a list of strings, one per line of the model's reply.
    NOTE(review): this uses the legacy pre-v1 `openai.ChatCompletion` interface —
    confirm the pinned openai package version before upgrading.
    """
    prompt_messages = [
        {"role": "system", "content": "You are a helpful assistant that generates multiple search queries based on a single input query."},
        {"role": "user", "content": f"Generate multiple search queries related to: {original_query}"},
        {"role": "user", "content": "OUTPUT (4 queries):"},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=prompt_messages,
    )
    reply_text = response.choices[0]["message"]["content"]
    return reply_text.strip().split("\n")
# Mock function to simulate vector search, returning random scores
def vector_search(query, all_documents):
available_docs = list(all_documents.keys())
random.shuffle(available_docs)
selected_docs = available_docs[:random.randint(2, 5)]
scores = {doc: round(random.uniform(0.7, 0.9), 2) for doc in selected_docs}
return {doc: score for doc, score in sorted(scores.items(), key=lambda x: x[1], reverse=True)}
# Reciprocal Rank Fusion algorithm
def reciprocal_rank_fusion(search_results_dict, k=60):
    """Fuse several per-query rankings into one via Reciprocal Rank Fusion.

    *search_results_dict* maps query -> {doc: score}. Each document receives
    1 / (rank + k) per ranking it appears in; documents are returned as a
    dict sorted by fused score, highest first. Progress is printed for
    demonstration purposes.
    """
    fused_scores = {}

    print("Initial individual search result ranks:")
    for query, doc_scores in search_results_dict.items():
        print(f"For query '{query}': {doc_scores}")

    for query, doc_scores in search_results_dict.items():
        # Rank each query's documents by their own score, best first.
        ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)
        for rank, (doc, score) in enumerate(ranked):
            previous_score = fused_scores.get(doc, 0)
            fused_scores[doc] = previous_score + 1 / (rank + k)
            print(f"Updating score for {doc} from {previous_score} to {fused_scores[doc]} based on rank {rank} in query '{query}'")

    reranked_results = dict(sorted(fused_scores.items(), key=lambda item: item[1], reverse=True))
    print("Final reranked results:", reranked_results)
    return reranked_results
# Dummy function to simulate generative output
def generate_output(reranked_results, queries):
    """Stand-in for an LLM answer: summarise which queries and docs were used."""
    doc_ids = list(reranked_results.keys())
    return f"Final output based on {queries} and reranked documents: {doc_ids}"
# Predefined set of documents (usually these would be from your search database)
# Mock corpus: maps a document id to its text, all on a climate-change theme,
# used by vector_search() in place of a real index.
all_documents = {
    "doc1": "Climate change and economic impact.",
    "doc2": "Public health concerns due to climate change.",
    "doc3": "Climate change: A social perspective.",
    "doc4": "Technological solutions to climate change.",
    "doc5": "Policy changes needed to combat climate change.",
    "doc6": "Climate change and its impact on biodiversity.",
    "doc7": "Climate change: The science and models.",
    "doc8": "Global warming: A subset of climate change.",
    "doc9": "How climate change affects daily weather.",
    "doc10": "The history of climate change activism."
}
# Main function
if __name__ == "__main__":
    # Demo pipeline: expand one query, retrieve per expansion, fuse, answer.
    original_query = "impact of climate change"
    generated_queries = generate_queries_chatgpt(original_query)
    all_results = {
        query: vector_search(query, all_documents) for query in generated_queries
    }
    reranked_results = reciprocal_rank_fusion(all_results)
    final_output = generate_output(reranked_results, generated_queries)
    print(final_output)