-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquestionGenerator.py
111 lines (90 loc) · 3.78 KB
/
questionGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# this script generates questions out of the inserted documents. (currently 102 question-answers)
# It is not using the pinecone index since there is no possibility to retrieve all documents from the pinecone database.
# The questions and answers are stored in a MongoDB database.
# this will be especially used to question the students as a chatbot and test their knowledge.
from llama_index.core import SimpleDirectoryReader
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import QAGenerationChain
from langchain_community.embeddings import OpenAIEmbeddings
from pymongo import MongoClient
import pinecone
from langchain_community.vectorstores import Pinecone
from langchain.evaluation import ContextQAEvalChain
from python_translator import Translator
from dotenv import load_dotenv
import os, random
def question_generator():
    """Generate question-answer pairs from the course documents and store them in MongoDB.

    Loads the slide and exercise documents from the folders configured in the
    environment, concatenates their text, runs an LLM-based QA-generation chain
    over the combined text, and inserts every generated question/answer pair
    into the ``DBISquestions.questionAnswer`` collection.

    Note: the Pinecone index is deliberately not used here because Pinecone
    offers no way to retrieve all stored documents.
    """
    load_dotenv()
    # connection to the mongodb database
    client = MongoClient(os.getenv('MONGO_CONNECTION_STRING'))
    # select database and collection
    db = client["DBISquestions"]
    collection = db["questionAnswer"]
    # get all documents from the configured folders
    slides = SimpleDirectoryReader(os.getenv("SELECTED_FILES")).load_data()
    exercises = SimpleDirectoryReader(os.getenv("SELECTED_EXERCISES")).load_data()
    # join the document texts once instead of quadratic += concatenation
    text = "".join(doc.text for doc in slides) + "".join(doc.text for doc in exercises)
    # generate question/answer pairs out of the text
    chain = QAGenerationChain.from_llm(ChatOpenAI(temperature=0))
    qa = chain.run(text)
    # bulk insert: one round trip instead of one insert per generated pair
    if qa:
        collection.insert_many(qa)
    return
def random_question_tool(input):
    """Return one randomly chosen stored question, translated into German.

    ``input`` is accepted (and ignored) so the function can be registered as a
    chatbot tool. If the question database does not exist yet, it is populated
    first via question_generator().
    """
    client = MongoClient(os.getenv('MONGO_CONNECTION_STRING'))
    database_name = "DBISquestions"
    # populate the database first if it has never been generated
    if database_name not in client.list_database_names():
        question_generator()
    db = client[database_name]
    col = db["questionAnswer"]
    total = col.count_documents({})
    questionanswers = col.find()
    # BUG FIX: randint(0, total) is inclusive on both ends, so the original
    # could index one past the last document. randrange(total) yields 0..total-1.
    chosen = random.randrange(total)
    question = questionanswers[chosen].get("question")
    # translate the question into german for the student-facing chatbot
    translator = Translator()
    translation = str(translator.translate(question, "german", "english"))
    return translation
def answer_comparison(input):
    """Grade a student's answer against the course material stored in Pinecone.

    Parameters:
        input: dict with keys "question" (the question asked) and "answer"
               (the student's answer to evaluate).

    Returns:
        The evaluation result produced by the LLM-based context QA eval chain.
    """
    question = input.get("question")
    answer = input.get("answer")
    # connect to the Pinecone index that holds the course documents
    pinecone.init(
        api_key=os.getenv("PINECONE_API_KEY"),
        environment=os.getenv("PINECONE_ENVIRONMENT")
    )
    index = pinecone.Index(os.getenv("PINECONE_INDEX_NAME"))
    # initialize embedding model; must match the model used when indexing
    embed = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        openai_api_key=os.getenv("OPENAI_API_KEY")
    )
    text_field = "text"
    # connect to index
    vector_store = Pinecone(index, embed.embed_query, text_field)
    retriever = vector_store.as_retriever()
    # BUG FIX: ContextQAEvalChain must be built via from_llm (it accepts neither
    # a ``retriever`` nor a ``chain_type`` argument), and ``reference`` must be
    # the retrieved context TEXT, not the retriever object itself.
    context_docs = retriever.get_relevant_documents(question)
    context = "\n\n".join(doc.page_content for doc in context_docs)
    eval_chain = ContextQAEvalChain.from_llm(llm=ChatOpenAI(temperature=0))
    output = eval_chain.evaluate_strings(
        input=question,
        prediction=answer,
        reference=context,
    )
    return output