Skip to content

Commit

Permalink
Rename question parameter to query
Browse files Browse the repository at this point in the history
  • Loading branch information
tanaysoni committed Nov 24, 2020
1 parent e3a68ae commit b2672f9
Show file tree
Hide file tree
Showing 15 changed files with 166 additions and 166 deletions.
6 changes: 3 additions & 3 deletions haystack/document_store/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,10 +412,10 @@ def query(
body["query"]["bool"]["filter"] = filter_clause

# Retrieval via custom query
elif custom_query: # substitute placeholder for question and filters for the custom_query template string
elif custom_query: # substitute placeholder for query and filters for the custom_query template string
template = Template(custom_query)
# replace all "${question}" placeholder(s) with query
substitutions = {"question": query}
# replace all "${query}" placeholder(s) with query
substitutions = {"query": query}
# For each filter we got passed, we'll try to find & replace the corresponding placeholder in the template
# Example: filters={"years":[2018]} => replaces ${years} in custom_query with '[2018]'
if filters:
Expand Down
2 changes: 1 addition & 1 deletion haystack/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def calculate_average_precision_and_reciprocal_rank(questions_with_docs: List[di

if found_relevant_doc:
questions_with_correct_doc.append({
"question": question["question"],
"query": question["question"],
"docs": question["docs"]
})

Expand Down
4 changes: 2 additions & 2 deletions haystack/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def get_answers(self, question: str, top_k_reader: int = 1, top_k_retriever: int
len_chars = sum([len(d.text) for d in documents])
logger.info(f"Reader is looking for detailed answer in {len_chars} chars ...")

results = self.reader.predict(question=question,
results = self.reader.predict(query=question,
documents=documents,
top_k=top_k_reader) # type: Dict[str, Any]

Expand Down Expand Up @@ -364,7 +364,7 @@ def eval_batch(
self.reader.return_no_answers = True
reader_start_time = time.time()
predictions = self.reader.predict_batch(questions_with_correct_doc,
top_k_per_question=top_k_reader, batch_size=batch_size)
top_k_per_query=top_k_reader, batch_size=batch_size)
reader_total_time = time.time() - reader_start_time

for pred in predictions:
Expand Down
10 changes: 5 additions & 5 deletions haystack/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ def __init__(self, reader: BaseReader, retriever: BaseRetriever):
self.pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
self.pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

def run(self, question, top_k_retriever=5, top_k_reader=5):
output = self.pipeline.run(question=question,
def run(self, query, top_k_retriever=5, top_k_reader=5):
output = self.pipeline.run(query=query,
top_k_retriever=top_k_retriever,
top_k_reader=top_k_reader)
return output
Expand All @@ -150,8 +150,8 @@ def __init__(self, retriever: BaseRetriever):
self.pipeline = Pipeline()
self.pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])

def run(self, question, top_k_retriever=5):
output = self.pipeline.run(question=question, top_k_retriever=top_k_retriever)
def run(self, query, top_k_retriever=5):
output = self.pipeline.run(query=query, top_k_retriever=top_k_retriever)
document_dicts = [doc.to_dict() for doc in output["documents"]]
output["documents"] = document_dicts
return output
Expand Down Expand Up @@ -183,7 +183,7 @@ def run(self, **kwargs):
for i, _ in inputs:
documents.extend(i["documents"])
output = {
"question": inputs[0][0]["question"],
"query": inputs[0][0]["query"],
"documents": documents
}
return output, "output_1"
8 changes: 4 additions & 4 deletions haystack/reader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ class BaseReader(ABC):
outgoing_edges = 1

@abstractmethod
def predict(self, question: str, documents: List[Document], top_k: Optional[int] = None):
def predict(self, query: str, documents: List[Document], top_k: Optional[int] = None):
pass

@abstractmethod
def predict_batch(self, question_doc_list: List[dict], top_k_per_question: Optional[int] = None,
def predict_batch(self, query_doc_list: List[dict], top_k_per_query: Optional[int] = None,
batch_size: Optional[int] = None):
pass

Expand Down Expand Up @@ -47,9 +47,9 @@ def _calc_no_answer(no_ans_gaps: Sequence[float], best_score_answer: float):
"meta": None,}
return no_ans_prediction, max_no_ans_gap

def run(self, question: str, documents: List[Document], top_k: Optional[int] = None):
def run(self, query: str, documents: List[Document], top_k: Optional[int] = None):
if documents:
results = self.predict(question=question, documents=documents, top_k=top_k)
results = self.predict(query=query, documents=documents, top_k=top_k)
else:
results = {"answers": []}

Expand Down
42 changes: 21 additions & 21 deletions haystack/reader/farm.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,16 +243,16 @@ def save(self, directory: Path):
self.inferencer.model.save(directory)
self.inferencer.processor.save(directory)

def predict_batch(self, question_doc_list: List[dict], top_k_per_question: int = None, batch_size: int = None):
def predict_batch(self, query_doc_list: List[dict], top_k_per_query: int = None, batch_size: int = None):
"""
Use loaded QA model to find answers for a list of questions in each question's supplied list of Document.
Use loaded QA model to find answers for a list of queries in each query's supplied list of Document.
Returns list of dictionaries containing answers sorted by (desc.) probability
:param question_doc_list: List of dictionaries containing questions with their retrieved documents
:param top_k_per_question: The maximum number of answers to return for each question
:param query_doc_list: List of dictionaries containing queries with their retrieved documents
:param top_k_per_query: The maximum number of answers to return for each query
:param batch_size: Number of samples the model receives in one batch for inference
:return: List of dictionaries containing question and answers
:return: List of dictionaries containing query and answers
"""

# convert input to FARM format
Expand All @@ -261,20 +261,20 @@ def predict_batch(self, question_doc_list: List[dict], top_k_per_question: int =
labels = []

# build input objects for inference_from_objects
for question_with_docs in question_doc_list:
documents = question_with_docs["docs"]
question = question_with_docs["question"]
labels.append(question)
for query_with_docs in query_doc_list:
documents = query_with_docs["docs"]
query = query_with_docs["query"]
labels.append(query)
number_of_docs.append(len(documents))

for doc in documents:
cur = QAInput(doc_text=doc.text,
questions=Question(text=question.question,
questions=Question(text=query.question,
uid=doc.id))
inputs.append(cur)

self.inferencer.batch_size = batch_size
# make predictions on all document-question pairs
# make predictions on all document-query pairs
predictions = self.inferencer.inference_from_objects(
objects=inputs, return_json=False, multiprocessing_chunksize=1
)
Expand All @@ -290,26 +290,26 @@ def predict_batch(self, question_doc_list: List[dict], top_k_per_question: int =

result = []
for idx, group in enumerate(grouped_predictions):
answers, max_no_ans_gap = self._extract_answers_of_predictions(group, top_k_per_question)
question = group[0].question
answers, max_no_ans_gap = self._extract_answers_of_predictions(group, top_k_per_query)
query = group[0].question
cur_label = labels[idx]
result.append({
"question": question,
"query": query,
"no_ans_gap": max_no_ans_gap,
"answers": answers,
"label": cur_label
})

return result

def predict(self, question: str, documents: List[Document], top_k: Optional[int] = None):
def predict(self, query: str, documents: List[Document], top_k: Optional[int] = None):
"""
Use loaded QA model to find answers for a question in the supplied list of Document.
Use loaded QA model to find answers for a query in the supplied list of Document.
Returns dictionaries containing answers sorted by (desc.) probability.
Example:
{'question': 'Who is the father of Arya Stark?',
{'query': 'Who is the father of Arya Stark?',
'answers': [
{'answer': 'Eddard,',
'context': " She travels with her father, Eddard, to King's Landing when he is ",
Expand All @@ -323,17 +323,17 @@ def predict(self, question: str, documents: List[Document], top_k: Optional[int]
]
}
:param question: Question string
:param query: query string
:param documents: List of Document in which to search for the answer
:param top_k: The maximum number of answers to return
:return: Dict containing question and answers
:return: Dict containing query and answers
"""

# convert input to FARM format
inputs = []
for doc in documents:
cur = QAInput(doc_text=doc.text,
questions=Question(text=question,
questions=Question(text=query,
uid=doc.id))
inputs.append(cur)

Expand All @@ -344,7 +344,7 @@ def predict(self, question: str, documents: List[Document], top_k: Optional[int]
)
# assemble answers from all the different documents & format them.
answers, max_no_ans_gap = self._extract_answers_of_predictions(predictions, top_k)
result = {"question": question,
result = {"query": query,
"no_ans_gap": max_no_ans_gap,
"answers": answers}

Expand Down
18 changes: 9 additions & 9 deletions haystack/reader/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ def __init__(

# TODO context_window_size behaviour different from behavior in FARMReader

def predict(self, question: str, documents: List[Document], top_k: Optional[int] = None):
def predict(self, query: str, documents: List[Document], top_k: Optional[int] = None):
"""
Use loaded QA model to find answers for a question in the supplied list of Document.
Use loaded QA model to find answers for a query in the supplied list of Document.
Returns dictionaries containing answers sorted by (desc.) probability.
Example:
{'question': 'Who is the father of Arya Stark?',
{'query': 'Who is the father of Arya Stark?',
'answers': [
{'answer': 'Eddard,',
'context': " She travels with her father, Eddard, to King's Landing when he is ",
Expand All @@ -86,19 +86,19 @@ def predict(self, question: str, documents: List[Document], top_k: Optional[int]
]
}
:param question: Question string
:param query: query string
:param documents: List of Document in which to search for the answer
:param top_k: The maximum number of answers to return
:return: Dict containing question and answers
:return: Dict containing query and answers
"""
# get top-answers for each candidate passage
answers = []
no_ans_gaps = []
best_overall_score = 0
for doc in documents:
query = {"context": doc.text, "question": question}
predictions = self.model(query,
transformers_query = {"context": doc.text, "question": query}
predictions = self.model(transformers_query,
topk=self.top_k_per_candidate,
handle_impossible_answer=self.return_no_answers,
max_seq_len=self.max_seq_len,
Expand Down Expand Up @@ -147,12 +147,12 @@ def predict(self, question: str, documents: List[Document], top_k: Optional[int]
)
answers = answers[:top_k]

results = {"question": question,
results = {"query": query,
"answers": answers}

return results

def predict_batch(self, question_doc_list: List[dict], top_k_per_question: Optional[int] = None,
def predict_batch(self, query_doc_list: List[dict], top_k_per_query: Optional[int] = None,
batch_size: Optional[int] = None):

raise NotImplementedError("Batch prediction not yet available in TransformersReader.")
12 changes: 6 additions & 6 deletions haystack/retriever/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def eval(
) -> dict:
"""
Performs evaluation on the Retriever.
Retriever is evaluated based on whether it finds the correct document given the question string and at which
Retriever is evaluated based on whether it finds the correct document given the query string and at which
position in the ranking of documents the correct document is.
| Returns a dict containing the following metrics:
Expand All @@ -67,7 +67,7 @@ def eval(
:param label_index: Index/Table in DocumentStore where labeled questions are stored
:param doc_index: Index/Table in DocumentStore where documents that are used for evaluation are stored
:param top_k: How many documents to return per question
:param top_k: How many documents to return per query
:param open_domain: If ``True``, retrieval will be evaluated by checking if the answer string to a question is
contained in the retrieved docs (common approach in open-domain QA).
If ``False``, retrieval uses a stricter evaluation that checks if the retrieved document ids
Expand Down Expand Up @@ -169,17 +169,17 @@ def eval(

def run(
self,
question: str,
query: str,
filters: Optional[dict] = None,
top_k_retriever: Optional[int] = None,
top_k_reader: Optional[int] = None,
):
if top_k_retriever:
documents = self.retrieve(query=question, filters=filters, top_k=top_k_retriever)
documents = self.retrieve(query=query, filters=filters, top_k=top_k_retriever)
else:
documents = self.retrieve(query=question, filters=filters)
documents = self.retrieve(query=query, filters=filters)
output = {
"question": question,
"query": query,
"documents": documents,
"top_k": top_k_reader
}
Expand Down
4 changes: 2 additions & 2 deletions haystack/retriever/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class ElasticsearchRetriever(BaseRetriever):
def __init__(self, document_store: ElasticsearchDocumentStore, custom_query: str = None):
"""
:param document_store: an instance of a DocumentStore to retrieve documents from.
:param custom_query: query string as per Elasticsearch DSL with a mandatory question placeholder($question).
:param custom_query: query string as per Elasticsearch DSL with a mandatory query placeholder(${query}).
Optionally, ES `filter` clause can be added where the values of `terms` are placeholders
that get substituted during runtime. The placeholder(${filter_name_1}, ${filter_name_2}..)
Expand All @@ -32,7 +32,7 @@ def __init__(self, document_store: ElasticsearchDocumentStore, custom_query: str
> "query": {
> "bool": {
> "should": [{"multi_match": {
> "query": "${question}", // mandatory $question placeholder
> "query": "${query}", // mandatory $query placeholder
> "type": "most_fields",
> "fields": ["text", "title"]}}],
> "filter": [ // optional custom filters
Expand Down
6 changes: 3 additions & 3 deletions haystack/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,19 @@ def export_answers_to_csv(agg_results: list, output_file):
if isinstance(agg_results, dict):
agg_results = [agg_results]

assert "question" in agg_results[0], f"Wrong format used for {agg_results[0]}"
assert "query" in agg_results[0], f"Wrong format used for {agg_results[0]}"
assert "answers" in agg_results[0], f"Wrong format used for {agg_results[0]}"

data = {} # type: Dict[str, List[Any]]
data["question"] = []
data["query"] = []
data["prediction"] = []
data["prediction_rank"] = []
data["prediction_context"] = []

for res in agg_results:
for i in range(len(res["answers"])):
temp = res["answers"][i]
data["question"].append(res["question"])
data["query"].append(res["query"])
data["prediction"].append(temp["answer"])
data["prediction_rank"].append(i + 1)
data["prediction_context"].append(temp["context"])
Expand Down
4 changes: 2 additions & 2 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,14 @@ def no_answer_reader(request, transformers_roberta, farm_roberta):
@pytest.fixture()
def prediction(reader, test_docs_xs):
docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs]
prediction = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5)
prediction = reader.predict(query="Who lives in Berlin?", documents=docs, top_k=5)
return prediction


@pytest.fixture()
def no_answer_prediction(no_answer_reader, test_docs_xs):
docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs]
prediction = no_answer_reader.predict(question="What is the meaning of life?", documents=docs, top_k=5)
prediction = no_answer_reader.predict(query="What is the meaning of life?", documents=docs, top_k=5)
return prediction


Expand Down
2 changes: 1 addition & 1 deletion test/test_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_finder_get_answers(reader, retriever_with_docs, document_store_with_doc
prediction = finder.get_answers(question="Who lives in Berlin?", top_k_retriever=10,
top_k_reader=3)
assert prediction is not None
assert prediction["question"] == "Who lives in Berlin?"
assert prediction["query"] == "Who lives in Berlin?"
assert prediction["answers"][0]["answer"] == "Carla"
assert prediction["answers"][0]["probability"] <= 1
assert prediction["answers"][0]["probability"] >= 0
Expand Down
Loading

0 comments on commit b2672f9

Please sign in to comment.