Add no_answer option to results #24

Merged: 6 commits, Feb 19, 2020
haystack/reader/farm.py (51 additions, 10 deletions)
@@ -27,7 +27,7 @@ def __init__(
self,
model_name_or_path,
context_window_size=30,
-no_ans_threshold=-100,
+no_ans_boost=-100,
batch_size=16,
use_gpu=True,
n_candidates_per_passage=2):
@@ -40,19 +40,24 @@
....
See https://huggingface.co/models for full list of available models.
:param context_window_size: The size, in characters, of the window around the answer span that is used when displaying the context around the answer.
-:param no_ans_threshold: How much greater the no_answer logit needs to be over the pos_answer in order to be chosen.
-                         The higher the value, the more `uncertain` answers are accepted
+:param no_ans_boost: How much the no_answer logit is boosted/increased.
+                     The higher the value, the more likely the model returns "no answer possible"
:param batch_size: Number of samples the model receives in one batch for inference
:param use_gpu: Whether to use GPU (if available)
:param n_candidates_per_passage: How many candidate answers are extracted per text sequence that the model can process at once (depends on `max_seq_len`).
Note: This is not the number of "final answers" you will receive
(see `top_k` in FARMReader.predict() or Finder.get_answers() for that)
+# TODO adjust FARM: n_cand = 2 currently returns no answer + the highest positive answer;
+# it should return no answer + the 2 best positive answers.
+# drawback: answers from a single paragraph might be very similar in text and score,
+# so we need more varied answers (by excluding overlapping answers?)
"""


self.inferencer = Inferencer.load(model_name_or_path, batch_size=batch_size, gpu=use_gpu, task_type="question_answering")
self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
-self.inferencer.model.prediction_heads[0].no_ans_threshold = no_ans_threshold
+self.inferencer.model.prediction_heads[0].no_ans_threshold = no_ans_boost  # TODO adjust naming and concept in FARM
+self.no_ans_boost = no_ans_boost
self.inferencer.model.prediction_heads[0].n_best = n_candidates_per_passage
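
Since `no_ans_boost` is, per the docstring above, added onto the no-answer logit, sweeping it is the natural way to tune how eagerly the reader abstains. A minimal sketch (the model name is taken from the tutorial below; the values are just examples):

```python
from haystack.reader.farm import FARMReader

# no_ans_boost = 0 leaves the model's own no-answer logit untouched;
# large negative values (such as the default, -100) effectively disable the no-answer option,
# large positive values make "no answer possible" win more often.
for boost in (-100, 0, 100):
    reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2",
                        use_gpu=False, no_ans_boost=boost)
```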

def train(self, data_dir, train_filename, dev_filename=None, test_file_name=None,
@@ -183,16 +188,21 @@ def predict(self, question, paragrahps, meta_data_paragraphs=None, top_k=None, m
}
input_dicts.append(cur)

-# get answers from QA model (Top 5 per input paragraph)
+# get answers from QA model (Default: top 5 per input paragraph)
predictions = self.inferencer.inference_from_dicts(
dicts=input_dicts, rest_api_schema=True, max_processes=max_processes
)

-# assemble answers from all the different paragraphs & format them
+# assemble answers from all the different paragraphs & format them.
+# For the "no answer" option, we collect all no_ans_gaps and decide how likely
+# a no answer is based on all no_ans_gap values across all documents
answers = []
+no_ans_gaps = []
+best_score_answer = 0
for pred in predictions:
for a in pred["predictions"][0]["answers"]:
if a["answer"]: #skip "no answer"
# skip "no answers" here
if a["answer"]:
cur = {"answer": a["answer"],
"score": a["score"],
"probability": float(expit(np.asarray([a["score"]]) / 8)), #just a pseudo prob for now
@@ -201,14 +211,45 @@
"offset_end": a["offset_answer_end"] - a["offset_context_start"],
"document_id": a["document_id"]}
answers.append(cur)
+no_ans_gaps.append(pred["predictions"][0]["no_ans_gap"])
+if a["score"] > best_score_answer:
+best_score_answer = a["score"]
+
+# adjust no_ans_gaps
+no_ans_gaps = np.array(no_ans_gaps)
+no_ans_gaps_adjusted = no_ans_gaps + self.no_ans_boost
+
+# We want to heuristically rank how likely or unlikely the "no answer" option is.
+
+# case: all documents return "no answer", so all no_ans_gaps are positive
+if np.sum(no_ans_gaps_adjusted < 0) == 0:
+# to rank, we add the smallest no_ans_gap (from the document where an answer would be nearly as likely as the no answer)
+# to the highest answer score we found
+no_ans_score = best_score_answer + min(no_ans_gaps_adjusted)
+# case: in some documents an answer is preferred over "no answer", i.e. their no_ans_gap is negative
+else:
+# the lowest (most negative) no_ans_gap is the positive no_ans_boost that would be needed for the
+# model to return "no answer" on all documents
+# we subtract this value from the best answer score to rank our "no answer" option
+# conveniently, this is the same equation as in the case above : )
+no_ans_score = best_score_answer + min(no_ans_gaps_adjusted)
+
+cur = {"answer": "[computer says no answer is likely]",
+"score": no_ans_score,
+"probability": float(expit(np.asarray(no_ans_score) / 8)),  # just a pseudo probability for now
+"context": "",
+"offset_start": 0,
+"offset_end": 0,
+"document_id": None}
+answers.append(cur)

# sort answers by their `probability` and select top-k
answers = sorted(
answers, key=lambda k: k["probability"], reverse=True
)
answers = answers[:top_k]

result = {"question": question,
"answers": answers}
"adjust_no_ans_boost": -min(no_ans_gaps_adjusted),
"answers": answers}

return result
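
For a concrete feel of the ranking heuristic, here is a self-contained rerun of the scoring rule with made-up numbers:

```python
import numpy as np

no_ans_boost = 0
best_score_answer = 7.5                    # best positive answer score across all documents
no_ans_gaps = np.array([-3.0, 1.5, 0.5])   # one no_ans_gap per document (invented values)

no_ans_gaps_adjusted = no_ans_gaps + no_ans_boost
# both branches in the diff above reduce to the same formula:
no_ans_score = best_score_answer + min(no_ans_gaps_adjusted)
print(no_ans_score)  # 7.5 + (-3.0) = 4.5 -> "no answer" ranks below the best real answer
print(-min(no_ans_gaps_adjusted))  # 3.0 -> the `adjust_no_ans_boost` hint returned in the result
```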
haystack/utils.py (4 additions, 2 deletions)
@@ -22,8 +22,10 @@ def print_answers(results, details="all"):
for key in keys_to_drop:
if key in a:
del a[key]
-# print them
-pp.pprint(answers)
+
+pp.pprint(answers)
+else:
+pp.pprint(results)
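
With the indentation fixed, `pp.pprint(answers)` now runs once after the key-filtering loop instead of once per answer, and any `details` value without special handling falls through to printing the raw results. Usage stays the same, e.g.:

```python
from haystack.utils import print_answers

# prediction is the dict returned by finder.get_answers(...)
print_answers(prediction, details="all")
```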


def convert_labels_to_squad(labels_file):
tutorials/Tutorial1_Basic_QA_Pipeline.py (3 additions, 1 deletion)
@@ -37,7 +37,8 @@
# Readers use more powerful but slower deep learning models
# You can select a local model or any of the QA models published on huggingface's model hub (https://huggingface.co/models)
# here: a medium sized BERT QA model trained via FARM on Squad 2.0
reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2", use_gpu=False)
# You can adjust the model to return "no answer possible" with the no_ans_boost. Higher values mean the model prefers "no answer possible"
reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2", use_gpu=False, no_ans_boost=0)

# OR: use alternatively a reader from huggingface's transformers package (https://github.com/huggingface/transformers)
# reader = TransformersReader(model="distilbert-base-uncased-distilled-squad", tokenizer="distilbert-base-uncased", use_gpu=-1)
@@ -50,6 +51,7 @@
# The higher top_k_retriever, the better (but also the slower) your answers.
prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)

#prediction = finder.get_answers(question="Who is the daughter of Arya Stark?", top_k_reader=5) # impossible question test
#prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5)
#prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)

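
Since the reader now appends a synthetic "[computer says no answer is likely]" entry to the ranked answers, a quick check for the impossible-question case above could look like this sketch (field names as in the farm.py diff above):

```python
prediction = finder.get_answers(question="Who is the daughter of Arya Stark?",
                                top_k_retriever=10, top_k_reader=5)

top = prediction["answers"][0]     # answers are sorted by probability, best first
if top["document_id"] is None:     # the synthetic no-answer entry carries no source document
    print("Model prefers 'no answer possible' (score: %.2f)" % top["score"])

# hint returned by the reader: the boost at which every document would flip to no-answer
print(prediction["adjust_no_ans_boost"])
```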