From bdbd1b323bc855e38ae16f59dc52b6d87d662264 Mon Sep 17 00:00:00 2001 From: Malte Pietsch Date: Thu, 15 Oct 2020 18:41:36 +0200 Subject: [PATCH] Add create_index and similarity metric to api config (#493) * make creation of label index optional * add params for rest api * reset tutorial flag --- rest_api/config.py | 2 ++ rest_api/controller/search.py | 6 +++++- tutorials/Tutorial1_Basic_QA_Pipeline.py | 1 - 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rest_api/config.py b/rest_api/config.py index c148bd89c3..bbe05d1edc 100644 --- a/rest_api/config.py +++ b/rest_api/config.py @@ -25,6 +25,8 @@ FAQ_QUESTION_FIELD_NAME = os.getenv("FAQ_QUESTION_FIELD_NAME", "question") EMBEDDING_FIELD_NAME = os.getenv("EMBEDDING_FIELD_NAME", None) EMBEDDING_DIM = os.getenv("EMBEDDING_DIM", None) +VECTOR_SIMILARITY_METRIC = os.getenv("VECTOR_SIMILARITY_METRIC", "dot_product") +CREATE_INDEX = os.getenv("CREATE_INDEX", "True").lower() == "true" # Reader READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", "deepset/roberta-base-squad2") diff --git a/rest_api/controller/search.py b/rest_api/controller/search.py index d643d71cfa..9d6f2ae8af 100644 --- a/rest_api/controller/search.py +++ b/rest_api/controller/search.py @@ -14,7 +14,9 @@ EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, RETRIEVER_TYPE, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \ BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \ DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \ - EMBEDDING_MODEL_FORMAT, READER_TYPE, READER_TOKENIZER, GPU_NUMBER, NAME_FIELD_NAME + EMBEDDING_MODEL_FORMAT, READER_TYPE, READER_TOKENIZER, GPU_NUMBER, NAME_FIELD_NAME, VECTOR_SIMILARITY_METRIC, \ + CREATE_INDEX + from rest_api.controller.utils import RequestLimiter from haystack.document_store.elasticsearch import ElasticsearchDocumentStore from haystack.reader.farm import FARMReader @@ -43,6 +45,8 @@ embedding_field=EMBEDDING_FIELD_NAME, excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore faq_question_field=FAQ_QUESTION_FIELD_NAME, + create_index=CREATE_INDEX, + similarity=VECTOR_SIMILARITY_METRIC ) diff --git a/tutorials/Tutorial1_Basic_QA_Pipeline.py b/tutorials/Tutorial1_Basic_QA_Pipeline.py index e1bfb1ab20..fd76d660e3 100755 --- a/tutorials/Tutorial1_Basic_QA_Pipeline.py +++ b/tutorials/Tutorial1_Basic_QA_Pipeline.py @@ -86,7 +86,6 @@ logger.warning("Since we already have a running ES instance we should not index the same documents again. \n" "If you still want to do this call: document_store.write_documents(dicts) manually ") - # ## Initalize Retriever, Reader, & Finder # # ### Retriever