From cc2363ed47b7e5893db1a9b407b53496777aea42 Mon Sep 17 00:00:00 2001 From: Yura Lukashik Date: Fri, 20 Oct 2023 17:42:07 +0300 Subject: [PATCH] Search: measure time for each query execution stage --- src/semantic_search/semantic_search/query.py | 16 +++++++++++++++- src/services/slack_service.py | 1 - 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/semantic_search/semantic_search/query.py b/src/semantic_search/semantic_search/query.py index 87ba087..eb08b6e 100644 --- a/src/semantic_search/semantic_search/query.py +++ b/src/semantic_search/semantic_search/query.py @@ -1,10 +1,17 @@ +import logging +import time from .external_services.pinecone import get_pinecone_index from .external_services.openai import create_embedding, gpt_query def smart_query(namespace, query): + logging.info(f"Executing Smart Query: {query}") + + stage_start_time = time.perf_counter() query_vector = create_embedding(query) + logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s") + stage_start_time = time.perf_counter() query_results = get_pinecone_index().query( queries=[query_vector], top_k=30, include_values=False, includeMetadata=True ) + logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s") query_matches = query_results['results'][0]['matches'] messages_for_filtering = ["" + qm['id'] + '' + qm['metadata']['text'] + "" for qm in @@ -21,7 +29,10 @@ def smart_query(namespace, query): f" to the following search query or may contain an answer to the question in it: \"{query}\". Do not include any explanations, only provide" f" a list of IDs separated by comma. Prefer more recent messages. 
Here is the list of messages:") filter_query += "\n" + "\n".join(messages_for_filtering) + + stage_start_time = time.perf_counter() filter_response = gpt_query(filter_query) + logging.info(f"Smart Query: ChatGPT filtered messages in {round(time.perf_counter() - stage_start_time, 2)}s") filtered_matches = list(filter(lambda m: m['id'] in filter_response, query_matches)) filtered_messages_text = "\n".join([qm['metadata']['text'] for qm in filtered_matches]) @@ -30,4 +41,7 @@ f"information in the found messages: {filtered_messages_text}. Use messages that are " f"relevant to the query, but don't include anything about that in your answer. Also give " f"me an explanation of why each of the included messages is relevant.") - return gpt_query(summarize_answer_query) + stage_start_time = time.perf_counter() + response = gpt_query(summarize_answer_query) + logging.info(f"Smart Query: ChatGPT summarized the answer in {round(time.perf_counter() - stage_start_time, 2)}s") + return response diff --git a/src/services/slack_service.py b/src/services/slack_service.py index 50daf60..8ec619c 100644 --- a/src/services/slack_service.py +++ b/src/services/slack_service.py @@ -171,7 +171,6 @@ def handle_app_mention(event, say): match = re.search(search_pattern, text) if match and smart_search_available: search_query = match.group(1) - logging.info(f"Executing Smart Query: {search_query}") response = smart_query(team_id, search_query) else: response = respond_to_user(messages, openAi_key, team_id)