Search: measure time for each query execution stage

UpMortem · Oct 20, 2023 · cc2363e · cc2363e
1 parent dec2d1b
commit cc2363e
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 2 deletions.
diff --git a/src/semantic_search/semantic_search/query.py b/src/semantic_search/semantic_search/query.py
@@ -1,17 +1,25 @@
+import logging
+import time
 from .external_services.pinecone import get_pinecone_index
 from .external_services.openai import create_embedding, gpt_query
 
 
 def smart_query(namespace, query):
+    logging.info(f"Executing Smart Query: {query}")
+
+    stage_start_time = time.perf_counter()  # start of the embedding stage
     query_vector = create_embedding(query)
+    logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s")
 
+    stage_start_time = time.perf_counter()  # start of the Pinecone search stage
     query_results = get_pinecone_index().query(
         queries=[query_vector],
         top_k=30,
         namespace=namespace,
         include_values=False,
         includeMetadata=True
     )
+    logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s")
     query_matches = query_results['results'][0]['matches']
 
     messages_for_filtering = ["<MSG_ID>" + qm['id'] + '</MSG_ID><MSG>' + qm['metadata']['text'] + "<MSG>" for qm in
@@ -21,7 +29,10 @@ def smart_query(namespace, query):
                     f" to the following search query or may contain an answer to the question in it: \"{query}\". Do not include any explanations, only provide"
                     f" a list of IDs separated by comma. Prefer more recent messages. Here is the list of messages:")
     filter_query += "\n" + "\n".join(messages_for_filtering)
+
+    stage_start_time = time.perf_counter()  # start of the ChatGPT filtering stage
     filter_response = gpt_query(filter_query)
+    logging.info(f"Smart Query: ChatGPT filtered messages in {round(time.perf_counter() - stage_start_time, 2)}s")
 
     filtered_matches = list(filter(lambda m: m['id'] in filter_response, query_matches))
     filtered_messages_text = "\n".join([qm['metadata']['text'] for qm in filtered_matches])
@@ -30,4 +41,7 @@ def smart_query(namespace, query):
                               f"information in the found messages: {filtered_messages_text}. Use messages that are "
                               f"relevant to the query, but don't include anything about that in your answer. Also give "
                               f"me an explanation of why each of the included messages is relevant.")
-    return gpt_query(summarize_answer_query)
+    stage_start_time = time.perf_counter()  # start of the ChatGPT summarization stage
+    response = gpt_query(summarize_answer_query)
+    logging.info(f"Smart Query: ChatGPT summarized the answer in {round(time.perf_counter() - stage_start_time, 2)}s")
+    return response
diff --git a/src/services/slack_service.py b/src/services/slack_service.py
@@ -171,7 +171,6 @@ def handle_app_mention(event, say):
         match = re.search(search_pattern, text)
         if match and smart_search_available:
             search_query = match.group(1)
-            logging.info(f"Executing Smart Query: {search_query}")
             response = smart_query(team_id, search_query)
         else:
             response = respond_to_user(messages, openAi_key, team_id)