From 4e6aa277c6d855ff636b6e77d891ac9055eb7a7e Mon Sep 17 00:00:00 2001
From: Yura Lukashik
Date: Mon, 30 Oct 2023 00:53:38 +0300
Subject: [PATCH] Search: add logs to send_https_request

---
 .../external_services/pinecone.py             |  4 ++--
 .../semantic_search/https_requests.py         | 17 ++++++++++++++++-
 src/semantic_search/semantic_search/query.py  | 16 +++++++++++-----
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/semantic_search/semantic_search/external_services/pinecone.py b/src/semantic_search/semantic_search/external_services/pinecone.py
index a399909..e2d8efd 100644
--- a/src/semantic_search/semantic_search/external_services/pinecone.py
+++ b/src/semantic_search/semantic_search/external_services/pinecone.py
@@ -16,7 +16,7 @@ def get_pinecone_index() -> 'pinecone.Index':
     return pinecone.Index(get_pinecone_index_name())
 
 
-def query_index(query_vector, top_k, namespace, include_values, include_metadata):
+def query_index(query_vector, top_k, namespace, include_values, include_metadata, trace_id):
     host = f"{get_pinecone_index_name()}-0ddc4d6.svc.{get_pinecone_environment()}.pinecone.io"
     path = "/query"
     headers = {
@@ -31,4 +31,4 @@ def query_index(query_vector, top_k, namespace, include_values, include_metadata
         "includeValues": include_values,
         "namespace": namespace,
     }
-    return send_https_request(host, path, data, headers)
+    return send_https_request(host, path, data, headers, trace_id)
diff --git a/src/semantic_search/semantic_search/https_requests.py b/src/semantic_search/semantic_search/https_requests.py
index edff750..985b772 100644
--- a/src/semantic_search/semantic_search/https_requests.py
+++ b/src/semantic_search/semantic_search/https_requests.py
@@ -1,6 +1,9 @@
 import json
+import logging
 import socket
 import ssl
+import time
+import uuid
 
 
 def create_ssl_socket(host, port):
@@ -30,7 +33,11 @@
     return buffer.decode()
 
 
-def send_https_request(host, path, data, headers=None):
+def send_https_request(host, path, data, headers=None, trace_id=None):
+    start_time = time.perf_counter_ns()
+    if trace_id is None:
+        trace_id = uuid.uuid4()
+    logging.info(f"send_https_request start, trace_id = {trace_id}")
     if headers is None:
         headers = {}
     json_data = json.dumps(data)
@@ -46,8 +53,16 @@
     request += "".join(f"{key}: {value}\r\n" for key, value in request_headers.items())
     request += "\r\n" + json_data
     with create_ssl_socket(host, 443) as ssock:
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"socket created, trace_id = {trace_id}")
         ssock.send(request.encode())
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"data sent, trace_id = {trace_id}")
         raw_response = read_all_from_socket(ssock)
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"data received, trace_id = {trace_id}")
         status_code, status_line, response_headers, response_body = parse_http_response(raw_response)
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"response parsed, trace_id = {trace_id}")
         return status_code, status_line, response_headers, response_body
 
diff --git a/src/semantic_search/semantic_search/query.py b/src/semantic_search/semantic_search/query.py
index b2bfb5f..ecf455c 100644
--- a/src/semantic_search/semantic_search/query.py
+++ b/src/semantic_search/semantic_search/query.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import time
+import uuid
 from datetime import date
 from .external_services.pinecone import get_pinecone_index, query_index
 from .external_services.openai import create_embedding, gpt_query
@@ -29,11 +30,13 @@ def build_links_list(namespace: str, matches) -> str:
 
 
 def smart_query(namespace, query, username: str):
-    logging.info(f"Executing Smart Query: {query}")
+    trace_id = uuid.uuid4()
+    logging.info(f"Executing Smart Query: {query}, trace_id = {trace_id}")
 
     stage_start_time = time.perf_counter()
     query_vector = create_embedding(query)
-    logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s")
+    logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s, "
+                 f"trace_id = {trace_id}")
 
     stage_start_time = time.perf_counter()
     # query_results = get_pinecone_index().query(
@@ -48,9 +51,11 @@
         top_k=50,
         namespace=namespace,
         include_values=False,
-        include_metadata=True
+        include_metadata=True,
+        trace_id=trace_id,
     )
-    logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s")
+    logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s, "
+                 f"trace_id = {trace_id}")
 
     # query_matches = query_results['results'][0]['matches']
     query_matches = json.loads(query_results[3])['matches']
@@ -89,7 +94,8 @@
 
     stage_start_time = time.perf_counter()
     result = json.loads(gpt_query(prompt))
-    logging.info(f"Smart Query: Request to ChatGPT took {round(time.perf_counter() - stage_start_time, 2)}s")
+    logging.info(f"Smart Query: Request to ChatGPT took {round(time.perf_counter() - stage_start_time, 2)}s, "
+                 f"trace_id = {trace_id}")
 
     used_messages = list(filter(lambda match: match["id"] in result["messages"], query_matches))
 