From 4e6aa277c6d855ff636b6e77d891ac9055eb7a7e Mon Sep 17 00:00:00 2001
From: Yura Lukashik
Date: Mon, 30 Oct 2023 00:53:38 +0300
Subject: [PATCH] Search: add logs to send_https_request

---
 .../external_services/pinecone.py             |  4 ++--
 .../semantic_search/https_requests.py         | 17 ++++++++++++++++-
 src/semantic_search/semantic_search/query.py  | 16 +++++++++++-----
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/semantic_search/semantic_search/external_services/pinecone.py b/src/semantic_search/semantic_search/external_services/pinecone.py
index a399909..e2d8efd 100644
--- a/src/semantic_search/semantic_search/external_services/pinecone.py
+++ b/src/semantic_search/semantic_search/external_services/pinecone.py
@@ -16,7 +16,7 @@ def get_pinecone_index() -> 'pinecone.Index':
     return pinecone.Index(get_pinecone_index_name())
 
 
-def query_index(query_vector, top_k, namespace, include_values, include_metadata):
+def query_index(query_vector, top_k, namespace, include_values, include_metadata, trace_id):
     host = f"{get_pinecone_index_name()}-0ddc4d6.svc.{get_pinecone_environment()}.pinecone.io"
     path = "/query"
     headers = {
@@ -31,4 +31,4 @@ def query_index(query_vector, top_k, namespace, include_values, include_metadata
         "includeValues": include_values,
         "namespace": namespace,
     }
-    return send_https_request(host, path, data, headers)
+    return send_https_request(host, path, data, headers, trace_id)
diff --git a/src/semantic_search/semantic_search/https_requests.py b/src/semantic_search/semantic_search/https_requests.py
index edff750..985b772 100644
--- a/src/semantic_search/semantic_search/https_requests.py
+++ b/src/semantic_search/semantic_search/https_requests.py
@@ -1,6 +1,9 @@
 import json
+import logging
 import socket
 import ssl
+import time
+import uuid
 
 
 def create_ssl_socket(host, port):
@@ -30,7 +33,11 @@
     return buffer.decode()
 
 
-def send_https_request(host, path, data, headers=None):
+def send_https_request(host, path, data, headers=None, trace_id=None):
+    start_time = time.perf_counter_ns()
+    if trace_id is None:
+        trace_id = uuid.uuid4()
+    logging.info(f"send_https_request start, trace_id = {trace_id}")
     if headers is None:
         headers = {}
     json_data = json.dumps(data)
@@ -46,8 +53,16 @@
     request += "".join(f"{key}: {value}\r\n" for key, value in request_headers.items())
     request += "\r\n" + json_data
     with create_ssl_socket(host, 443) as ssock:
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"socket created, trace_id = {trace_id}")
         ssock.send(request.encode())
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"data sent, trace_id = {trace_id}")
         raw_response = read_all_from_socket(ssock)
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"data received, trace_id = {trace_id}")
         status_code, status_line, response_headers, response_body = parse_http_response(raw_response)
+        logging.info(f"send_https_request [{((time.perf_counter_ns() - start_time) / 10**9):.3f}] "
+                     f"response parsed, trace_id = {trace_id}")
         return status_code, status_line, response_headers, response_body
 
diff --git a/src/semantic_search/semantic_search/query.py b/src/semantic_search/semantic_search/query.py
index b2bfb5f..ecf455c 100644
--- a/src/semantic_search/semantic_search/query.py
+++ b/src/semantic_search/semantic_search/query.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import time
+import uuid
 from datetime import date
 from .external_services.pinecone import get_pinecone_index, query_index
 from .external_services.openai import create_embedding, gpt_query
@@ -29,11 +30,13 @@ def build_links_list(namespace: str, matches) -> str:
 
 
 def smart_query(namespace, query, username: str):
-    logging.info(f"Executing Smart Query: {query}")
+    trace_id = uuid.uuid4()
+    logging.info(f"Executing Smart Query: {query}, trace_id = {trace_id}")
 
     stage_start_time = time.perf_counter()
     query_vector = create_embedding(query)
-    logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s")
+    logging.info(f"Smart Query: embedding created in {round(time.perf_counter() - stage_start_time, 2)}s, "
+                 f"trace_id = {trace_id}")
 
     stage_start_time = time.perf_counter()
     # query_results = get_pinecone_index().query(
@@ -48,9 +51,11 @@
         top_k=50,
         namespace=namespace,
         include_values=False,
-        include_metadata=True
+        include_metadata=True,
+        trace_id=trace_id,
     )
-    logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s")
+    logging.info(f"Smart Query: Pinecone search finished in {round(time.perf_counter() - stage_start_time, 2)}s, "
+                 f"trace_id = {trace_id}")
 
     # query_matches = query_results['results'][0]['matches']
     query_matches = json.loads(query_results[3])['matches']
@@ -89,7 +94,8 @@
 
     stage_start_time = time.perf_counter()
     result = json.loads(gpt_query(prompt))
-    logging.info(f"Smart Query: Request to ChatGPT took {round(time.perf_counter() - stage_start_time, 2)}s")
+    logging.info(f"Smart Query: Request to ChatGPT took {round(time.perf_counter() - stage_start_time, 2)}s, "
+                 f"trace_id = {trace_id}")
 
     used_messages = list(filter(lambda match: match["id"] in result["messages"], query_matches))
 