-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathquery_pinecone.py
75 lines (58 loc) · 2.52 KB
/
query_pinecone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import argparse
import logging
from pinecone import Pinecone
from openai import OpenAI
import dotenv
# Pull settings from a local .env file into the process environment
dotenv.load_dotenv()

# Route log records to a file; filemode 'w' truncates it on every run
# (use 'a' to append instead)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='query_pinecone.log',
    filemode='w',
)

# Pinecone client, keyed from the PINECONE_API_KEY environment variable
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

# OpenAI client, keyed from the OPENAI_API_KEY environment variable
oai = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

# Embedding model name; OPENAI_EMBEDDING_MODEL overrides the default
EMBEDDING_MODEL = os.environ.get('OPENAI_EMBEDDING_MODEL', "text-embedding-3-small")
def embed_query(query: str, model=EMBEDDING_MODEL):
    """Return the embedding of a single query string.

    Args:
        query: Text to embed.
        model: Embedding model name handed to the OpenAI client; defaults
            to the module-level ``EMBEDDING_MODEL``.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # The API takes a batch of inputs; send a one-element batch and keep
    # the only result that comes back.
    result = oai.embeddings.create(model=model, input=[query])
    first_item = result.data[0]
    return first_item.embedding
def query_pinecone(query: str, index_name: str, namespace: str, top_k: int = 10):
    """Embed a text query and search a Pinecone index with it.

    Args:
        query: Text to search for; it is embedded with ``embed_query``.
        index_name: Name of the Pinecone index to query.
        namespace: Namespace within the index to search.
        top_k: Maximum number of matches to request.

    Returns:
        The response object returned by the index's ``query`` call, with
        metadata included for each match.
    """
    # Embed the query
    dense_embedding = embed_query(query)

    # Open a handle to the target index
    index = pc.Index(index_name)

    # Pass the embedding itself: it is already a flat list of floats.
    # Wrapping it in another list yields a nested list, which is not a
    # valid single query vector.
    return index.query(
        vector=dense_embedding,
        top_k=top_k,
        include_metadata=True,
        namespace=namespace,
    )
def main():
    """Parse command-line options, run the query, then print and log the matches."""
    cli = argparse.ArgumentParser(description="Query Pinecone with a text input.")
    cli.add_argument('-query', '--query_text', required=True, help="Text input for the query.")
    cli.add_argument('-index', '--index_name', required=True, help="Name of the Pinecone index.")
    cli.add_argument('-top_k', '--top_k_results', type=int, default=10, help="Number of top results to retrieve.")
    cli.add_argument('-ns', '--namespace', required=True, help="Namespace for Pinecone index.")
    options = cli.parse_args()

    results = query_pinecone(
        options.query_text,
        options.index_name,
        options.namespace,
        options.top_k_results,
    )

    print("Query results:")
    for hit in results['matches']:
        print(f"Score: {hit['score']}")
        # Metadata is looked up only after the score line has been printed
        metadata = hit['metadata']
        print(f"Title: {metadata['title']}")
        print(f"URL: {metadata['url']}")
        print(f"Text: {metadata['body']}")
        print("-" * 80)

    # Keep a copy of the raw response in the log file
    logging.info("Query results: %s", results)


# Run the CLI only when this file is executed as a script
if __name__ == "__main__":
    main()