Skip to content

Commit

Permalink
feat: vec-270 support separate data and index namespaces in the quote…
Browse files Browse the repository at this point in the history
… search example
  • Loading branch information
dwelch-spike committed Aug 1, 2024
1 parent 2204223 commit 69b2c6c
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Aerospike database configuration file for use with systemd.

service {
cluster-name quote-demo
proto-fd-max 15000
}


logging {
file /var/log/aerospike/aerospike.log {
context any info
}

# Send log messages to stdout
console {
context any info
context query critical
}
}

network {
service {
address any
port 3000
}

heartbeat {
mode multicast
multicast-group 239.1.99.222
port 9918

# To use unicast-mesh heartbeats, remove the 3 lines above, and see
# aerospike_mesh.conf for an alternative.

interval 150
timeout 10
}

fabric {
port 3001
}

info {
port 3003
}
}

namespace test {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/index.dat
filesize 16G
}
}

namespace avs-meta {
replication-factor 1
nsup-period 100

storage-engine memory {
data-size 1G
}

# To use file storage backing, comment out the storage-engine memory block
# above and use the following lines instead.
# storage-engine device {
# file /opt/aerospike/data/bar.dat
# filesize 16G
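# data-in-memory true # Store data in memory in addition to file. NOTE: data-in-memory was removed in Aerospike Database 7.0; omit this line on 7.0+ servers (verify against your server version).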
# }
}

Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,23 @@ network {
}
}

namespace test {
namespace avs-index {
replication-factor 1
nsup-period 60

storage-engine device {
file /opt/aerospike/data/test.dat
filesize 16G
file /opt/aerospike/data/index.dat
filesize 8G
}
}

namespace avs-data {
replication-factor 2
nsup-period 60

storage-engine device {
file /opt/aerospike/data/data.dat
filesize 8G
}
}

Expand Down
7 changes: 7 additions & 0 deletions quote-semantic-search/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ services:
command:
- "--config-file"
- "/opt/aerospike/etc/aerospike/aerospike.conf"
# to store all index and vector data in the default namespace (test) and set (quote-data), replace the config path above with this line:
# - "/opt/aerospike/etc/aerospike/aerospike-single-namespace.conf"
healthcheck:
# test: [ "CMD", "asinfo", "-U", "admin", "-P", "admin", "-p", "3000", "-v", "build" ]
test: [ "CMD", "asinfo", "-p", "3000", "-v", "build" ]
Expand Down Expand Up @@ -48,6 +50,11 @@ services:
AVS_PORT: "5000"
APP_NUM_QUOTES: "5000"
GRPC_DNS_RESOLVER: native
# comment out the following lines to use the default namespace (test) and set (quote-data) to store all index and vector data
AVS_DATA_NAMESPACE: avs-data
AVS_DATA_SET: quote-data
AVS_INDEX_NAMESPACE: avs-index
AVS_INDEX_SET: quote-index

networks:
avs-demo: {}
6 changes: 4 additions & 2 deletions quote-semantic-search/quote-search/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ class Config(object):
AVS_PORT = int(os.environ.get("AVS_PORT") or 5000)
AVS_ADVERTISED_LISTENER = os.environ.get("AVS_ADVERTISED_LISTENER") or None
AVS_INDEX_NAME = os.environ.get("AVS_INDEX_NAME") or "quote-semantic-search"
AVS_NAMESPACE = os.environ.get("AVS_NAMESPACE") or "test"
AVS_SET = os.environ.get("AVS_SET") or "quote-data"
AVS_DATA_NAMESPACE = os.environ.get("AVS_DATA_NAMESPACE") or "test"
AVS_DATA_SET = os.environ.get("AVS_DATA_SET") or "quote-data"
AVS_INDEX_NAMESPACE = os.environ.get("AVS_INDEX_NAMESPACE") or "test"
AVS_INDEX_SET = os.environ.get("AVS_INDEX_SET") or "quote-data"
AVS_VERIFY_TLS = get_bool_env("VERIFY_TLS", True)
AVS_MAX_RESULTS = int(os.environ.get("AVS_MAX_RESULTS") or 5)
INDEXER_PARALLELISM = int(os.environ.get("APP_INDEXER_PARALLELISM") or 1)
Expand Down
4 changes: 2 additions & 2 deletions quote-semantic-search/quote-search/dataset_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def collect_stats():
for quote_id in range(Config.NUM_QUOTES):
# Check if record exists
if avs_client.is_indexed(
namespace=Config.AVS_NAMESPACE,
set_name=Config.AVS_SET,
namespace=Config.AVS_DATA_NAMESPACE,
set_name=Config.AVS_DATA_SET,
key=quote_id,
index_name=Config.AVS_INDEX_NAME,
):
Expand Down
11 changes: 6 additions & 5 deletions quote-semantic-search/quote-search/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,19 @@ def create_index():
try:
for index in avs_admin_client.index_list():
if (
index["id"]["namespace"] == Config.AVS_NAMESPACE
index["id"]["namespace"] == Config.AVS_DATA_NAMESPACE
and index["id"]["name"] == Config.AVS_INDEX_NAME
):
return

avs_admin_client.index_create(
namespace=Config.AVS_NAMESPACE,
namespace=Config.AVS_DATA_NAMESPACE,
name=Config.AVS_INDEX_NAME,
sets=Config.AVS_SET,
sets=Config.AVS_DATA_SET,
vector_field="quote_embedding",
dimensions=MODEL_DIM,
vector_distance_metric=types.VectorDistanceMetric.COSINE,
index_storage=types.IndexStorage(namespace=Config.AVS_INDEX_NAMESPACE, set_name=Config.AVS_INDEX_SET),
)

index_created = True
Expand Down Expand Up @@ -109,8 +110,8 @@ def index_quote(id_quote):
# Insert record
try:
avs_client.upsert(
namespace=Config.AVS_NAMESPACE,
set_name=Config.AVS_SET,
namespace=Config.AVS_DATA_NAMESPACE,
set_name=Config.AVS_DATA_SET,
key=doc["quote_id"],
record_data=doc,
)
Expand Down
6 changes: 3 additions & 3 deletions quote-semantic-search/quote-search/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def search_internal():
return "quote_id is required", 400

record = avs_client.get(
namespace=Config.AVS_NAMESPACE,
set_name=Config.AVS_SET,
namespace=Config.AVS_DATA_NAMESPACE,
set_name=Config.AVS_DATA_SET,
key=int(quote_id),
field_names=["quote_embedding"],
)
Expand All @@ -75,7 +75,7 @@ def vector_search(embedding, count=Config.AVS_MAX_RESULTS):
# Execute kNN search over the dataset
field_names = ["quote_id", "quote", "author"]
r = avs_client.vector_search(
namespace=Config.AVS_NAMESPACE,
namespace=Config.AVS_DATA_NAMESPACE,
index_name=Config.AVS_INDEX_NAME,
query=embedding,
limit=count,
Expand Down

0 comments on commit 69b2c6c

Please sign in to comment.