diff --git a/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike-single-namespace.conf b/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike-single-namespace.conf new file mode 100644 index 0000000..6ef0e1b --- /dev/null +++ b/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike-single-namespace.conf @@ -0,0 +1,74 @@ +# Aerospike database configuration file for use with systemd. + +service { + cluster-name quote-demo + proto-fd-max 15000 +} + + +logging { + file /var/log/aerospike/aerospike.log { + context any info + } + + # Send log messages to stdout + console { + context any info + context query critical + } +} + +network { + service { + address any + port 3000 + } + + heartbeat { + mode multicast + multicast-group 239.1.99.222 + port 9918 + + # To use unicast-mesh heartbeats, remove the 3 lines above, and see + # aerospike_mesh.conf for alternative. + + interval 150 + timeout 10 + } + + fabric { + port 3001 + } + + info { + port 3003 + } +} + +namespace test { + replication-factor 1 + nsup-period 60 + + storage-engine device { + file /opt/aerospike/data/index.dat + filesize 16G + } +} + +namespace avs-meta { + replication-factor 1 + nsup-period 100 + + storage-engine memory { + data-size 1G + } + + # To use file storage backing, comment out the line above and use the + # following lines instead. +# storage-engine device { +# file /opt/aerospike/data/bar.dat +# filesize 16G +# data-in-memory true # Store data in memory in addition to file. 
+# } +} + diff --git a/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf b/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf index 8a01f56..08e127b 100644 --- a/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf +++ b/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf @@ -45,13 +45,23 @@ network { } } -namespace test { +namespace avs-index { replication-factor 1 nsup-period 60 storage-engine device { - file /opt/aerospike/data/test.dat - filesize 16G + file /opt/aerospike/data/index.dat + filesize 8G + } +} + +namespace avs-data { + replication-factor 2 + nsup-period 60 + + storage-engine device { + file /opt/aerospike/data/data.dat + filesize 8G } } diff --git a/quote-semantic-search/docker-compose.yml b/quote-semantic-search/docker-compose.yml index 5cb143f..74af675 100644 --- a/quote-semantic-search/docker-compose.yml +++ b/quote-semantic-search/docker-compose.yml @@ -10,6 +10,8 @@ services: command: - "--config-file" - "/opt/aerospike/etc/aerospike/aerospike.conf" + # to store all index and vector data in the default namespace (test) and set (quote-data), replace the config path above with the line below and also comment out the AVS_DATA_* / AVS_INDEX_* environment variables further down + # - "/opt/aerospike/etc/aerospike/aerospike-single-namespace.conf" healthcheck: # test: [ "CMD", "asinfo", "-U", "admin", "-P", "admin", "-p", "3000", "-v", "build" ] test: [ "CMD", "asinfo", "-p", "3000", "-v", "build" ] @@ -48,6 +50,11 @@ services: AVS_PORT: "5000" APP_NUM_QUOTES: "5000" GRPC_DNS_RESOLVER: native + # comment out the following lines (and also switch the aerospike service to aerospike-single-namespace.conf) to store all index and vector data in the default namespace (test) and set (quote-data) + AVS_DATA_NAMESPACE: avs-data + AVS_DATA_SET: quote-data + AVS_INDEX_NAMESPACE: avs-index + AVS_INDEX_SET: quote-index networks: avs-demo: {} \ No newline at end of file diff --git a/quote-semantic-search/quote-search/config.py b/quote-semantic-search/quote-search/config.py index 7b4f378..ab938c0 100644 --- 
a/quote-semantic-search/quote-search/config.py +++ b/quote-semantic-search/quote-search/config.py @@ -21,8 +21,10 @@ class Config(object): AVS_PORT = int(os.environ.get("AVS_PORT") or 5000) AVS_ADVERTISED_LISTENER = os.environ.get("AVS_ADVERTISED_LISTENER") or None AVS_INDEX_NAME = os.environ.get("AVS_INDEX_NAME") or "quote-semantic-search" - AVS_NAMESPACE = os.environ.get("AVS_NAMESPACE") or "test" - AVS_SET = os.environ.get("AVS_SET") or "quote-data" + AVS_DATA_NAMESPACE = os.environ.get("AVS_DATA_NAMESPACE") or "test" + AVS_DATA_SET = os.environ.get("AVS_DATA_SET") or "quote-data" + AVS_INDEX_NAMESPACE = os.environ.get("AVS_INDEX_NAMESPACE") or "test" + AVS_INDEX_SET = os.environ.get("AVS_INDEX_SET") or "quote-data" AVS_VERIFY_TLS = get_bool_env("VERIFY_TLS", True) AVS_MAX_RESULTS = int(os.environ.get("AVS_MAX_RESULTS") or 5) INDEXER_PARALLELISM = int(os.environ.get("APP_INDEXER_PARALLELISM") or 1) diff --git a/quote-semantic-search/quote-search/dataset_stats.py b/quote-semantic-search/quote-search/dataset_stats.py index 8dbabe0..65497ea 100644 --- a/quote-semantic-search/quote-search/dataset_stats.py +++ b/quote-semantic-search/quote-search/dataset_stats.py @@ -28,8 +28,8 @@ def collect_stats(): for quote_id in range(Config.NUM_QUOTES): # Check if record exists if avs_client.is_indexed( - namespace=Config.AVS_NAMESPACE, - set_name=Config.AVS_SET, + namespace=Config.AVS_DATA_NAMESPACE, + set_name=Config.AVS_DATA_SET, key=quote_id, index_name=Config.AVS_INDEX_NAME, ): diff --git a/quote-semantic-search/quote-search/indexer.py b/quote-semantic-search/quote-search/indexer.py index 3e9b849..e7303c1 100644 --- a/quote-semantic-search/quote-search/indexer.py +++ b/quote-semantic-search/quote-search/indexer.py @@ -41,18 +41,19 @@ def create_index(): try: for index in avs_admin_client.index_list(): if ( - index["id"]["namespace"] == Config.AVS_NAMESPACE + index["id"]["namespace"] == Config.AVS_DATA_NAMESPACE and index["id"]["name"] == Config.AVS_INDEX_NAME ): return 
avs_admin_client.index_create( - namespace=Config.AVS_NAMESPACE, + namespace=Config.AVS_DATA_NAMESPACE, name=Config.AVS_INDEX_NAME, - sets=Config.AVS_SET, + sets=Config.AVS_DATA_SET, vector_field="quote_embedding", dimensions=MODEL_DIM, vector_distance_metric=types.VectorDistanceMetric.COSINE, + index_storage=types.IndexStorage(namespace=Config.AVS_INDEX_NAMESPACE, set_name=Config.AVS_INDEX_SET), ) index_created = True @@ -109,8 +110,8 @@ def index_quote(id_quote): # Insert record try: avs_client.upsert( - namespace=Config.AVS_NAMESPACE, - set_name=Config.AVS_SET, + namespace=Config.AVS_DATA_NAMESPACE, + set_name=Config.AVS_DATA_SET, key=doc["quote_id"], record_data=doc, ) diff --git a/quote-semantic-search/quote-search/routes.py b/quote-semantic-search/quote-search/routes.py index caa27a2..e383338 100644 --- a/quote-semantic-search/quote-search/routes.py +++ b/quote-semantic-search/quote-search/routes.py @@ -55,8 +55,8 @@ def search_internal(): return "quote_id is required", 400 record = avs_client.get( - namespace=Config.AVS_NAMESPACE, - set_name=Config.AVS_SET, + namespace=Config.AVS_DATA_NAMESPACE, + set_name=Config.AVS_DATA_SET, key=int(quote_id), field_names=["quote_embedding"], ) @@ -75,7 +75,7 @@ def vector_search(embedding, count=Config.AVS_MAX_RESULTS): # Execute kNN search over the dataset field_names = ["quote_id", "quote", "author"] r = avs_client.vector_search( - namespace=Config.AVS_NAMESPACE, + namespace=Config.AVS_DATA_NAMESPACE, index_name=Config.AVS_INDEX_NAME, query=embedding, limit=count,