From d234efa0bedc78ed66fe97f8147368aca2d9ee15 Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Mon, 25 Mar 2024 13:24:09 -0700 Subject: [PATCH] Update param values and add new procedure (#249) Update param value to get optimal recall during nightly runs. Added a new procedure which includes index and merge segments. This will be helpful to prepare snapshot for search-only nightly runs. Signed-off-by: Vijayan Balasubramanian --- .../params/corpus/10million/faiss-cohere-768-dp.json | 9 +++++---- .../params/corpus/10million/lucene-cohere-768-dp.json | 9 +++++---- .../params/corpus/10million/nmslib-cohere-768-dp.json | 9 +++++---- .../params/corpus/1million/faiss-cohere-768-dp.json | 6 +++--- .../params/corpus/1million/lucene-cohere-768-dp.json | 6 +++--- .../params/corpus/1million/nmslib-cohere-768-dp.json | 6 +++--- vectorsearch/test_procedures/default.json | 9 +++++++++ 7 files changed, 33 insertions(+), 21 deletions(-) diff --git a/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json b/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json index 48b7644e..de653862 100644 --- a/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json +++ b/vectorsearch/params/corpus/10million/faiss-cohere-768-dp.json @@ -2,7 +2,8 @@ "target_index_name": "target_index", "target_field_name": "target_field", "target_index_body": "indices/faiss-index.json", - "target_index_primary_shards": 18, + "target_index_primary_shards": 6, + "target_index_replica_shards": 1, "target_index_dimension": 768, "target_index_space_type": "innerproduct", @@ -11,9 +12,9 @@ "target_index_bulk_index_data_set_corpus": "cohere-10m", "target_index_bulk_indexing_clients": 10, - "target_index_max_num_segments": 10, - "hnsw_ef_search": 100, - "hnsw_ef_construction": 100, + "target_index_max_num_segments": 1, + "hnsw_ef_search": 256, + "hnsw_ef_construction": 256, "query_k": 100, "query_body": { diff --git a/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json 
b/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json index f0833bc3..a00d94a6 100644 --- a/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json +++ b/vectorsearch/params/corpus/10million/lucene-cohere-768-dp.json @@ -2,7 +2,8 @@ "target_index_name": "target_index", "target_field_name": "target_field", "target_index_body": "indices/lucene-index.json", - "target_index_primary_shards": 18, + "target_index_primary_shards": 6, + "target_index_replica_shards": 1, "target_index_dimension": 768, "target_index_space_type": "innerproduct", @@ -11,9 +12,9 @@ "target_index_bulk_index_data_set_corpus": "cohere-10m", "target_index_bulk_indexing_clients": 10, - "target_index_max_num_segments": 10, - "hnsw_ef_search": 100, - "hnsw_ef_construction": 100, + "target_index_max_num_segments": 1, + "hnsw_ef_search": 256, + "hnsw_ef_construction": 256, "query_k": 100, "query_body": { diff --git a/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json b/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json index 05727aa9..417f54e5 100644 --- a/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json +++ b/vectorsearch/params/corpus/10million/nmslib-cohere-768-dp.json @@ -2,7 +2,8 @@ "target_index_name": "target_index", "target_field_name": "target_field", "target_index_body": "indices/nmslib-index.json", - "target_index_primary_shards": 18, + "target_index_primary_shards": 6, + "target_index_replica_shards": 1, "target_index_dimension": 768, "target_index_space_type": "innerproduct", @@ -11,9 +12,9 @@ "target_index_bulk_index_data_set_corpus": "cohere-10m", "target_index_bulk_indexing_clients": 10, - "target_index_max_num_segments": 10, - "hnsw_ef_search": 100, - "hnsw_ef_construction": 100, + "target_index_max_num_segments": 1, + "hnsw_ef_search": 256, + "hnsw_ef_construction": 256, "query_k": 100, "query_body": { diff --git a/vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json 
b/vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json index fa657ee8..30bfa726 100644 --- a/vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json +++ b/vectorsearch/params/corpus/1million/faiss-cohere-768-dp.json @@ -11,9 +11,9 @@ "target_index_bulk_index_data_set_corpus": "cohere-1m", "target_index_bulk_indexing_clients": 10, - "target_index_max_num_segments": 10, - "hnsw_ef_search": 100, - "hnsw_ef_construction": 100, + "target_index_max_num_segments": 1, + "hnsw_ef_search": 256, + "hnsw_ef_construction": 256, "query_k": 100, "query_body": { diff --git a/vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json b/vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json index 8298b258..b4b12b4b 100644 --- a/vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json +++ b/vectorsearch/params/corpus/1million/lucene-cohere-768-dp.json @@ -11,9 +11,9 @@ "target_index_bulk_index_data_set_corpus": "cohere-1m", "target_index_bulk_indexing_clients": 10, - "target_index_max_num_segments": 10, - "hnsw_ef_search": 100, - "hnsw_ef_construction": 100, + "target_index_max_num_segments": 1, + "hnsw_ef_search": 256, + "hnsw_ef_construction": 256, "query_k": 100, "query_body": { diff --git a/vectorsearch/params/corpus/1million/nmslib-cohere-768-dp.json b/vectorsearch/params/corpus/1million/nmslib-cohere-768-dp.json index 687bf07a..1cef5ff5 100644 --- a/vectorsearch/params/corpus/1million/nmslib-cohere-768-dp.json +++ b/vectorsearch/params/corpus/1million/nmslib-cohere-768-dp.json @@ -11,9 +11,9 @@ "target_index_bulk_index_data_set_corpus": "cohere-1m", "target_index_bulk_indexing_clients": 10, - "target_index_max_num_segments": 10, - "hnsw_ef_search": 100, - "hnsw_ef_construction": 100, + "target_index_max_num_segments": 1, + "hnsw_ef_search": 256, + "hnsw_ef_construction": 256, "query_k": 100, "query_body": { diff --git a/vectorsearch/test_procedures/default.json b/vectorsearch/test_procedures/default.json index db400ea7..db42455b 100644 --- 
a/vectorsearch/test_procedures/default.json +++ b/vectorsearch/test_procedures/default.json @@ -16,6 +16,15 @@ {{ benchmark.collect(parts="common/index-only-schedule.json") }} ] }, +{ + "name": "no-train-test-index-with-merge", + "description": "Perform indexing followed by force merge of segments for vector search", + "default": false, + "schedule": [ + {{ benchmark.collect(parts="common/index-only-schedule.json") }}, + {{ benchmark.collect(parts="common/force-merge-schedule.json") }} + ] +}, { "name": "search-only", "default": false,