From dfa79b2ef60e650442f0a7abbce9ec9fee54976b Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Fri, 16 Sep 2022 13:56:00 -0700 Subject: [PATCH] Added sample perf-test configs for faiss-ivf, faiss-ivfpq, lucene-hnsw, nmslib-hnsw (#555) Signed-off-by: Navneet Verma Signed-off-by: Navneet Verma --- .../release-configs/faiss-ivf/index.json | 17 ++++++ .../release-configs/faiss-ivf/method-spec.json | 9 ++++ .../release-configs/faiss-ivf/test.yml | 53 +++++++++++++++++++ .../faiss-ivf/train-index-spec.json | 16 ++++++ .../release-configs/faiss-ivfpq/index.json | 17 ++++++ .../faiss-ivfpq/method-spec.json | 16 ++++++ .../release-configs/faiss-ivfpq/test.yml | 53 +++++++++++++++++++ .../faiss-ivfpq/train-index-spec.json | 16 ++++++ .../release-configs/lucene-hnsw/index.json | 26 +++++++++ .../release-configs/lucene-hnsw/test.yml | 29 ++++++++++ .../release-configs/nmslib-hnsw/index.json | 27 ++++++++++ .../release-configs/nmslib-hnsw/test.yml | 29 ++++++++++ 12 files changed, 308 insertions(+) create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivf/index.json create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivf/method-spec.json create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivf/test.yml create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivf/train-index-spec.json create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivfpq/index.json create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivfpq/method-spec.json create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivfpq/test.yml create mode 100644 benchmarks/perf-tool/release-configs/faiss-ivfpq/train-index-spec.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/index.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/test.yml create mode 100644 benchmarks/perf-tool/release-configs/nmslib-hnsw/index.json create mode 100644 benchmarks/perf-tool/release-configs/nmslib-hnsw/test.yml diff --git 
a/benchmarks/perf-tool/release-configs/faiss-ivf/index.json b/benchmarks/perf-tool/release-configs/faiss-ivf/index.json new file mode 100644 index 000000000..479703412 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivf/index.json @@ -0,0 +1,17 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "model_id": "test-model" + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/faiss-ivf/method-spec.json b/benchmarks/perf-tool/release-configs/faiss-ivf/method-spec.json new file mode 100644 index 000000000..51ae89877 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivf/method-spec.json @@ -0,0 +1,9 @@ +{ + "name":"ivf", + "engine":"faiss", + "space_type": "l2", + "parameters":{ + "nlist": 128, + "nprobes": 8 + } +} diff --git a/benchmarks/perf-tool/release-configs/faiss-ivf/test.yml b/benchmarks/perf-tool/release-configs/faiss-ivf/test.yml new file mode 100644 index 000000000..ce6b78867 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivf/test.yml @@ -0,0 +1,53 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "index workflow" +num_runs: 10 +show_runs: false +setup: + - name: delete_index + index_name: train_index + - name: create_index + index_name: train_index + index_spec: /home/ec2-user/[PATH]/train-index-spec.json + - name: ingest + index_name: train_index + field_name: train_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + doc_count: 50000 + - name: refresh_index + index_name: train_index +steps: + - name: delete_model + model_id: test-model + - name: delete_index + index_name: target_index + - name: train_model + model_id: test-model + train_index: train_index + train_field: train_field + dimension: 128 + method_spec: /home/ec2-user/[PATH]/method-spec.json + max_training_vector_count: 50000 + - name: 
create_index + index_name: target_index + index_spec: /home/ec2-user/[PATH]/index.json + - name: ingest + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + - name: refresh_index + index_name: target_index + - name: query + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + neighbors_format: hdf5 + neighbors_path: /home/ec2-user/data/sift-128-euclidean.hdf5 diff --git a/benchmarks/perf-tool/release-configs/faiss-ivf/train-index-spec.json b/benchmarks/perf-tool/release-configs/faiss-ivf/train-index-spec.json new file mode 100644 index 000000000..804a5707e --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivf/train-index-spec.json @@ -0,0 +1,16 @@ +{ + "settings": { + "index": { + "number_of_shards": 24, + "number_of_replicas": 0 + } + }, + "mappings": { + "properties": { + "train_field": { + "type": "knn_vector", + "dimension": 128 + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/faiss-ivfpq/index.json b/benchmarks/perf-tool/release-configs/faiss-ivfpq/index.json new file mode 100644 index 000000000..479703412 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivfpq/index.json @@ -0,0 +1,17 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "model_id": "test-model" + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/faiss-ivfpq/method-spec.json b/benchmarks/perf-tool/release-configs/faiss-ivfpq/method-spec.json new file mode 100644 index 000000000..204b0a653 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivfpq/method-spec.json @@ -0,0 +1,16 @@ +{ + "name":"ivf", + "engine":"faiss", + "space_type": "l2", + "parameters":{ + "nlist": 
128, + "nprobes": 8, + "encoder": { + "name": "pq", + "parameters": { + "m": 16, + "code_size": 8 + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/faiss-ivfpq/test.yml b/benchmarks/perf-tool/release-configs/faiss-ivfpq/test.yml new file mode 100644 index 000000000..dd88affc3 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivfpq/test.yml @@ -0,0 +1,53 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "index workflow" +num_runs: 10 +show_runs: false +setup: + - name: delete_index + index_name: train_index + - name: create_index + index_name: train_index + index_spec: /home/ec2-user/[PATH]/train-index-spec.json + - name: ingest + index_name: train_index + field_name: train_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + doc_count: 50000 + - name: refresh_index + index_name: train_index +steps: + - name: delete_model + model_id: test-model + - name: delete_index + index_name: target_index + - name: train_model + model_id: test-model + train_index: train_index + train_field: train_field + dimension: 128 + method_spec: /home/ec2-user/[PATH]/method-spec.json + max_training_vector_count: 50000 + - name: create_index + index_name: target_index + index_spec: /home/ec2-user/[PATH]/index.json + - name: ingest + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + - name: refresh_index + index_name: target_index + - name: query + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + neighbors_format: hdf5 + neighbors_path: /home/ec2-user/data/sift-128-euclidean.hdf5 diff --git a/benchmarks/perf-tool/release-configs/faiss-ivfpq/train-index-spec.json b/benchmarks/perf-tool/release-configs/faiss-ivfpq/train-index-spec.json new file mode 100644 index 
000000000..804a5707e --- /dev/null +++ b/benchmarks/perf-tool/release-configs/faiss-ivfpq/train-index-spec.json @@ -0,0 +1,16 @@ +{ + "settings": { + "index": { + "number_of_shards": 24, + "number_of_replicas": 0 + } + }, + "mappings": { + "properties": { + "train_field": { + "type": "knn_vector", + "dimension": 128 + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/index.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/index.json new file mode 100644 index 000000000..7a9ff2890 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/index.json @@ -0,0 +1,26 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "lucene", + "parameters": { + "ef_construction": 256, + "m": 16 + } + } + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/test.yml new file mode 100644 index 000000000..96b991325 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/test.yml @@ -0,0 +1,29 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "Index workflow" +num_runs: 10 +show_runs: false +steps: + - name: delete_index + index_name: target_index + - name: create_index + index_name: target_index + index_spec: /home/ec2-user/[PATH]/index.json + - name: ingest + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + - name: refresh_index + index_name: target_index + - name: query + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + neighbors_format: hdf5 + neighbors_path: 
/home/ec2-user/data/sift-128-euclidean.hdf5 diff --git a/benchmarks/perf-tool/release-configs/nmslib-hnsw/index.json b/benchmarks/perf-tool/release-configs/nmslib-hnsw/index.json new file mode 100644 index 000000000..fa88192f6 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/nmslib-hnsw/index.json @@ -0,0 +1,27 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1, + "knn.algo_param.ef_search": 256 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "nmslib", + "parameters": { + "ef_construction": 256, + "m": 16 + } + } + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/nmslib-hnsw/test.yml b/benchmarks/perf-tool/release-configs/nmslib-hnsw/test.yml new file mode 100644 index 000000000..96b991325 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/nmslib-hnsw/test.yml @@ -0,0 +1,29 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "Index workflow" +num_runs: 10 +show_runs: false +steps: + - name: delete_index + index_name: target_index + - name: create_index + index_name: target_index + index_spec: /home/ec2-user/[PATH]/index.json + - name: ingest + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + - name: refresh_index + index_name: target_index + - name: query + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5 + neighbors_format: hdf5 + neighbors_path: /home/ec2-user/data/sift-128-euclidean.hdf5