From f0669cabe92be688e2684bcfddab0af7d4d7735e Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Fri, 9 Dec 2022 16:48:47 -0800 Subject: [PATCH 1/2] Adding release configs for lucene filtering Signed-off-by: Martin Gaievski --- .../lucene-hnsw/relaxed-filter-spec.json | 42 ++++++++++++++++++ .../lucene-hnsw/relaxed-filter-test.yml | 33 ++++++++++++++ .../lucene-hnsw/restrictive-filter-spec.json | 44 +++++++++++++++++++ .../lucene-hnsw/restrictive-filter-test.yml | 33 ++++++++++++++ 4 files changed, 152 insertions(+) create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-spec.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-spec.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-spec.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-spec.json new file mode 100644 index 000000000..fecde0392 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-spec.json @@ -0,0 +1,42 @@ +{ + "bool": + { + "should": + [ + { + "range": + { + "age": + { + "gte": 30, + "lte": 70 + } + } + }, + { + "term": + { + "color": "green" + } + }, + { + "term": + { + "color": "blue" + } + }, + { + "term": + { + "color": "yellow" + } + }, + { + "term": + { + "taste": "sweet" + } + } + ] + } +} \ No newline at end of file diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml new file mode 100644 index 000000000..52f421bb2 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml @@ -0,0 +1,33 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "Index workflow" +num_runs: 10 +show_runs: false +steps: + - name: 
delete_index + index_name: target_index + - name: create_index + index_name: target_index + index_spec: /home/ec2-user/[PATH]/index.json + - name: ingest_multi_field + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + attributes_dataset_name: attributes + attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ] + - name: refresh_index + index_name: target_index + - name: query_with_filter + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + neighbors_format: hdf5 + neighbors_path: /home/ec2-user/data/sift-128-euclidean-with-filters.hdf5 + neighbors_dataset: neighbors_filter_5 + filter_spec: /home/ec2-user/[PATH]/relaxed-filter-spec.json diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-spec.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-spec.json new file mode 100644 index 000000000..9e6356f1c --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-spec.json @@ -0,0 +1,44 @@ +{ + "bool": + { + "must": + [ + { + "range": + { + "age": + { + "gte": 30, + "lte": 60 + } + } + }, + { + "term": + { + "taste": "bitter" + } + }, + { + "bool": + { + "should": + [ + { + "term": + { + "color": "blue" + } + }, + { + "term": + { + "color": "green" + } + } + ] + } + } + ] + } +} \ No newline at end of file diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml new file mode 100644 index 000000000..308d6b042 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml @@ -0,0 +1,33 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" 
+test_id: "Index workflow" +num_runs: 10 +show_runs: false +steps: + - name: delete_index + index_name: target_index + - name: create_index + index_name: target_index + index_spec: /home/ec2-user/[PATH]/index.json + - name: ingest_multi_field + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + attributes_dataset_name: attributes + attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ] + - name: refresh_index + index_name: target_index + - name: query_with_filter + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + neighbors_format: hdf5 + neighbors_path: /home/ec2-user/data/sift-128-euclidean-with-filters.hdf5 + neighbors_dataset: neighbors_filter_4 + filter_spec: /home/ec2-user/[PATH]/restrictive-filter-test.yml From c6ed5df1c930c187c7c7a5a5438a3f07bf011956 Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Tue, 13 Dec 2022 17:30:24 -0800 Subject: [PATCH 2/2] Move filter test configs to separate folders, replace ec2 paths with vars Signed-off-by: Martin Gaievski --- .../filtering/relaxed-filter/index.json | 26 +++++++++++++++++++ .../relaxed-filter}/relaxed-filter-spec.json | 0 .../relaxed-filter}/relaxed-filter-test.yml | 10 +++---- .../filtering/restrictive-filter/index.json | 26 +++++++++++++++++++ .../restrictive-filter-spec.json | 0 .../restrictive-filter-test.yml | 10 +++---- 6 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json rename benchmarks/perf-tool/release-configs/lucene-hnsw/{ => filtering/relaxed-filter}/relaxed-filter-spec.json (100%) rename benchmarks/perf-tool/release-configs/lucene-hnsw/{ => filtering/relaxed-filter}/relaxed-filter-test.yml 
(70%) create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json rename benchmarks/perf-tool/release-configs/lucene-hnsw/{ => filtering/restrictive-filter}/restrictive-filter-spec.json (100%) rename benchmarks/perf-tool/release-configs/lucene-hnsw/{ => filtering/restrictive-filter}/restrictive-filter-test.yml (69%) diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json new file mode 100644 index 000000000..7a9ff2890 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json @@ -0,0 +1,26 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "lucene", + "parameters": { + "ef_construction": 256, + "m": 16 + } + } + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-spec.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json similarity index 100% rename from benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-spec.json rename to benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml similarity index 70% rename from benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml rename to benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml index 52f421bb2..a47782649 100644 --- a/benchmarks/perf-tool/release-configs/lucene-hnsw/relaxed-filter-test.yml 
+++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml @@ -8,13 +8,13 @@ steps: index_name: target_index - name: create_index index_name: target_index - index_spec: /home/ec2-user/[PATH]/index.json + index_spec: [INDEX_SPEC_PATH]/index.json - name: ingest_multi_field index_name: target_index field_name: target_field bulk_size: 500 dataset_format: hdf5 - dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 attributes_dataset_name: attributes attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ] - name: refresh_index @@ -26,8 +26,8 @@ steps: index_name: target_index field_name: target_field dataset_format: hdf5 - dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 neighbors_format: hdf5 - neighbors_path: /home/ec2-user/data/sift-128-euclidean-with-filters.hdf5 + neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5 neighbors_dataset: neighbors_filter_5 - filter_spec: /home/ec2-user/[PATH]/relaxed-filter-spec.json + filter_spec: [INDEX_SPEC_PATH]/relaxed-filter-spec.json diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json new file mode 100644 index 000000000..7a9ff2890 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json @@ -0,0 +1,26 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "lucene", + "parameters": { + "ef_construction": 256, + "m": 16 + } + } + } + } + } +} diff 
--git a/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-spec.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json similarity index 100% rename from benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-spec.json rename to benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml similarity index 69% rename from benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml rename to benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml index 308d6b042..61e55f113 100644 --- a/benchmarks/perf-tool/release-configs/lucene-hnsw/restrictive-filter-test.yml +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml @@ -8,13 +8,13 @@ steps: index_name: target_index - name: create_index index_name: target_index - index_spec: /home/ec2-user/[PATH]/index.json + index_spec: [INDEX_SPEC_PATH]/index.json - name: ingest_multi_field index_name: target_index field_name: target_field bulk_size: 500 dataset_format: hdf5 - dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 attributes_dataset_name: attributes attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ] - name: refresh_index @@ -26,8 +26,8 @@ steps: index_name: target_index field_name: target_field dataset_format: hdf5 - dataset_path: /home/ec2-user/data/sift-128-euclidean-with-attr.hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 neighbors_format: hdf5 - neighbors_path: 
/home/ec2-user/data/sift-128-euclidean-with-filters.hdf5 + neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5 neighbors_dataset: neighbors_filter_4 - filter_spec: /home/ec2-user/[PATH]/restrictive-filter-test.yml + filter_spec: [INDEX_SPEC_PATH]/restrictive-filter-spec.json