Skip to content

Commit

Permalink
Merge branch 'opensearch-project:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
OVI3D0 authored Sep 12, 2024
2 parents cadc52e + 75c51a9 commit 975d1e0
Show file tree
Hide file tree
Showing 26 changed files with 1,300 additions and 5 deletions.
61 changes: 61 additions & 0 deletions nyc_taxis/workload.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,66 @@
import random
import datetime

async def delete_snapshot(opensearch, params):
    """Custom runner that deletes a single snapshot.

    Reads the ``repository`` and ``snapshot`` entries from ``params`` and
    forwards them as keyword arguments to ``opensearch.snapshot.delete``.
    A missing entry raises ``KeyError``. The response of the delete call is
    awaited but not returned.
    """
    delete = opensearch.snapshot.delete
    request = {
        "repository": params["repository"],
        "snapshot": params["snapshot"],
    }
    await delete(**request)

# Common helper functions
def random_money_values(max_value):
    """Return a random money range with cent precision.

    Both bounds are drawn as whole cents from ``[0, max_value * 100)`` and
    converted back to currency units, so ``0 <= gte <= lte < max_value``.

    Args:
        max_value: Exclusive upper limit in currency units (int or float).

    Returns:
        dict with the keys ``"gte"`` and ``"lte"``.

    Raises:
        ValueError: if ``max_value`` is smaller than one cent, because there
            is then no value to draw from.
    """
    # random.randrange() only accepts integers: a float such as 111.98 * 100
    # raises TypeError on Python 3.12+ (and is deprecated before that), so the
    # limit is converted to a whole number of cents up front.
    max_cents = int(round(max_value * 100))
    if max_cents < 1:
        raise ValueError("max_value must be at least 0.01")
    gte_cents = random.randrange(0, max_cents)
    lte_cents = random.randrange(gte_cents, max_cents)
    return {
        "gte": gte_cents / 100,
        "lte": lte_cents / 100,
    }

def random_dates(min_value, max_value, format_string, opensearch_query_format):
    """Return a random date range between two datetimes, formatted as strings.

    Two fractions ``0 <= gte_fraction <= lte_fraction <= 1`` are drawn and
    applied to the span between ``min_value`` and ``max_value`` (truncated to
    whole seconds), so that ``min_value <= gte <= lte <= max_value`` whenever
    ``min_value <= max_value``.

    Args:
        min_value: ``datetime.datetime`` lower limit of the range.
        max_value: ``datetime.datetime`` upper limit of the range.
        format_string: ``strftime`` pattern used to render both bounds.
        opensearch_query_format: value returned unchanged under ``"format"``.

    Returns:
        dict with the keys ``"gte"``, ``"lte"`` and ``"format"``.
    """
    # Work with timedelta arithmetic instead of converting to POSIX timestamps
    # and back: that round trip interprets naive datetimes in the host's local
    # timezone, so results could shift around DST changes and fail for dates
    # the platform cannot express as a timestamp.
    span_seconds = (max_value - min_value).total_seconds()
    gte_fraction = random.uniform(0, 1)
    lte_fraction = random.uniform(gte_fraction, 1.0)

    gte_date = min_value + datetime.timedelta(seconds=int(gte_fraction * span_seconds))
    lte_date = min_value + datetime.timedelta(seconds=int(lte_fraction * span_seconds))
    return {
        "gte": gte_date.strftime(format_string),
        "lte": lte_date.strftime(format_string),
        "format": opensearch_query_format,
    }

# Standard value sources for our operations
# Limits handed to random_dates() by the date_source_* functions in this module.
start_date = datetime.datetime(year=2015, month=1, day=1)
end_date = datetime.datetime(year=2015, month=1, day=15)

def total_amount_source():
    """Standard value source for the ``total_amount`` field.

    Returns the ``{"gte", "lte"}`` dict produced by ``random_money_values``
    with an upper limit of 111.98.
    """
    upper_limit = 111.98
    return random_money_values(upper_limit)

def date_source_with_hours():
    """Standard value source yielding a date range with time-of-day precision.

    Draws the range between ``start_date`` and ``end_date`` via
    ``random_dates`` and renders it as ``yyyy-MM-dd HH:mm:ss``.
    """
    python_format = "%Y-%m-%d %H:%M:%S"
    query_format = "yyyy-MM-dd HH:mm:ss"
    return random_dates(
        start_date,
        end_date,
        format_string=python_format,
        opensearch_query_format=query_format,
    )

def date_source_without_hours():
    """Standard value source yielding a date range with day precision.

    Draws the range between ``start_date`` and ``end_date`` via
    ``random_dates`` and renders it as ``dd/MM/yyyy``.
    """
    python_format = "%d/%m/%Y"
    query_format = "dd/MM/yyyy"
    return random_dates(
        start_date,
        end_date,
        format_string=python_format,
        opensearch_query_format=query_format,
    )

def trip_distance_source():
    """Standard value source for the ``trip_distance`` field.

    Picks an integer lower bound in ``[0, 10]`` and then an integer upper
    bound between that lower bound and 20 (both inclusive).

    Returns:
        dict with the keys ``"gte"`` and ``"lte"``.
    """
    lower = random.randint(0, 10)
    bounds = {"gte": lower}
    # The upper bound is drawn second so it can never undercut the lower one.
    bounds["lte"] = random.randint(lower, 20)
    return bounds

def register(registry):
    """Register this workload's standard value sources and custom runner.

    The standard value sources cover the range queries defined in
    operations/default.json; they are only used if --randomization-enabled
    is present. Registration happens in the order listed below, followed by
    the asynchronous ``delete-snapshot`` runner.
    """
    # (first argument, field name, value source) for each registration.
    standard_value_sources = (
        ("range", "total_amount", total_amount_source),
        ("distance_amount_agg", "trip_distance", trip_distance_source),
        ("autohisto_agg", "dropoff_datetime", date_source_without_hours),
        ("date_histogram_agg", "dropoff_datetime", date_source_without_hours),
        ("date_histogram_calendar_interval", "dropoff_datetime", date_source_with_hours),
        ("date_histogram_calendar_interval_with_tz", "dropoff_datetime", date_source_with_hours),
        ("date_histogram_fixed_interval", "dropoff_datetime", date_source_with_hours),
        ("date_histogram_fixed_interval_with_tz", "dropoff_datetime", date_source_with_hours),
        ("date_histogram_fixed_interval_with_metrics", "dropoff_datetime", date_source_with_hours),
        ("auto_date_histogram", "dropoff_datetime", date_source_with_hours),
        ("auto_date_histogram_with_tz", "dropoff_datetime", date_source_with_hours),
        ("auto_date_histogram_with_metrics", "dropoff_datetime", date_source_with_hours),
    )
    for name, field, source in standard_value_sources:
        registry.register_standard_value_source(name, field, source)

    registry.register_runner("delete-snapshot", delete_snapshot, async_runner=True)
3 changes: 2 additions & 1 deletion vectorsearch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ This workload allows the following parameters to be specified using `--workload-
| Name | Description |
|-----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|
| target_index_name | Name of index to add vectors to |
| target_field_name | Name of field to add vectors to |
| target_field_name | Name of field to add vectors to. For a vector inside a nested field, use "." to separate the nested parent field from the vector field (`parent.child`) |
| target_index_body | Path to target index definition |
| target_index_primary_shards | Target index primary shards |
| target_index_replica_shards | Target index replica shards |
Expand Down Expand Up @@ -253,6 +253,7 @@ This workload allows the following parameters to be specified using `--workload-
| query_count | Number of queries for search operation |
| query_body | Json properties that will be merged with search body |
| search_clients | Number of clients to use for running queries |
| target_dataset_filter_attributes | Used in filter benchmarks. List of names of attribute fields in a dataset. |

#### Sample Outputs

Expand Down
62 changes: 62 additions & 0 deletions vectorsearch/indices/filters/faiss-index-attributes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"settings": {
"index": {
"knn": true
{%- if target_index_primary_shards is defined and target_index_primary_shards %}
,"number_of_shards": {{ target_index_primary_shards }}
{%- endif %}
{%- if target_index_replica_shards is defined %}
,"number_of_replicas": {{ target_index_replica_shards }}
{%- endif %}
}
},
"mappings": {
"dynamic": "strict",
"properties": {
{% if id_field_name is defined and id_field_name != "_id" %}
"{{id_field_name}}": {
"type": "keyword"
},
{%- endif %}
"target_field": {
"type": "knn_vector",
"dimension": {{ target_index_dimension }},
{%- if train_model_id is defined %}
"model_id": "{{ train_model_id }}"
{%- else %}
"method": {
"name": "hnsw",
"space_type": "{{ target_index_space_type }}",
"engine": "faiss",
"parameters": {
  {#- Every parameter is optional. A separating comma is emitted before a
      parameter only when at least one earlier parameter was rendered, so the
      object stays valid JSON for any combination of settings. #}
  {%- if hnsw_ef_search is defined and hnsw_ef_search %}
  "ef_search": {{ hnsw_ef_search }}
  {%- endif %}
  {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
  {%- if hnsw_ef_search is defined and hnsw_ef_search %}
  ,
  {%- endif %}
  "ef_construction": {{ hnsw_ef_construction }}
  {%- endif %}
  {%- if hnsw_m is defined and hnsw_m %}
  {%- if (hnsw_ef_search is defined and hnsw_ef_search) or (hnsw_ef_construction is defined and hnsw_ef_construction) %}
  ,
  {%- endif %}
  "m": {{ hnsw_m }}
  {%- endif %}
}
}
{%- endif %}
},
"color": {
"type": "text"
},
"taste": {
"type": "text"
},
"age": {
"type": "integer"
}
}
}
}
55 changes: 55 additions & 0 deletions vectorsearch/indices/filters/lucene-index-attributes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"settings": {
"index": {
"knn": true
{%- if target_index_primary_shards is defined and target_index_primary_shards %}
,"number_of_shards": {{ target_index_primary_shards }}
{%- endif %}
{%- if target_index_replica_shards is defined %}
,"number_of_replicas": {{ target_index_replica_shards }}
{%- endif %}
{%- if hnsw_ef_search is defined and hnsw_ef_search %}
,"knn.algo_param.ef_search": {{ hnsw_ef_search }}
{%- endif %}
}
},
"mappings": {
"dynamic": "strict",
"properties": {
{% if id_field_name is defined and id_field_name != "_id" %}
"{{id_field_name}}": {
"type": "keyword"
},
{%- endif %}
"target_field": {
"type": "knn_vector",
"dimension": {{ target_index_dimension }},
"method": {
"name": "hnsw",
"space_type": "{{ target_index_space_type }}",
"engine": "lucene",
"parameters": {
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
"ef_construction": {{ hnsw_ef_construction }}
{%- endif %}
{%- if hnsw_m is defined and hnsw_m %}
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
,
{%- endif %}
"m": {{ hnsw_m }}
{%- endif %}
}
}
},
"color": {
"type": "text"
},
"taste": {
"type": "text"
},
"age": {
"type": "integer"
}
}
}
}
55 changes: 55 additions & 0 deletions vectorsearch/indices/filters/nmslib-index-attributes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"settings": {
"index": {
"knn": true
{%- if target_index_primary_shards is defined and target_index_primary_shards %}
,"number_of_shards": {{ target_index_primary_shards }}
{%- endif %}
{%- if target_index_replica_shards is defined %}
,"number_of_replicas": {{ target_index_replica_shards }}
{%- endif %}
{%- if hnsw_ef_search is defined and hnsw_ef_search %}
,"knn.algo_param.ef_search": {{ hnsw_ef_search }}
{%- endif %}
}
},
"mappings": {
"dynamic": "strict",
"properties": {
{% if id_field_name is defined and id_field_name != "_id" %}
"{{id_field_name}}": {
"type": "keyword"
},
{%- endif %}
"target_field": {
"type": "knn_vector",
"dimension": {{ target_index_dimension }},
"method": {
"name": "hnsw",
"space_type": "{{ target_index_space_type }}",
"engine": "nmslib",
"parameters": {
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
"ef_construction": {{ hnsw_ef_construction }}
{%- endif %}
{%- if hnsw_m is defined and hnsw_m %}
{%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
,
{%- endif %}
"m": {{ hnsw_m }}
{%- endif %}
}
}
},
"color": {
"type": "text"
},
"taste": {
"type": "text"
},
"age": {
"type": "integer"
}
}
}
}
35 changes: 35 additions & 0 deletions vectorsearch/indices/filters/script-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"settings": {
"index": {
  {#- Both settings are optional. The comma is emitted only when
      number_of_shards was rendered, so that number_of_replicas on its own
      does not produce a leading comma (invalid JSON). #}
  {%- if target_index_primary_shards is defined and target_index_primary_shards %}
  "number_of_shards": {{ target_index_primary_shards }}
  {%- endif %}
  {%- if target_index_replica_shards is defined %}
  {%- if target_index_primary_shards is defined and target_index_primary_shards %}
  ,
  {%- endif %}
  "number_of_replicas": {{ target_index_replica_shards }}
  {%- endif %}
}
},
"mappings": {
"dynamic": "strict",
"properties": {
{% if id_field_name is defined and id_field_name != "_id" %}
"{{id_field_name}}": {
"type": "keyword"
},
{%- endif %}
"target_field": {
"type": "knn_vector",
"dimension": {{ target_index_dimension }}
},
"color": {
"type": "text"
},
"taste": {
"type": "text"
},
"age": {
"type": "integer"
}
}
}
}
56 changes: 56 additions & 0 deletions vectorsearch/indices/nested/nested-faiss-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"settings": {
"index": {
"knn": true
{%- if target_index_primary_shards is defined and target_index_primary_shards %}
,"number_of_shards": {{ target_index_primary_shards }}
{%- endif %}
{%- if target_index_replica_shards is defined %}
,"number_of_replicas": {{ target_index_replica_shards }}
{%- endif %}
}
},
"mappings": {
"dynamic": "strict",
"properties": {
{% if id_field_name is defined and id_field_name != "_id" %}
"{{id_field_name}}": {
"type": "keyword"
},
{%- endif %}
{% if target_field_name is defined and target_field_name %}
"{{ target_field_name.split('.')[0] }}": {
"type": "nested",
"properties": {
"{{ target_field_name.split('.')[1] }}": {
"type": "knn_vector",
"dimension": {{ target_index_dimension }},
"method": {
"name": "hnsw",
"space_type": "{{ target_index_space_type }}",
"engine": "faiss",
"parameters": {
  {#- Every parameter is optional. A separating comma is emitted before a
      parameter only when at least one earlier parameter was rendered, so the
      object stays valid JSON for any combination of settings. #}
  {%- if hnsw_ef_search is defined and hnsw_ef_search %}
  "ef_search": {{ hnsw_ef_search }}
  {%- endif %}
  {%- if hnsw_ef_construction is defined and hnsw_ef_construction %}
  {%- if hnsw_ef_search is defined and hnsw_ef_search %}
  ,
  {%- endif %}
  "ef_construction": {{ hnsw_ef_construction }}
  {%- endif %}
  {%- if hnsw_m is defined and hnsw_m %}
  {%- if (hnsw_ef_search is defined and hnsw_ef_search) or (hnsw_ef_construction is defined and hnsw_ef_construction) %}
  ,
  {%- endif %}
  "m": {{ hnsw_m }}
  {%- endif %}
}
}
}
}
}
{%- endif %}
}
}
}
Loading

0 comments on commit 975d1e0

Please sign in to comment.