Skip to content

Commit

Permalink
test: Add benchmark config files (#5093)
Browse files Browse the repository at this point in the history
* Add config files

* Add top-k and batch size to configs

* Add batch size to configs

* Add batch size to configs

* Remove configs using 1m docs
  • Loading branch information
bogdankostic authored Jun 14, 2023
1 parent 60e5d73 commit 7731713
Show file tree
Hide file tree
Showing 21 changed files with 749 additions and 0 deletions.
19 changes: 19 additions & 0 deletions test/benchmarks/configs/reader/debertabase.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Reader benchmark config: a single FARMReader node queried directly.
# NOTE(review): `version: ignore` presumably disables the pipeline version
# check in the loader — confirm against the consuming tool.
version: ignore

components:
  - name: Reader
    type: FARMReader
    params:
      model_name_or_path: deepset/deberta-v3-base-squad2
      top_k: 10
      # Canonical lowercase boolean (was `True`); parses to the same value.
      return_no_answer: true

pipelines:
  # Querying pipeline: Query -> Reader (no retriever in front).
  - name: querying
    nodes:
      - name: Reader
        inputs: [Query]

# Dataset archive URL and the labels file used for evaluation.
# NOTE(review): labels_file looks relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-downstream/squad20.tar.gz
  labels_file: data/squad20/dev-v2.0.json
19 changes: 19 additions & 0 deletions test/benchmarks/configs/reader/debertalarge.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Reader benchmark config: a single FARMReader node queried directly.
# NOTE(review): `version: ignore` presumably disables the pipeline version
# check in the loader — confirm against the consuming tool.
version: ignore

components:
  - name: Reader
    type: FARMReader
    params:
      model_name_or_path: deepset/deberta-v3-large-squad2
      top_k: 10
      # Canonical lowercase boolean (was `True`); parses to the same value.
      return_no_answer: true

pipelines:
  # Querying pipeline: Query -> Reader (no retriever in front).
  - name: querying
    nodes:
      - name: Reader
        inputs: [Query]

# Dataset archive URL and the labels file used for evaluation.
# NOTE(review): labels_file looks relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-downstream/squad20.tar.gz
  labels_file: data/squad20/dev-v2.0.json
19 changes: 19 additions & 0 deletions test/benchmarks/configs/reader/tinyroberta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Reader benchmark config: a single FARMReader node queried directly.
# NOTE(review): `version: ignore` presumably disables the pipeline version
# check in the loader — confirm against the consuming tool.
version: ignore

components:
  - name: Reader
    type: FARMReader
    params:
      model_name_or_path: deepset/tinyroberta-squad2
      top_k: 10
      # Canonical lowercase boolean (was `True`); parses to the same value.
      return_no_answer: true

pipelines:
  # Querying pipeline: Query -> Reader (no retriever in front).
  - name: querying
    nodes:
      - name: Reader
        inputs: [Query]

# Dataset archive URL and the labels file used for evaluation.
# NOTE(review): labels_file looks relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-downstream/squad20.tar.gz
  labels_file: data/squad20/dev-v2.0.json
31 changes: 31 additions & 0 deletions test/benchmarks/configs/retriever/bm25-elasticsearch-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Retriever benchmark config: BM25Retriever on an ElasticsearchDocumentStore.
version: ignore

components:
  # Document store is declared without params.
  - name: DocumentStore
    type: ElasticsearchDocumentStore
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: BM25Retriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
31 changes: 31 additions & 0 deletions test/benchmarks/configs/retriever/bm25-opensearch-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Retriever benchmark config: BM25Retriever on an OpenSearchDocumentStore.
version: ignore

components:
  # Document store is declared without params.
  - name: DocumentStore
    type: OpenSearchDocumentStore
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: BM25Retriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
34 changes: 34 additions & 0 deletions test/benchmarks/configs/retriever/bm25-weaviate-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Retriever benchmark config: BM25Retriever on a WeaviateDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: WeaviateDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): two-element timeout list; the meaning and units of
      # each element are not visible here — confirm against the
      # WeaviateDocumentStore documentation.
      timeout_config: [5, 300]
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: BM25Retriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
36 changes: 36 additions & 0 deletions test/benchmarks/configs/retriever/minilm-elasticsearch-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Retriever benchmark config: EmbeddingRetriever (multi-qa-MiniLM-L6-cos-v1)
# on an ElasticsearchDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): similarity and embedding_dim presumably have to
      # match the embedding model configured on the Retriever — confirm.
      similarity: cosine
      embedding_dim: 384
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: EmbeddingRetriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      embedding_model: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
36 changes: 36 additions & 0 deletions test/benchmarks/configs/retriever/minilm-opensearch-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Retriever benchmark config: EmbeddingRetriever (multi-qa-MiniLM-L6-cos-v1)
# on an OpenSearchDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: OpenSearchDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): similarity and embedding_dim presumably have to
      # match the embedding model configured on the Retriever — confirm.
      similarity: cosine
      embedding_dim: 384
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: EmbeddingRetriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      embedding_model: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
37 changes: 37 additions & 0 deletions test/benchmarks/configs/retriever/minilm-weaviate-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Retriever benchmark config: EmbeddingRetriever (multi-qa-MiniLM-L6-cos-v1)
# on a WeaviateDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: WeaviateDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): similarity and embedding_dim presumably have to
      # match the embedding model configured on the Retriever — confirm.
      similarity: cosine
      embedding_dim: 384
      # NOTE(review): two-element timeout list; the meaning and units of
      # each element are not visible here — confirm against the
      # WeaviateDocumentStore documentation.
      timeout_config: [5, 300]
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: EmbeddingRetriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      embedding_model: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
36 changes: 36 additions & 0 deletions test/benchmarks/configs/retriever/mpnetbase-elasticsearch-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Retriever benchmark config: EmbeddingRetriever (multi-qa-mpnet-base-dot-v1)
# on an ElasticsearchDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): similarity and embedding_dim presumably have to
      # match the embedding model configured on the Retriever — confirm.
      similarity: dot_product
      embedding_dim: 768
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: EmbeddingRetriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      embedding_model: sentence-transformers/multi-qa-mpnet-base-dot-v1
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
36 changes: 36 additions & 0 deletions test/benchmarks/configs/retriever/mpnetbase-opensearch-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Retriever benchmark config: EmbeddingRetriever (multi-qa-mpnet-base-dot-v1)
# on an OpenSearchDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: OpenSearchDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): similarity and embedding_dim presumably have to
      # match the embedding model configured on the Retriever — confirm.
      similarity: dot_product
      embedding_dim: 768
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: EmbeddingRetriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      embedding_model: sentence-transformers/multi-qa-mpnet-base-dot-v1
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
37 changes: 37 additions & 0 deletions test/benchmarks/configs/retriever/mpnetbase-weaviate-100k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Retriever benchmark config: EmbeddingRetriever (multi-qa-mpnet-base-dot-v1)
# on a WeaviateDocumentStore.
version: ignore

components:
  - name: DocumentStore
    type: WeaviateDocumentStore
    params:
      batch_size: 5000
      # NOTE(review): similarity and embedding_dim presumably have to
      # match the embedding model configured on the Retriever — confirm.
      similarity: dot_product
      embedding_dim: 768
      # NOTE(review): two-element timeout list; the meaning and units of
      # each element are not visible here — confirm against the
      # WeaviateDocumentStore documentation.
      timeout_config: [5, 300]
  - name: TextConverter
    type: TextConverter
  - name: Retriever
    type: EmbeddingRetriever
    params:
      # Refers to the component named `DocumentStore` declared above.
      document_store: DocumentStore
      embedding_model: sentence-transformers/multi-qa-mpnet-base-dot-v1
      top_k: 10

pipelines:
  # Indexing pipeline: File -> TextConverter -> Retriever -> DocumentStore.
  - name: indexing
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Retriever
        inputs: [TextConverter]
      - name: DocumentStore
        inputs: [Retriever]
  # Querying pipeline: Query -> Retriever.
  - name: querying
    nodes:
      - name: Retriever
        inputs: [Query]

# Dataset archive URL plus the documents directory and labels file.
# NOTE(review): both paths look relative to where the archive is
# unpacked — verify against the benchmark runner.
benchmark_config:
  data_url: https://deepset-test-datasets.s3.eu-central-1.amazonaws.com/msmarco.100_000.tar.bz2
  documents_directory: data/msmarco.100_000/txt
  labels_file: data/msmarco.100_000/evalsets/evalset_full.csv
Loading

0 comments on commit 7731713

Please sign in to comment.