Merge branch 'embeddings-benchmark:main' into main

Lyon-NLP · Jun 3, 2024 · 8ecb34e · 8ecb34e
2 parents 89ca261 + 36703b0
commit 8ecb34e
Show file tree

Hide file tree

Showing 964 changed files with 6,216 additions and 427,637 deletions.
diff --git a/.gitignore b/.gitignore
@@ -137,3 +137,6 @@ error_logs.txt
 # tests
 tests/results
 tmp.py
+
+# sandbox
+sb.ipynb
diff --git a/README.md b/README.md
@@ -41,7 +41,7 @@ pip install mteb
 * Using a python script (see [scripts/run_mteb_english.py](https://github.com/embeddings-benchmark/mteb/blob/main/scripts/run_mteb_english.py) and [mteb/mtebscripts](https://github.com/embeddings-benchmark/mtebscripts) for more):
 
 ```python
-from mteb import MTEB
+import mteb
 from sentence_transformers import SentenceTransformer
 
 # Define the sentence-transformers model name
@@ -50,7 +50,8 @@ model_name = "average_word_embeddings_komninos"
 # model_name = "sentence-transformers/all-MiniLM-L6-v2"
 
 model = SentenceTransformer(model_name)
-evaluation = MTEB(tasks=["Banking77Classification"])
+tasks = mteb.get_tasks(tasks=["Banking77Classification"])
+evaluation = mteb.MTEB(tasks=tasks)
 results = evaluation.run(model, output_folder=f"results/{model_name}")
 ```
 
@@ -84,37 +85,46 @@ Datasets can be selected by providing the list of datasets, but also
 * by their task (e.g. "Clustering" or "Classification")
 
 ```python
-evaluation = MTEB(task_types=['Clustering', 'Retrieval']) # Only select clustering and retrieval tasks
+tasks = mteb.get_tasks(task_types=["Clustering", "Retrieval"]) # Only select clustering and retrieval tasks
 ```
 
-* by their categories e.g. "S2S" (sentence to sentence) or "P2P" (paragraph to paragraph)
+* by their categories e.g. "s2s" (sentence to sentence) or "p2p" (paragraph to paragraph)
 
 ```python
-evaluation = MTEB(task_categories=['S2S']) # Only select sentence2sentence datasets
+tasks = mteb.get_tasks(categories=["s2s", "p2p"]) # Only select sentence2sentence and paragraph2paragraph datasets
 ```
 
 * by their languages
 
 ```python
-evaluation = MTEB(task_langs=["en", "de"]) # Only select datasets which are "en", "de" or "en-de"
+tasks = mteb.get_tasks(languages=["eng", "deu"]) # Only select datasets which contain "eng" or "deu" (iso 639-3 codes)
 ```
 
 You can also specify which languages to load for multilingual/cross-lingual tasks like below:
 
 ```python
+import mteb
+
+tasks = [
+    mteb.get_task("AmazonReviewsClassification", languages = ["eng", "fra"]),
+    mteb.get_task("BUCCBitextMining", languages = ["deu"]), # all subsets containing "deu"
+]
+
+# or you can select specific huggingface subsets like this:
 from mteb.tasks import AmazonReviewsClassification, BUCCBitextMining
 
-evaluation = MTEB(tasks=[
+evaluation = mteb.MTEB(tasks=[
         AmazonReviewsClassification(hf_subsets=["en", "fr"]), # Only load "en" and "fr" subsets of Amazon Reviews
         BUCCBitextMining(hf_subsets=["de-en"]), # Only load "de-en" subset of BUCC
 ])
+# for an example of a HF subset see "Subset" in the dataset viewer at: https://huggingface.co/datasets/mteb/bucc-bitext-mining
 ```
 
 There are also presets available for certain task collections, e.g. to select the 56 English datasets that form the "Overall MTEB English leaderboard":
 
 ```python
 from mteb import MTEB_MAIN_EN
-evaluation = MTEB(tasks=MTEB_MAIN_EN, task_langs=["en"])
+evaluation = mteb.MTEB(tasks=MTEB_MAIN_EN, task_langs=["en"])
 ```
 
 
@@ -148,7 +158,8 @@ class MyModel():
         pass
 
 model = MyModel()
-evaluation = MTEB(tasks=["Banking77Classification"])
+tasks = mteb.get_tasks(tasks=["Banking77Classification"])
+evaluation = mteb.MTEB(tasks=tasks)
 evaluation.run(model)
 ```
 

diff --git a/docs/mmteb/points.md b/docs/mmteb/points.md
@@ -65,7 +65,7 @@ Please also add your first name and last name are as you want them to appear in
 | guangyusong       | Guangyu    | Song       | [email protected]         | ~Guangyu_Song1       | N/A                            |
 | davidstap.        | David      | Stap       | [email protected]            | ~David_Stap          | University of Amsterdam.                         |
 | HLasse            | Lasse      | Hansen     | [email protected]         | ~Lasse_Hansen2       | Aarhus University, Denmark                            |
-| jaygala24         | Jay        | Gala       | [email protected]          | ~Jay_Gala1           | Nilekani Center at AI4Bharat                          |
+| jaygala24         | Jay        | Gala       | [email protected]          | ~Jay_Gala1           | MBZUAI                          |
 | digantamisra      | Diganta    | Misra      | [email protected]    | ~Diganta_Misra1       | Mila - Quebec AI Institute                           |
 | PranjalChitale    | Pranjal    | Chitale    | [email protected]    | ~Pranjal_A_Chitale1       | Indian Institute of Technology Madras            |
 | Akash190104       | Akash      | Kundu      | [email protected]      |~Akash_Kundu2             | Heritage Institute of Technology, Kolkata && Apart Research |
@@ -88,4 +88,6 @@ Please also add your first name and last name are as you want them to appear in
 | ShawonAshraf      | Shawon     | Ashraf     | [email protected]          |   ~Shawon_Ashraf1    | ellamind, Germany                                   |
 | bjoernpl          | Björn      | Plüster    | [email protected]          |  ~Björn_Plüster1     | ellamind, Germany                                   |
 | jphme             | Jan Philipp| Harries    | [email protected]             |~Jan_Philipp_Harries1 | ellamind, Germany                                   |
-| malteos           | Malte       | Ostendorff      | [email protected]           | ~Malte_Ostendorff1| Occiglot                             |
+| malteos           | Malte       | Ostendorff      | [email protected]           | ~Malte_Ostendorff1| Occiglot                             |
+| ManuelFay         | Manuel        | Faysse     | [email protected] |              ~Manuel_Faysse1        | CentraleSupélec & Illuin Technology                  |
+| hgissbkh          | Hippolyte     | Gisserot-Boukhlef    | [email protected]        |   ~Hippolyte_Gisserot-Boukhlef1                   | CentraleSupélec & Artefact   |
diff --git a/docs/mmteb/points/629.jsonl b/docs/mmteb/points/629.jsonl
@@ -1,2 +1,2 @@
 {"GitHub": "dokato", "New dataset": 2}
-{"GitHub": "isaac-chung ", "Review PR": 2}
+{"GitHub": "isaac-chung", "Review PR": 2}
diff --git a/docs/mmteb/points/718.jsonl b/docs/mmteb/points/718.jsonl
@@ -1,5 +1,5 @@
-{"GitHub": "rasdani", "New dataset": 20}
-{"GitHub": "ShawonAshraf", "New dataset": 26}
-{"GitHub": "bjoernpl", "New dataset": 26}
-{"GitHub": "jphme", "New dataset": 26}
+{"GitHub": "rasdani", "New dataset": 22}
+{"GitHub": "ShawonAshraf", "New dataset": 28}
+{"GitHub": "bjoernpl", "New dataset": 28}
+{"GitHub": "jphme", "New dataset": 28}
 {"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/docs/mmteb/points/779.jsonl b/docs/mmteb/points/779.jsonl
@@ -0,0 +1,4 @@
+{"GitHub": "xhluca", "New dataset": 2}
+{"GitHub": "xhluca", "Bug fixes": 2}
+{"GitHub": "vaibhavad", "Review PR": 2}
+{"GitHub": "orionw", "Review PR": 2}
diff --git a/docs/mmteb/points/799.jsonl b/docs/mmteb/points/799.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "vaibhavad", "New dataset": 2}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/docs/mmteb/points/806.jsonl b/docs/mmteb/points/806.jsonl
@@ -0,0 +1,4 @@
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "imenelydiaker", "Review PR": 2}
+{"GitHub": "Muennighoff", "Review PR": 2}
+{"GitHub": "KennethEnevoldsen", "Bug fixes": 4}
diff --git a/docs/mmteb/points/807.jsonl b/docs/mmteb/points/807.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "Muennighoff", "Review PR": 2}
+{"GitHub": "KennethEnevoldsen", "Bug fixes": 5}
diff --git a/docs/mmteb/points/815.jsonl b/docs/mmteb/points/815.jsonl
@@ -0,0 +1,5 @@
+{"GitHub": "artemsnegirev", "New dataset": 8}
+{"GitHub": "MariyaTikhonova", "New dataset": 6}
+{"GitHub": "anpalmak2003", "New dataset": 6}
+{"GitHub": "Alenush", "New dataset": 4}
+{"GitHub": "ab1992ao", "New dataset": 4}
diff --git a/docs/mmteb/points/826.jsonl b/docs/mmteb/points/826.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "imenelydiaker", "Review PR": 2}
+{"GitHub": "Muennighoff", "Review PR": 2}
+{"GitHub": "KennethEnevoldsen", "Bug fixes": 4}
diff --git a/docs/mmteb/points/827.jsonl b/docs/mmteb/points/827.jsonl
@@ -0,0 +1,3 @@
+{"GitHub": "isaac-chung", "Review PR": 2}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
+{"GitHub": "imenelydiaker", "Bug fixes": 2}
diff --git a/docs/mmteb/points/832.jsonl b/docs/mmteb/points/832.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Bug fixes": 4}
+{"GitHub": "imenelydiaker", "Review PR": 2}
diff --git a/docs/mmteb/points/842.jsonl b/docs/mmteb/points/842.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Bug fixes": 3}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/docs/mmteb/points/854.jsonl b/docs/mmteb/points/854.jsonl
@@ -0,0 +1,5 @@
+{"GitHub": "ManuelFay", "Bug fixes": 13,  "New task": 5}
+{"GitHub": "hgissbkh", "Bug fixes": 13, "New task": 5}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
+{"GitHub": "imenelydiaker", "Review PR": 2}
+{"GitHub": "orionw", "Review PR": 2}
diff --git a/docs/mmteb/points/862.jsonl b/docs/mmteb/points/862.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Bug fixes": 3}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/docs/mmteb/points/865.jsonl b/docs/mmteb/points/865.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Bug fixes": 4}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "vaibhavad", "New dataset": 2}
		{"GitHub": "KennethEnevoldsen", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "isaac-chung", "Bug fixes": 4}
		{"GitHub": "imenelydiaker", "Review PR": 2}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		{"GitHub": "isaac-chung", "Bug fixes": 3}
		{"GitHub": "KennethEnevoldsen", "Review PR": 2}