From 0fc584bdb8389168ab604f348de38c993bce4115 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Thu, 10 Nov 2022 08:26:16 -0500 Subject: [PATCH] Validate pre-built indexes on initialization (#1337) + Perform consistency checks when initializing pre-built indexes for IndexReader and LuceneSearcher. + Reduce verbosity of debug information during initialization. + Document known issues. + Update 2CR documentation. --- README.md | 2 +- docs/2cr/mrtydi.html | 660 +++++++-------------- docs/2cr/msmarco-v1-passage.html | 6 +- docs/release-notes/known-issues-v0.19.0.md | 3 + pyserini/index/lucene/_base.py | 70 ++- pyserini/search/lucene/_searcher.py | 22 +- pyserini/util.py | 3 +- scripts/validate_prebuilt_indexes.py | 2 +- 8 files changed, 288 insertions(+), 480 deletions(-) create mode 100644 docs/release-notes/known-issues-v0.19.0.md diff --git a/README.md b/README.md index a7b98f74b..d5b7765e9 100644 --- a/README.md +++ b/README.md @@ -700,7 +700,7 @@ The following guides provide step-by-step instructions: ## Release History -+ v0.19.0 (w/ Anserini v0.16.1): November 2, 2022 [[Release Notes](docs/release-notes/release-notes-v0.19.0.md)] ++ v0.19.0 (w/ Anserini v0.16.1): November 2, 2022 [[Release Notes](docs/release-notes/release-notes-v0.19.0.md)] [[Known Issues](docs/release-notes/known-issues-v0.19.0.md)] + v0.18.0 (w/ Anserini v0.15.0): September 26, 2022 [[Release Notes](docs/release-notes/release-notes-v0.18.0.md)] (First release based on Lucene 9) + v0.17.1 (w/ Anserini v0.14.4): August 13, 2022 [[Release Notes](docs/release-notes/release-notes-v0.17.1.md)] (Final release based on Lucene 8) + v0.17.0 (w/ Anserini v0.14.3): May 28, 2022 [[Release Notes](docs/release-notes/release-notes-v0.17.0.md)] diff --git a/docs/2cr/mrtydi.html b/docs/2cr/mrtydi.html index b0ab2197b..0be00eff6 100644 --- a/docs/2cr/mrtydi.html +++ b/docs/2cr/mrtydi.html @@ -223,13 +223,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ar \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic \
-  --output run.mrtydi.bm25.ar.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ar.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -244,13 +242,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language bn \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali \
-  --output run.mrtydi.bm25.bn.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.bn.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -265,13 +261,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language en \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english \
-  --output run.mrtydi.bm25.en.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.en.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -286,13 +280,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language fi \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish \
-  --output run.mrtydi.bm25.fi.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.fi.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -307,13 +299,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language id \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian \
-  --output run.mrtydi.bm25.id.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.id.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -328,13 +318,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ja \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese \
-  --output run.mrtydi.bm25.ja.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ja.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -349,13 +337,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ko \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean \
-  --output run.mrtydi.bm25.ko.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ko.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -370,13 +356,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ru \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian \
-  --output run.mrtydi.bm25.ru.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ru.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -391,13 +375,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language sw \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili \
-  --output run.mrtydi.bm25.sw.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.sw.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -412,13 +394,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language te \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu \
-  --output run.mrtydi.bm25.te.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.te.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -433,13 +413,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language th \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai \
-  --output run.mrtydi.bm25.th.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.th.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -523,13 +501,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt --hits 100
 
Evaluation commands: @@ -544,13 +520,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt --hits 100
 
Evaluation commands: @@ -565,13 +539,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.en.test.txt --hits 100
 
Evaluation commands: @@ -586,13 +558,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt --hits 100
 
Evaluation commands: @@ -607,13 +577,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.id.test.txt --hits 100
 
Evaluation commands: @@ -628,13 +596,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt --hits 100
 
Evaluation commands: @@ -649,13 +615,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt --hits 100
 
Evaluation commands: @@ -670,13 +634,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt --hits 100
 
Evaluation commands: @@ -691,13 +653,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt --hits 100
 
Evaluation commands: @@ -712,13 +672,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.te.test.txt --hits 100
 
Evaluation commands: @@ -733,13 +691,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.th.test.txt --hits 100
 
Evaluation commands: @@ -823,14 +779,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt --hits 100
 
Evaluation commands: @@ -845,14 +799,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt --hits 100
 
Evaluation commands: @@ -867,14 +819,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt --hits 100
 
Evaluation commands: @@ -889,14 +839,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt --hits 100
 
Evaluation commands: @@ -911,14 +859,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt --hits 100
 
Evaluation commands: @@ -933,14 +879,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt --hits 100
 
Evaluation commands: @@ -955,14 +899,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt --hits 100
 
Evaluation commands: @@ -977,14 +919,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt --hits 100
 
Evaluation commands: @@ -999,14 +939,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt --hits 100
 
Evaluation commands: @@ -1021,14 +959,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt --hits 100
 
Evaluation commands: @@ -1043,14 +979,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt --hits 100
 
Evaluation commands: @@ -1134,14 +1068,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt --hits 100
 
Evaluation commands: @@ -1156,14 +1088,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt --hits 100
 
Evaluation commands: @@ -1178,14 +1108,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt --hits 100
 
Evaluation commands: @@ -1200,14 +1128,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt --hits 100
 
Evaluation commands: @@ -1222,14 +1148,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt --hits 100
 
Evaluation commands: @@ -1244,14 +1168,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt --hits 100
 
Evaluation commands: @@ -1266,14 +1188,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt --hits 100
 
Evaluation commands: @@ -1288,14 +1208,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt --hits 100
 
Evaluation commands: @@ -1310,14 +1228,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt --hits 100
 
Evaluation commands: @@ -1332,14 +1248,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt --hits 100
 
Evaluation commands: @@ -1354,14 +1268,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt --hits 100
 
Evaluation commands: @@ -1445,14 +1357,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt --hits 100
 
Evaluation commands: @@ -1467,14 +1377,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt --hits 100
 
Evaluation commands: @@ -1489,14 +1397,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt --hits 100
 
Evaluation commands: @@ -1511,14 +1417,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt --hits 100
 
Evaluation commands: @@ -1533,14 +1437,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt --hits 100
 
Evaluation commands: @@ -1555,14 +1457,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt --hits 100
 
Evaluation commands: @@ -1577,14 +1477,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt --hits 100
 
Evaluation commands: @@ -1599,14 +1497,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt --hits 100
 
Evaluation commands: @@ -1621,14 +1517,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt --hits 100
 
Evaluation commands: @@ -1643,14 +1537,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt --hits 100
 
Evaluation commands: @@ -1665,14 +1557,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt --hits 100
 
Evaluation commands: @@ -1783,13 +1673,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ar \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic \
-  --output run.mrtydi.bm25.ar.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ar.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1804,13 +1692,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language bn \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali \
-  --output run.mrtydi.bm25.bn.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.bn.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1825,13 +1711,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language en \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english \
-  --output run.mrtydi.bm25.en.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.en.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1846,13 +1730,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language fi \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish \
-  --output run.mrtydi.bm25.fi.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.fi.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1867,13 +1749,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language id \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian \
-  --output run.mrtydi.bm25.id.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.id.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1888,13 +1768,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ja \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese \
-  --output run.mrtydi.bm25.ja.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ja.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1909,13 +1787,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ko \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean \
-  --output run.mrtydi.bm25.ko.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ko.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1930,13 +1806,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language ru \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian \
-  --output run.mrtydi.bm25.ru.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.ru.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1951,13 +1825,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language sw \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili \
-  --output run.mrtydi.bm25.sw.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.sw.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1972,13 +1844,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language te \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu \
-  --output run.mrtydi.bm25.te.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.te.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -1993,13 +1863,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.lucene \
+
python -m pyserini.search.lucene --threads 16 --batch-size 128 \
   --language th \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai \
-  --output run.mrtydi.bm25.th.test.txt \
-  --batch 36 --threads 12 \
-  --bm25 --hits 100
+  --output run.mrtydi.bm25.th.test.txt --bm25 --hits 100
 
Evaluation commands: @@ -2083,13 +1951,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt --hits 100
 
Evaluation commands: @@ -2104,13 +1970,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt --hits 100
 
Evaluation commands: @@ -2125,13 +1989,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.en.test.txt --hits 100
 
Evaluation commands: @@ -2146,13 +2008,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt --hits 100
 
Evaluation commands: @@ -2167,13 +2027,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.id.test.txt --hits 100
 
Evaluation commands: @@ -2188,13 +2046,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt --hits 100
 
Evaluation commands: @@ -2209,13 +2065,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt --hits 100
 
Evaluation commands: @@ -2230,13 +2084,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt --hits 100
 
Evaluation commands: @@ -2251,13 +2103,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt --hits 100
 
Evaluation commands: @@ -2272,13 +2122,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.te.test.txt --hits 100
 
Evaluation commands: @@ -2293,13 +2141,11 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder castorini/mdpr-question-nq \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-nq \
-  --output run.mrtydi.mdpr-split-pft-nq.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-split-pft-nq.th.test.txt --hits 100
 
Evaluation commands: @@ -2383,14 +2229,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt --hits 100
 
Evaluation commands: @@ -2405,14 +2249,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt --hits 100
 
Evaluation commands: @@ -2427,14 +2269,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt --hits 100
 
Evaluation commands: @@ -2449,14 +2289,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt --hits 100
 
Evaluation commands: @@ -2471,14 +2309,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt --hits 100
 
Evaluation commands: @@ -2493,14 +2329,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt --hits 100
 
Evaluation commands: @@ -2515,14 +2349,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt --hits 100
 
Evaluation commands: @@ -2537,14 +2369,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt --hits 100
 
Evaluation commands: @@ -2559,14 +2389,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt --hits 100
 
Evaluation commands: @@ -2581,14 +2409,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt --hits 100
 
Evaluation commands: @@ -2603,14 +2429,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-nq \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-tied-pft-nq \
-  --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt --hits 100
 
Evaluation commands: @@ -2694,14 +2518,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt --hits 100
 
Evaluation commands: @@ -2716,14 +2538,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt --hits 100
 
Evaluation commands: @@ -2738,14 +2558,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt --hits 100
 
Evaluation commands: @@ -2760,14 +2578,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt --hits 100
 
Evaluation commands: @@ -2782,14 +2598,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt --hits 100
 
Evaluation commands: @@ -2804,14 +2618,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt --hits 100
 
Evaluation commands: @@ -2826,14 +2638,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt --hits 100
 
Evaluation commands: @@ -2848,14 +2658,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt --hits 100
 
Evaluation commands: @@ -2870,14 +2678,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt --hits 100
 
Evaluation commands: @@ -2892,14 +2698,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt --hits 100
 
Evaluation commands: @@ -2914,14 +2718,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco \
-  --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt --hits 100
 
Evaluation commands: @@ -3005,14 +2807,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-arabic-test \
   --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt --hits 100
 
Evaluation commands: @@ -3027,14 +2827,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-bengali-test \
   --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt --hits 100
 
Evaluation commands: @@ -3049,14 +2847,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-english-test \
   --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt --hits 100
 
Evaluation commands: @@ -3071,14 +2867,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-finnish-test \
   --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt --hits 100
 
Evaluation commands: @@ -3093,14 +2887,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-indonesian-test \
   --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt --hits 100
 
Evaluation commands: @@ -3115,14 +2907,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-japanese-test \
   --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt --hits 100
 
Evaluation commands: @@ -3137,14 +2927,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-korean-test \
   --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt --hits 100
 
Evaluation commands: @@ -3159,14 +2947,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-russian-test \
   --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt --hits 100
 
Evaluation commands: @@ -3181,14 +2967,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-swahili-test \
   --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt --hits 100
 
Evaluation commands: @@ -3203,14 +2987,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-telugu-test \
   --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt --hits 100
 
Evaluation commands: @@ -3225,14 +3007,12 @@

Mr.TyDi

Command to generate run:
-
python -m pyserini.search.faiss \
+
python -m pyserini.search.faiss --threads 16 --batch-size 512 \
   --encoder-class auto \
   --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
   --topics mrtydi-v1.1-thai-test \
   --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco-ft-all \
-  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt \
-  --batch 36 --threads 12 \
-  --hits 100
+  --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt --hits 100
 
Evaluation commands: diff --git a/docs/2cr/msmarco-v1-passage.html b/docs/2cr/msmarco-v1-passage.html index 0e97a712a..23f3b279f 100644 --- a/docs/2cr/msmarco-v1-passage.html +++ b/docs/2cr/msmarco-v1-passage.html @@ -934,7 +934,7 @@

MS MARCO V1 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-passage-d2q-t5-docvectors \
   --topics dl19-passage \
   --output run.msmarco-v1-passage.bm25-rm3-d2q-t5-default.dl19.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4
@@ -955,7 +955,7 @@ 

MS MARCO V1 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-passage-d2q-t5-docvectors \
   --topics dl20 \
   --output run.msmarco-v1-passage.bm25-rm3-d2q-t5-default.dl20.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4
@@ -976,7 +976,7 @@ 

MS MARCO V1 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-passage-d2q-t5-docvectors \
   --topics msmarco-passage-dev-subset \
   --output run.msmarco-v1-passage.bm25-rm3-d2q-t5-default.dev.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4
diff --git a/docs/release-notes/known-issues-v0.19.0.md b/docs/release-notes/known-issues-v0.19.0.md
new file mode 100644
index 000000000..9ed03d928
--- /dev/null
+++ b/docs/release-notes/known-issues-v0.19.0.md
@@ -0,0 +1,3 @@
+# Pyserini Known Issues (v0.19.0)
+
++ Index statistics for new Lucene 9 indexes are invalid. Thus, `validate_prebuilt_index` on an `IndexReader` will fail. See [#1334](https://github.com/castorini/pyserini/issues/1334).
diff --git a/pyserini/index/lucene/_base.py b/pyserini/index/lucene/_base.py
index f3b3aa671..13e740f70 100644
--- a/pyserini/index/lucene/_base.py
+++ b/pyserini/index/lucene/_base.py
@@ -190,54 +190,38 @@ def __init__(self, index_dir):
         self.reader = self.object.getReader(index_dir)
 
     @classmethod
-    def from_prebuilt_index(cls, prebuilt_index_name: str):
+    def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False):
         """Build an index reader from a prebuilt index; download the index if necessary.
 
         Parameters
         ----------
         prebuilt_index_name : str
             Prebuilt index name.
+        verbose : bool
+            Print status information.
 
         Returns
         -------
         IndexReader
             Index reader built from the prebuilt index.
         """
-        print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
+        if verbose:
+            print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
+
         try:
-            index_dir = download_prebuilt_index(prebuilt_index_name)
+            index_dir = download_prebuilt_index(prebuilt_index_name, verbose=verbose)
         except ValueError as e:
             print(str(e))
             return None
 
-        print(f'Initializing {prebuilt_index_name}...')
-        return cls(index_dir)
+        if verbose:
+            print(f'Initializing {prebuilt_index_name}...')
 
-    @classmethod
-    def validate_prebuilt_index(cls, prebuilt_index_name: str):
-        """Validate prebuilt index stats against stored stats."""
-        reader = cls.from_prebuilt_index(prebuilt_index_name)
-        stats = reader.stats()
+        index_reader = cls(index_dir)
+        # Validate index stats; will throw an exception if there are any issues.
+        index_reader.validate(prebuilt_index_name, verbose=verbose)
 
-        if prebuilt_index_name in TF_INDEX_INFO:
-            if stats['documents'] != TF_INDEX_INFO[prebuilt_index_name]['documents']:
-                raise ValueError('"documents" does not match!')
-            if stats['unique_terms'] != TF_INDEX_INFO[prebuilt_index_name]['unique_terms']:
-                raise ValueError('"unique_terms" does not match!')
-            if stats['total_terms'] != TF_INDEX_INFO[prebuilt_index_name]['total_terms']:
-                raise ValueError('"total_terms" does not match!')
-        else:
-            if stats['documents'] != IMPACT_INDEX_INFO[prebuilt_index_name]['documents']:
-                raise ValueError('"documents" does not match!')
-            if stats['unique_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['unique_terms']:
-                raise ValueError('"unique_terms" does not match!')
-            if stats['total_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['total_terms']:
-                raise ValueError('"total_terms" does not match!')
-
-        print(reader.stats())
-        print('Statistics match!')
-
-        return True
+        return index_reader
 
     @staticmethod
     def list_prebuilt_indexes():
@@ -267,6 +251,34 @@ def analyze(self, text: str, analyzer=None) -> List[str]:
             tokens.append(token)
         return tokens
 
+    def validate(self, prebuilt_index_name: str, verbose=False):
+        """Validate this index against stored stats for a pre-built index."""
+        stats = self.stats()
+
+        if prebuilt_index_name in TF_INDEX_INFO:
+            if stats['documents'] != TF_INDEX_INFO[prebuilt_index_name]['documents']:
+                raise ValueError('Pre-built index fails consistency check: "documents" does not match!')
+            if stats['unique_terms'] != TF_INDEX_INFO[prebuilt_index_name]['unique_terms']:
+                raise ValueError('Pre-built index fails consistency check: "unique_terms" does not match!')
+            if stats['total_terms'] != TF_INDEX_INFO[prebuilt_index_name]['total_terms']:
+                raise ValueError('Pre-built index fails consistency check: "total_terms" does not match!')
+        elif prebuilt_index_name in IMPACT_INDEX_INFO:
+            if stats['documents'] != IMPACT_INDEX_INFO[prebuilt_index_name]['documents']:
+                raise ValueError('Pre-built index fails consistency check: "documents" does not match!')
+            if stats['unique_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['unique_terms']:
+                raise ValueError('Pre-built index fails consistency check: "unique_terms" does not match!')
+            if stats['total_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['total_terms']:
+                raise ValueError('Pre-built index fails consistency check: "total_terms" does not match!')
+        else:
+            print(f'Unknown pre-built index \'{prebuilt_index_name}\'!')
+            return False
+
+        if verbose:
+            print(stats)
+            print(f'Index passes consistency checks against pre-built index \'{prebuilt_index_name}\'!')
+
+        return True
+
     def terms(self) -> Iterator[IndexTerm]:
         """Return an iterator over analyzed terms in the index.
 
diff --git a/pyserini/search/lucene/_searcher.py b/pyserini/search/lucene/_searcher.py
index 9f1da575e..f24f9839c 100644
--- a/pyserini/search/lucene/_searcher.py
+++ b/pyserini/search/lucene/_searcher.py
@@ -23,7 +23,7 @@
 from typing import Dict, List, Optional, Union
 
 from pyserini.fusion import FusionMethod, reciprocal_rank_fusion
-from pyserini.index import Document
+from pyserini.index import Document, IndexReader
 from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap
 from pyserini.search import JQuery, JQueryGenerator
 from pyserini.trectools import TrecRun
@@ -52,27 +52,39 @@ def __init__(self, index_dir: str):
         self.num_docs = self.object.get_total_num_docs()
 
     @classmethod
-    def from_prebuilt_index(cls, prebuilt_index_name: str):
+    def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False):
         """Build a searcher from a pre-built index; download the index if necessary.
 
         Parameters
         ----------
         prebuilt_index_name : str
             Prebuilt index name.
+        verbose : bool
+            Print status information.
 
         Returns
         -------
         LuceneSearcher
             Searcher built from the prebuilt index.
         """
-        print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
+        if verbose:
+            print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
+
         try:
-            index_dir = download_prebuilt_index(prebuilt_index_name)
+            index_dir = download_prebuilt_index(prebuilt_index_name, verbose=verbose)
         except ValueError as e:
             print(str(e))
             return None
 
-        print(f'Initializing {prebuilt_index_name}...')
+        # Currently, the only way to validate stats is to create a separate IndexReader, because there is no method
+        # to obtain the underlying reader of a SimpleSearcher; see https://github.com/castorini/anserini/issues/2013
+        index_reader = IndexReader(index_dir)
+        # This is janky as we've created a separate IndexReader for the sole purpose of validating index stats.
+        index_reader.validate(prebuilt_index_name, verbose=verbose)
+
+        if verbose:
+            print(f'Initializing {prebuilt_index_name}...')
+
         return cls(index_dir)
 
     @staticmethod
diff --git a/pyserini/util.py b/pyserini/util.py
index 6c831f6b0..5222ab544 100644
--- a/pyserini/util.py
+++ b/pyserini/util.py
@@ -228,7 +228,8 @@ def download_prebuilt_index(index_name, force=False, verbose=True, mirror=None):
     for url in target_index['urls']:
         local_filename = target_index['filename'] if 'filename' in target_index else None
         try:
-            return download_and_unpack_index(url, local_filename=local_filename, prebuilt=True, md5=index_md5)
+            return download_and_unpack_index(url, local_filename=local_filename,
+                                             prebuilt=True, md5=index_md5, verbose=verbose)
         except (HTTPError, URLError) as e:
             print(f'Unable to download pre-built index at {url}, trying next URL...')
     raise ValueError(f'Unable to download pre-built index at any known URLs.')
diff --git a/scripts/validate_prebuilt_indexes.py b/scripts/validate_prebuilt_indexes.py
index 15a8329e0..dc7cc4197 100644
--- a/scripts/validate_prebuilt_indexes.py
+++ b/scripts/validate_prebuilt_indexes.py
@@ -22,7 +22,7 @@
 def check_sparse(index):
     for entry in index:
         print(f'# Validating "{entry}"...')
-        IndexReader.validate_prebuilt_index(entry)
+        IndexReader.from_prebuilt_index(entry, verbose=True)
         print('\n')