diff --git a/README.md b/README.md index a7b98f74b..d5b7765e9 100644 --- a/README.md +++ b/README.md @@ -700,7 +700,7 @@ The following guides provide step-by-step instructions: ## Release History -+ v0.19.0 (w/ Anserini v0.16.1): November 2, 2022 [[Release Notes](docs/release-notes/release-notes-v0.19.0.md)] ++ v0.19.0 (w/ Anserini v0.16.1): November 2, 2022 [[Release Notes](docs/release-notes/release-notes-v0.19.0.md)] [[Known Issues](docs/release-notes/known-issues-v0.19.0.md)] + v0.18.0 (w/ Anserini v0.15.0): September 26, 2022 [[Release Notes](docs/release-notes/release-notes-v0.18.0.md)] (First release based on Lucene 9) + v0.17.1 (w/ Anserini v0.14.4): August 13, 2022 [[Release Notes](docs/release-notes/release-notes-v0.17.1.md)] (Final release based on Lucene 8) + v0.17.0 (w/ Anserini v0.14.3): May 28, 2022 [[Release Notes](docs/release-notes/release-notes-v0.17.0.md)] diff --git a/docs/2cr/mrtydi.html b/docs/2cr/mrtydi.html index b0ab2197b..0be00eff6 100644 --- a/docs/2cr/mrtydi.html +++ b/docs/2cr/mrtydi.html @@ -223,13 +223,11 @@
-Evaluation commands: @@ -244,13 +242,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ar \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic \ - --output run.mrtydi.bm25.ar.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ar.test.txt --bm25 --hits 100
-Evaluation commands: @@ -265,13 +261,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language bn \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali \ - --output run.mrtydi.bm25.bn.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.bn.test.txt --bm25 --hits 100
-Evaluation commands: @@ -286,13 +280,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language en \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english \ - --output run.mrtydi.bm25.en.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.en.test.txt --bm25 --hits 100
-Evaluation commands: @@ -307,13 +299,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language fi \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish \ - --output run.mrtydi.bm25.fi.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.fi.test.txt --bm25 --hits 100
-Evaluation commands: @@ -328,13 +318,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language id \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian \ - --output run.mrtydi.bm25.id.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.id.test.txt --bm25 --hits 100
-Evaluation commands: @@ -349,13 +337,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ja \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese \ - --output run.mrtydi.bm25.ja.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ja.test.txt --bm25 --hits 100
-Evaluation commands: @@ -370,13 +356,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ko \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean \ - --output run.mrtydi.bm25.ko.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ko.test.txt --bm25 --hits 100
-Evaluation commands: @@ -391,13 +375,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ru \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian \ - --output run.mrtydi.bm25.ru.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ru.test.txt --bm25 --hits 100
-Evaluation commands: @@ -412,13 +394,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language sw \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili \ - --output run.mrtydi.bm25.sw.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.sw.test.txt --bm25 --hits 100
-Evaluation commands: @@ -433,13 +413,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language te \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu \ - --output run.mrtydi.bm25.te.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.te.test.txt --bm25 --hits 100
-Evaluation commands: @@ -523,13 +501,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language th \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai \ - --output run.mrtydi.bm25.th.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.th.test.txt --bm25 --hits 100
-Evaluation commands: @@ -544,13 +520,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt --hits 100
-Evaluation commands: @@ -565,13 +539,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt --hits 100
-Evaluation commands: @@ -586,13 +558,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.en.test.txt --hits 100
-Evaluation commands: @@ -607,13 +577,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt --hits 100
-Evaluation commands: @@ -628,13 +596,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.id.test.txt --hits 100
-Evaluation commands: @@ -649,13 +615,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt --hits 100
-Evaluation commands: @@ -670,13 +634,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt --hits 100
-Evaluation commands: @@ -691,13 +653,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt --hits 100
-Evaluation commands: @@ -712,13 +672,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt --hits 100
-Evaluation commands: @@ -733,13 +691,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.te.test.txt --hits 100
-Evaluation commands: @@ -823,14 +779,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.th.test.txt --hits 100
-Evaluation commands: @@ -845,14 +799,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt --hits 100
-Evaluation commands: @@ -867,14 +819,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt --hits 100
-Evaluation commands: @@ -889,14 +839,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt --hits 100
-Evaluation commands: @@ -911,14 +859,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt --hits 100
-Evaluation commands: @@ -933,14 +879,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt --hits 100
-Evaluation commands: @@ -955,14 +899,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt --hits 100
-Evaluation commands: @@ -977,14 +919,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt --hits 100
-Evaluation commands: @@ -999,14 +939,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt --hits 100
-Evaluation commands: @@ -1021,14 +959,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt --hits 100
-Evaluation commands: @@ -1043,14 +979,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt --hits 100
-Evaluation commands: @@ -1134,14 +1068,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt --hits 100
-Evaluation commands: @@ -1156,14 +1088,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt --hits 100
-Evaluation commands: @@ -1178,14 +1108,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt --hits 100
-Evaluation commands: @@ -1200,14 +1128,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt --hits 100
-Evaluation commands: @@ -1222,14 +1148,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt --hits 100
-Evaluation commands: @@ -1244,14 +1168,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt --hits 100
-Evaluation commands: @@ -1266,14 +1188,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt --hits 100
-Evaluation commands: @@ -1288,14 +1208,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt --hits 100
-Evaluation commands: @@ -1310,14 +1228,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt --hits 100
-Evaluation commands: @@ -1332,14 +1248,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt --hits 100
-Evaluation commands: @@ -1354,14 +1268,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt --hits 100
-Evaluation commands: @@ -1445,14 +1357,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt --hits 100
-Evaluation commands: @@ -1467,14 +1377,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt --hits 100
-Evaluation commands: @@ -1489,14 +1397,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt --hits 100
-Evaluation commands: @@ -1511,14 +1417,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt --hits 100
-Evaluation commands: @@ -1533,14 +1437,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt --hits 100
-Evaluation commands: @@ -1555,14 +1457,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt --hits 100
-Evaluation commands: @@ -1577,14 +1477,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt --hits 100
-Evaluation commands: @@ -1599,14 +1497,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt --hits 100
-Evaluation commands: @@ -1621,14 +1517,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt --hits 100
-Evaluation commands: @@ -1643,14 +1537,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt --hits 100
-Evaluation commands: @@ -1665,14 +1557,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt --hits 100
-Evaluation commands: @@ -1783,13 +1673,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt --hits 100
-Evaluation commands: @@ -1804,13 +1692,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ar \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic \ - --output run.mrtydi.bm25.ar.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ar.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1825,13 +1711,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language bn \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali \ - --output run.mrtydi.bm25.bn.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.bn.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1846,13 +1730,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language en \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english \ - --output run.mrtydi.bm25.en.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.en.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1867,13 +1749,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language fi \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish \ - --output run.mrtydi.bm25.fi.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.fi.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1888,13 +1768,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language id \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian \ - --output run.mrtydi.bm25.id.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.id.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1909,13 +1787,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ja \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese \ - --output run.mrtydi.bm25.ja.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ja.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1930,13 +1806,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ko \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean \ - --output run.mrtydi.bm25.ko.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ko.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1951,13 +1825,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language ru \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian \ - --output run.mrtydi.bm25.ru.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.ru.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1972,13 +1844,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language sw \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili \ - --output run.mrtydi.bm25.sw.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.sw.test.txt --bm25 --hits 100
-Evaluation commands: @@ -1993,13 +1863,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language te \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu \ - --output run.mrtydi.bm25.te.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.te.test.txt --bm25 --hits 100
-Evaluation commands: @@ -2083,13 +1951,11 @@python -m pyserini.search.lucene \ +
python -m pyserini.search.lucene --threads 16 --batch-size 128 \ --language th \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai \ - --output run.mrtydi.bm25.th.test.txt \ - --batch 36 --threads 12 \ - --bm25 --hits 100 + --output run.mrtydi.bm25.th.test.txt --bm25 --hits 100
-Evaluation commands: @@ -2104,13 +1970,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ar.test.txt --hits 100
-Evaluation commands: @@ -2125,13 +1989,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.bn.test.txt --hits 100
-Evaluation commands: @@ -2146,13 +2008,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.en.test.txt --hits 100
-Evaluation commands: @@ -2167,13 +2027,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.fi.test.txt --hits 100
-Evaluation commands: @@ -2188,13 +2046,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.id.test.txt --hits 100
-Evaluation commands: @@ -2209,13 +2065,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ja.test.txt --hits 100
-Evaluation commands: @@ -2230,13 +2084,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ko.test.txt --hits 100
-Evaluation commands: @@ -2251,13 +2103,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.ru.test.txt --hits 100
-Evaluation commands: @@ -2272,13 +2122,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.sw.test.txt --hits 100
-Evaluation commands: @@ -2293,13 +2141,11 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.te.test.txt --hits 100
-Evaluation commands: @@ -2383,14 +2229,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder castorini/mdpr-question-nq \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-nq \ - --output run.mrtydi.mdpr-split-pft-nq.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-split-pft-nq.th.test.txt --hits 100
-Evaluation commands: @@ -2405,14 +2249,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ar.test.txt --hits 100
-Evaluation commands: @@ -2427,14 +2269,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.bn.test.txt --hits 100
-Evaluation commands: @@ -2449,14 +2289,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.en.test.txt --hits 100
-Evaluation commands: @@ -2471,14 +2309,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.fi.test.txt --hits 100
-Evaluation commands: @@ -2493,14 +2329,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.id.test.txt --hits 100
-Evaluation commands: @@ -2515,14 +2349,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ja.test.txt --hits 100
-Evaluation commands: @@ -2537,14 +2369,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ko.test.txt --hits 100
-Evaluation commands: @@ -2559,14 +2389,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.ru.test.txt --hits 100
-Evaluation commands: @@ -2581,14 +2409,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.sw.test.txt --hits 100
-Evaluation commands: @@ -2603,14 +2429,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.te.test.txt --hits 100
-Evaluation commands: @@ -2694,14 +2518,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-nq \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-tied-pft-nq \ - --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-nq.th.test.txt --hits 100
-Evaluation commands: @@ -2716,14 +2538,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ar.test.txt --hits 100
-Evaluation commands: @@ -2738,14 +2558,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.bn.test.txt --hits 100
-Evaluation commands: @@ -2760,14 +2578,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.en.test.txt --hits 100
-Evaluation commands: @@ -2782,14 +2598,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.fi.test.txt --hits 100
-Evaluation commands: @@ -2804,14 +2618,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.id.test.txt --hits 100
-Evaluation commands: @@ -2826,14 +2638,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ja.test.txt --hits 100
-Evaluation commands: @@ -2848,14 +2658,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ko.test.txt --hits 100
-Evaluation commands: @@ -2870,14 +2678,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.ru.test.txt --hits 100
-Evaluation commands: @@ -2892,14 +2698,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.sw.test.txt --hits 100
-Evaluation commands: @@ -2914,14 +2718,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.te.test.txt --hits 100
-Evaluation commands: @@ -3005,14 +2807,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco \ - --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco.th.test.txt --hits 100
-Evaluation commands: @@ -3027,14 +2827,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-arabic-test \ --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ar.test.txt --hits 100
-Evaluation commands: @@ -3049,14 +2847,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-bengali-test \ --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.bn.test.txt --hits 100
-Evaluation commands: @@ -3071,14 +2867,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-english-test \ --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.en.test.txt --hits 100
-Evaluation commands: @@ -3093,14 +2887,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-finnish-test \ --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.fi.test.txt --hits 100
-Evaluation commands: @@ -3115,14 +2907,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-indonesian-test \ --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.id.test.txt --hits 100
-Evaluation commands: @@ -3137,14 +2927,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-japanese-test \ --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ja.test.txt --hits 100
-Evaluation commands: @@ -3159,14 +2947,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-korean-test \ --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ko.test.txt --hits 100
-Evaluation commands: @@ -3181,14 +2967,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-russian-test \ --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.ru.test.txt --hits 100
-Evaluation commands: @@ -3203,14 +2987,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-swahili-test \ --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.sw.test.txt --hits 100
-Evaluation commands: @@ -3225,14 +3007,12 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-telugu-test \ --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.te.test.txt --hits 100
-Evaluation commands: diff --git a/docs/2cr/msmarco-v1-passage.html b/docs/2cr/msmarco-v1-passage.html index 0e97a712a..23f3b279f 100644 --- a/docs/2cr/msmarco-v1-passage.html +++ b/docs/2cr/msmarco-v1-passage.html @@ -934,7 +934,7 @@python -m pyserini.search.faiss \ +
python -m pyserini.search.faiss --threads 16 --batch-size 512 \ --encoder-class auto \ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \ --topics mrtydi-v1.1-thai-test \ --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco-ft-all \ - --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt \ - --batch 36 --threads 12 \ - --hits 100 + --output run.mrtydi.mdpr-tied-pft-msmarco-ft-all.th.test.txt --hits 100
python -m pyserini.search.lucene \ --threads 16 --batch-size 128 \ - --index msmarco-v1-doc-slim \ + --index msmarco-v1-passage-d2q-t5-docvectors \ --topics dl19-passage \ --output run.msmarco-v1-passage.bm25-rm3-d2q-t5-default.dl19.txt \ --bm25 --rm3 --k1 0.9 --b 0.4 @@ -955,7 +955,7 @@
MS MARCO V1 Passage
python -m pyserini.search.lucene \ --threads 16 --batch-size 128 \ - --index msmarco-v1-doc-slim \ + --index msmarco-v1-passage-d2q-t5-docvectors \ --topics dl20 \ --output run.msmarco-v1-passage.bm25-rm3-d2q-t5-default.dl20.txt \ --bm25 --rm3 --k1 0.9 --b 0.4 @@ -976,7 +976,7 @@
MS MARCO V1 Passage
python -m pyserini.search.lucene \ --threads 16 --batch-size 128 \ - --index msmarco-v1-doc-slim \ + --index msmarco-v1-passage-d2q-t5-docvectors \ --topics msmarco-passage-dev-subset \ --output run.msmarco-v1-passage.bm25-rm3-d2q-t5-default.dev.txt \ --bm25 --rm3 --k1 0.9 --b 0.4 diff --git a/docs/release-notes/known-issues-v0.19.0.md b/docs/release-notes/known-issues-v0.19.0.md new file mode 100644 index 000000000..9ed03d928 --- /dev/null +++ b/docs/release-notes/known-issues-v0.19.0.md @@ -0,0 +1,3 @@ +# Pyserini Known Issues (v0.19.0) + ++ Index statistics for new Lucene 9 indexes are invalid. Thus, `validate_prebuilt_index` on an `IndexReader` will fail. See [#1334](https://github.com/castorini/pyserini/issues/1334). diff --git a/pyserini/index/lucene/_base.py b/pyserini/index/lucene/_base.py index f3b3aa671..13e740f70 100644 --- a/pyserini/index/lucene/_base.py +++ b/pyserini/index/lucene/_base.py @@ -190,54 +190,38 @@ def __init__(self, index_dir): self.reader = self.object.getReader(index_dir) @classmethod - def from_prebuilt_index(cls, prebuilt_index_name: str): + def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False): """Build an index reader from a prebuilt index; download the index if necessary. Parameters ---------- prebuilt_index_name : str Prebuilt index name. + verbose : bool + Print status information. Returns ------- IndexReader Index reader built from the prebuilt index. 
""" - print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + if verbose: + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + try: - index_dir = download_prebuilt_index(prebuilt_index_name) + index_dir = download_prebuilt_index(prebuilt_index_name, verbose=verbose) except ValueError as e: print(str(e)) return None - print(f'Initializing {prebuilt_index_name}...') - return cls(index_dir) + if verbose: + print(f'Initializing {prebuilt_index_name}...') - @classmethod - def validate_prebuilt_index(cls, prebuilt_index_name: str): - """Validate prebuilt index stats against stored stats.""" - reader = cls.from_prebuilt_index(prebuilt_index_name) - stats = reader.stats() + index_reader = cls(index_dir) + # Validate index stats; will throw exception there are any issues. + index_reader.validate(prebuilt_index_name, verbose=verbose) - if prebuilt_index_name in TF_INDEX_INFO: - if stats['documents'] != TF_INDEX_INFO[prebuilt_index_name]['documents']: - raise ValueError('"documents" does not match!') - if stats['unique_terms'] != TF_INDEX_INFO[prebuilt_index_name]['unique_terms']: - raise ValueError('"unique_terms" does not match!') - if stats['total_terms'] != TF_INDEX_INFO[prebuilt_index_name]['total_terms']: - raise ValueError('"total_terms" does not match!') - else: - if stats['documents'] != IMPACT_INDEX_INFO[prebuilt_index_name]['documents']: - raise ValueError('"documents" does not match!') - if stats['unique_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['unique_terms']: - raise ValueError('"unique_terms" does not match!') - if stats['total_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['total_terms']: - raise ValueError('"total_terms" does not match!') - - print(reader.stats()) - print('Statistics match!') - - return True + return index_reader @staticmethod def list_prebuilt_indexes(): @@ -267,6 +251,34 @@ def analyze(self, text: str, analyzer=None) -> List[str]: tokens.append(token) return tokens + def 
validate(self, prebuilt_index_name: str, verbose=False): + """Validate this index against stored stats for a pre-built index.""" + stats = self.stats() + + if prebuilt_index_name in TF_INDEX_INFO: + if stats['documents'] != TF_INDEX_INFO[prebuilt_index_name]['documents']: + raise ValueError('Pre-built index fails consistency check: "documents" does not match!') + if stats['unique_terms'] != TF_INDEX_INFO[prebuilt_index_name]['unique_terms']: + raise ValueError('Pre-built index fails consistency check: "unique_terms" does not match!') + if stats['total_terms'] != TF_INDEX_INFO[prebuilt_index_name]['total_terms']: + raise ValueError('Pre-built index fails consistency check: "total_terms" does not match!') + elif prebuilt_index_name in IMPACT_INDEX_INFO: + if stats['documents'] != IMPACT_INDEX_INFO[prebuilt_index_name]['documents']: + raise ValueError('Pre-built index fails consistency check: "documents" does not match!') + if stats['unique_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['unique_terms']: + raise ValueError('Pre-built index fails consistency check: "unique_terms" does not match!') + if stats['total_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['total_terms']: + raise ValueError('Pre-built index fails consistency check: "total_terms" does not match!') + else: + print(f'Unknown pre-built index \'{prebuilt_index_name}\'!') + return False + + if verbose: + print(stats) + print(f'Index passes consistency checks against pre-built index \'{prebuilt_index_name}\'!') + + return True + def terms(self) -> Iterator[IndexTerm]: """Return an iterator over analyzed terms in the index. 
diff --git a/pyserini/search/lucene/_searcher.py b/pyserini/search/lucene/_searcher.py index 9f1da575e..f24f9839c 100644 --- a/pyserini/search/lucene/_searcher.py +++ b/pyserini/search/lucene/_searcher.py @@ -23,7 +23,7 @@ from typing import Dict, List, Optional, Union from pyserini.fusion import FusionMethod, reciprocal_rank_fusion -from pyserini.index import Document +from pyserini.index import Document, IndexReader from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap from pyserini.search import JQuery, JQueryGenerator from pyserini.trectools import TrecRun @@ -52,27 +52,39 @@ def __init__(self, index_dir: str): self.num_docs = self.object.get_total_num_docs() @classmethod - def from_prebuilt_index(cls, prebuilt_index_name: str): + def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False): """Build a searcher from a pre-built index; download the index if necessary. Parameters ---------- prebuilt_index_name : str Prebuilt index name. + verbose : bool + Print status information. Returns ------- LuceneSearcher Searcher built from the prebuilt index. """ - print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + if verbose: + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + try: - index_dir = download_prebuilt_index(prebuilt_index_name) + index_dir = download_prebuilt_index(prebuilt_index_name, verbose=verbose) except ValueError as e: print(str(e)) return None - print(f'Initializing {prebuilt_index_name}...') + # Currently, the only way to validate stats is to create a separate IndexReader, because there is no method + # to obtain the underlying reader of a SimpleSearcher; see https://github.com/castorini/anserini/issues/2013 + index_reader = IndexReader(index_dir) + # This is janky as we're creating a separate IndexReader for the sole purpose of validating index stats.
+ index_reader.validate(prebuilt_index_name, verbose=verbose) + + if verbose: + print(f'Initializing {prebuilt_index_name}...') + return cls(index_dir) @staticmethod diff --git a/pyserini/util.py b/pyserini/util.py index 6c831f6b0..5222ab544 100644 --- a/pyserini/util.py +++ b/pyserini/util.py @@ -228,7 +228,8 @@ def download_prebuilt_index(index_name, force=False, verbose=True, mirror=None): for url in target_index['urls']: local_filename = target_index['filename'] if 'filename' in target_index else None try: - return download_and_unpack_index(url, local_filename=local_filename, prebuilt=True, md5=index_md5) + return download_and_unpack_index(url, local_filename=local_filename, + prebuilt=True, md5=index_md5, verbose=verbose) except (HTTPError, URLError) as e: print(f'Unable to download pre-built index at {url}, trying next URL...') raise ValueError(f'Unable to download pre-built index at any known URLs.') diff --git a/scripts/validate_prebuilt_indexes.py b/scripts/validate_prebuilt_indexes.py index 15a8329e0..dc7cc4197 100644 --- a/scripts/validate_prebuilt_indexes.py +++ b/scripts/validate_prebuilt_indexes.py @@ -22,7 +22,7 @@ def check_sparse(index): for entry in index: print(f'# Validating "{entry}"...') - IndexReader.validate_prebuilt_index(entry) + IndexReader.from_prebuilt_index(entry, verbose=True) print('\n')