From 59330e355b4aaf6754622cb3a136259dea0d8d05 Mon Sep 17 00:00:00 2001 From: Jimmy Lin Date: Sat, 8 Jun 2024 08:47:42 -0400 Subject: [PATCH] Rename MS MARCO regressions into consistent schema (#2519) --- README.md | 152 +++++++++--------- docs/regressions.md | 146 ++++++++--------- ...l19-doc-segmented.unicoil-noexp.cached.md} | 12 +- ...ions-dl19-doc-segmented.unicoil.cached.md} | 12 +- ...sage.bge-base-en-v1.5.hnsw-int8.cached.md} | 24 +-- ...passage.bge-base-en-v1.5.hnsw-int8.onnx.md | 10 +- ...9-passage.bge-base-en-v1.5.hnsw.cached.md} | 24 +-- ...dl19-passage.bge-base-en-v1.5.hnsw.onnx.md | 10 +- ...re-embed-english-v3.0.hnsw-int8.cached.md} | 24 +-- ....cohere-embed-english-v3.0.hnsw.cached.md} | 24 +-- ...assage.cos-dpr-distil.hnsw-int8.cached.md} | 24 +-- ...9-passage.cos-dpr-distil.hnsw-int8.onnx.md | 10 +- ...l19-passage.cos-dpr-distil.hnsw.cached.md} | 24 +-- ...s-dl19-passage.cos-dpr-distil.hnsw.onnx.md | 10 +- ...9-passage.openai-ada2.hnsw-int8.cached.md} | 24 +-- ...s-dl19-passage.openai-ada2.hnsw.cached.md} | 24 +-- ...sions-dl19-passage.splade-pp-ed.cached.md} | 46 +++--- ...sions-dl19-passage.splade-pp-sd.cached.md} | 46 +++--- ...ions-dl19-passage.unicoil-noexp.cached.md} | 46 +++--- ...egressions-dl19-passage.unicoil.cached.md} | 46 +++--- ...l20-doc-segmented.unicoil-noexp.cached.md} | 46 +++--- ...ions-dl20-doc-segmented.unicoil.cached.md} | 46 +++--- ...sage.bge-base-en-v1.5.hnsw-int8.cached.md} | 24 +-- ...0-passage.bge-base-en-v1.5.hnsw.cached.md} | 24 +-- ...re-embed-english-v3.0.hnsw-int8.cached.md} | 24 +-- ....cohere-embed-english-v3.0.hnsw.cached.md} | 24 +-- ...assage.cos-dpr-distil.hnsw-int8.cached.md} | 24 +-- ...l20-passage.cos-dpr-distil.hnsw.cached.md} | 24 +-- ...0-passage.openai-ada2.hnsw-int8.cached.md} | 24 +-- ...s-dl20-passage.openai-ada2.hnsw.cached.md} | 24 +-- ...sions-dl20-passage.splade-pp-ed.cached.md} | 46 +++--- ...sions-dl20-passage.splade-pp-sd.cached.md} | 46 +++--- ...ions-dl20-passage.unicoil-noexp.cached.md} | 
46 +++--- ...egressions-dl20-passage.unicoil.cached.md} | 46 +++--- ...-doc-segmented.unicoil-0shot-v2.cached.md} | 44 ++--- ...l21-doc-segmented.unicoil-0shot.cached.md} | 20 +-- ...egmented.unicoil-noexp-0shot-v2.cached.md} | 44 ++--- ...c-segmented.unicoil-noexp-0shot.cached.md} | 20 +-- ...sions-dl21-passage.splade-pp-ed.cached.md} | 50 +++--- ...sions-dl21-passage.splade-pp-sd.cached.md} | 50 +++--- ...ions-dl21-passage.unicoil-0shot.cached.md} | 50 +++--- ...l21-passage.unicoil-noexp-0shot.cached.md} | 50 +++--- ...-doc-segmented.unicoil-0shot-v2.cached.md} | 44 ++--- ...egmented.unicoil-noexp-0shot-v2.cached.md} | 44 ++--- ...sions-dl22-passage.splade-pp-ed.cached.md} | 50 +++--- ...sions-dl22-passage.splade-pp-sd.cached.md} | 50 +++--- ...ions-dl22-passage.unicoil-0shot.cached.md} | 50 +++--- ...l22-passage.unicoil-noexp-0shot.cached.md} | 50 +++--- ...-doc-segmented.unicoil-0shot-v2.cached.md} | 44 ++--- ...egmented.unicoil-noexp-0shot-v2.cached.md} | 44 ++--- ...sions-dl23-passage.splade-pp-ed.cached.md} | 50 +++--- ...sions-dl23-passage.splade-pp-sd.cached.md} | 50 +++--- ...ions-dl23-passage.unicoil-0shot.cached.md} | 50 +++--- ...l23-passage.unicoil-noexp-0shot.cached.md} | 50 +++--- ...-v1-doc-segmented.unicoil-noexp.cached.md} | 12 +- ...smarco-v1-doc-segmented.unicoil.cached.md} | 12 +- ...sage.bge-base-en-v1.5.hnsw-int8.cached.md} | 24 +-- ...1-passage.bge-base-en-v1.5.hnsw.cached.md} | 24 +-- ...re-embed-english-v3.0.hnsw-int8.cached.md} | 24 +-- ....cohere-embed-english-v3.0.hnsw.cached.md} | 24 +-- ...assage.cos-dpr-distil.hnsw-int8.cached.md} | 24 +-- ...-v1-passage.cos-dpr-distil.hnsw.cached.md} | 24 +-- ...s-msmarco-v1-passage.deepimpact.cached.md} | 22 +-- ...o-v1-passage.distill-splade-max.cached.md} | 22 +-- ...1-passage.openai-ada2.hnsw-int8.cached.md} | 24 +-- ...rco-v1-passage.openai-ada2.hnsw.cached.md} | 24 +-- ...msmarco-v1-passage.splade-pp-ed.cached.md} | 22 +-- ...msmarco-v1-passage.splade-pp-sd.cached.md} | 22 +-- 
...smarco-v1-passage.unicoil-noexp.cached.md} | 12 +- ...passage.unicoil-tilde-expansion.cached.md} | 12 +- ...ions-msmarco-v1-passage.unicoil.cached.md} | 12 +- ...-doc-segmented.unicoil-0shot-v2.cached.md} | 12 +- ...-v2-doc-segmented.unicoil-0shot.cached.md} | 12 +- ...egmented.unicoil-noexp-0shot-v2.cached.md} | 12 +- ...c-segmented.unicoil-noexp-0shot.cached.md} | 12 +- ...msmarco-v2-passage.splade-pp-ed.cached.md} | 26 +-- ...msmarco-v2-passage.splade-pp-sd.cached.md} | 26 +-- ...smarco-v2-passage.unicoil-0shot.cached.md} | 10 +- ...-v2-passage.unicoil-noexp-0shot.cached.md} | 10 +- src/main/python/regressions-batch03.txt | 148 ++++++++--------- src/main/python/run_regression.py | 8 +- ...c-segmented.unicoil-noexp.cached.template} | 0 ...l19-doc-segmented.unicoil.cached.template} | 0 ...ge-base-en-v1.5.hnsw-int8.cached.template} | 0 ...age.bge-base-en-v1.5.hnsw.cached.template} | 0 ...ed-english-v3.0.hnsw-int8.cached.template} | 0 ...e-embed-english-v3.0.hnsw.cached.template} | 0 ....cos-dpr-distil.hnsw-int8.cached.template} | 0 ...ssage.cos-dpr-distil.hnsw.cached.template} | 0 ...age.openai-ada2.hnsw-int8.cached.template} | 0 ...-passage.openai-ada2.hnsw.cached.template} | 0 ...dl19-passage.splade-pp-ed.cached.template} | 0 ...dl19-passage.splade-pp-sd.cached.template} | 0 ...l19-passage.unicoil-noexp.cached.template} | 0 ...e => dl19-passage.unicoil.cached.template} | 0 ...c-segmented.unicoil-noexp.cached.template} | 0 ...l20-doc-segmented.unicoil.cached.template} | 0 ...ge-base-en-v1.5.hnsw-int8.cached.template} | 0 ...age.bge-base-en-v1.5.hnsw.cached.template} | 0 ...ed-english-v3.0.hnsw-int8.cached.template} | 0 ...e-embed-english-v3.0.hnsw.cached.template} | 0 ....cos-dpr-distil.hnsw-int8.cached.template} | 0 ...ssage.cos-dpr-distil.hnsw.cached.template} | 0 ...age.openai-ada2.hnsw-int8.cached.template} | 0 ...-passage.openai-ada2.hnsw.cached.template} | 0 ...dl20-passage.splade-pp-ed.cached.template} | 0 ...dl20-passage.splade-pp-sd.cached.template} | 
0 ...l20-passage.unicoil-noexp.cached.template} | 0 ...e => dl20-passage.unicoil.cached.template} | 0 ...egmented.unicoil-0shot-v2.cached.template} | 0 ...c-segmented.unicoil-0shot.cached.template} | 0 ...ed.unicoil-noexp-0shot-v2.cached.template} | 0 ...ented.unicoil-noexp-0shot.cached.template} | 0 ...dl21-passage.splade-pp-ed.cached.template} | 0 ...dl21-passage.splade-pp-sd.cached.template} | 0 ...l21-passage.unicoil-0shot.cached.template} | 0 ...ssage.unicoil-noexp-0shot.cached.template} | 0 ...egmented.unicoil-0shot-v2.cached.template} | 0 ...ed.unicoil-noexp-0shot-v2.cached.template} | 0 ...dl22-passage.splade-pp-ed.cached.template} | 0 ...dl22-passage.splade-pp-sd.cached.template} | 0 ...l22-passage.unicoil-0shot.cached.template} | 0 ...ssage.unicoil-noexp-0shot.cached.template} | 0 ...egmented.unicoil-0shot-v2.cached.template} | 0 ...ed.unicoil-noexp-0shot-v2.cached.template} | 0 ...dl23-passage.splade-pp-ed.cached.template} | 0 ...dl23-passage.splade-pp-sd.cached.template} | 0 ...l23-passage.unicoil-0shot.cached.template} | 0 ...ssage.unicoil-noexp-0shot.cached.template} | 0 ...c-segmented.unicoil-noexp.cached.template} | 0 ...-v1-doc-segmented.unicoil.cached.template} | 0 ...ge-base-en-v1.5.hnsw-int8.cached.template} | 0 ...age.bge-base-en-v1.5.hnsw.cached.template} | 0 ...ed-english-v3.0.hnsw-int8.cached.template} | 0 ...e-embed-english-v3.0.hnsw.cached.template} | 0 ....cos-dpr-distil.hnsw-int8.cached.template} | 0 ...ssage.cos-dpr-distil.hnsw.cached.template} | 0 ...rco-v1-passage.deepimpact.cached.template} | 0 ...assage.distill-splade-max.cached.template} | 0 ...age.openai-ada2.hnsw-int8.cached.template} | 0 ...-passage.openai-ada2.hnsw.cached.template} | 0 ...o-v1-passage.splade-pp-ed.cached.template} | 0 ...o-v1-passage.splade-pp-sd.cached.template} | 0 ...-v1-passage.unicoil-noexp.cached.template} | 0 ...e.unicoil-tilde-expansion.cached.template} | 0 ...smarco-v1-passage.unicoil.cached.template} | 0 ...egmented.unicoil-0shot-v2.cached.template} | 
0 ...c-segmented.unicoil-0shot.cached.template} | 0 ...ed.unicoil-noexp-0shot-v2.cached.template} | 0 ...ented.unicoil-noexp-0shot.cached.template} | 0 ...o-v2-passage.splade-pp-ed.cached.template} | 0 ...o-v2-passage.splade-pp-sd.cached.template} | 0 ...-v2-passage.unicoil-0shot.cached.template} | 0 ...ssage.unicoil-noexp-0shot.cached.template} | 0 ...9-doc-segmented.unicoil-noexp.cached.yaml} | 0 ...=> dl19-doc-segmented.unicoil.cached.yaml} | 0 ...ge.bge-base-en-v1.5.hnsw-int8.cached.yaml} | 2 +- ...ssage.bge-base-en-v1.5.hnsw-int8.onnx.yaml | 2 +- ...passage.bge-base-en-v1.5.hnsw.cached.yaml} | 2 +- ...19-passage.bge-base-en-v1.5.hnsw.onnx.yaml | 2 +- ...-embed-english-v3.0.hnsw-int8.cached.yaml} | 2 +- ...ohere-embed-english-v3.0.hnsw.cached.yaml} | 2 +- ...sage.cos-dpr-distil.hnsw-int8.cached.yaml} | 2 +- ...passage.cos-dpr-distil.hnsw-int8.onnx.yaml | 2 +- ...9-passage.cos-dpr-distil.hnsw.cached.yaml} | 2 +- ...dl19-passage.cos-dpr-distil.hnsw.onnx.yaml | 2 +- ...passage.openai-ada2.hnsw-int8.cached.yaml} | 2 +- ...dl19-passage.openai-ada2.hnsw.cached.yaml} | 2 +- ... => dl19-passage.splade-pp-ed.cached.yaml} | 6 +- ... => dl19-passage.splade-pp-sd.cached.yaml} | 6 +- ...=> dl19-passage.unicoil-noexp.cached.yaml} | 6 +- ....yaml => dl19-passage.unicoil.cached.yaml} | 6 +- ...0-doc-segmented.unicoil-noexp.cached.yaml} | 6 +- ...=> dl20-doc-segmented.unicoil.cached.yaml} | 6 +- ...ge.bge-base-en-v1.5.hnsw-int8.cached.yaml} | 2 +- ...passage.bge-base-en-v1.5.hnsw.cached.yaml} | 2 +- ...-embed-english-v3.0.hnsw-int8.cached.yaml} | 2 +- ...ohere-embed-english-v3.0.hnsw.cached.yaml} | 2 +- ...sage.cos-dpr-distil.hnsw-int8.cached.yaml} | 2 +- ...0-passage.cos-dpr-distil.hnsw.cached.yaml} | 2 +- ...passage.openai-ada2.hnsw-int8.cached.yaml} | 2 +- ...dl20-passage.openai-ada2.hnsw.cached.yaml} | 2 +- ... => dl20-passage.splade-pp-ed.cached.yaml} | 6 +- ... 
=> dl20-passage.splade-pp-sd.cached.yaml} | 6 +- ...=> dl20-passage.unicoil-noexp.cached.yaml} | 6 +- ....yaml => dl20-passage.unicoil.cached.yaml} | 6 +- ...oc-segmented.unicoil-0shot-v2.cached.yaml} | 6 +- ...1-doc-segmented.unicoil-0shot.cached.yaml} | 2 +- ...mented.unicoil-noexp-0shot-v2.cached.yaml} | 6 +- ...segmented.unicoil-noexp-0shot.cached.yaml} | 2 +- ... => dl21-passage.splade-pp-ed.cached.yaml} | 6 +- ... => dl21-passage.splade-pp-sd.cached.yaml} | 6 +- ...=> dl21-passage.unicoil-0shot.cached.yaml} | 6 +- ...1-passage.unicoil-noexp-0shot.cached.yaml} | 6 +- ...oc-segmented.unicoil-0shot-v2.cached.yaml} | 6 +- ...mented.unicoil-noexp-0shot-v2.cached.yaml} | 6 +- ... => dl22-passage.splade-pp-ed.cached.yaml} | 6 +- ... => dl22-passage.splade-pp-sd.cached.yaml} | 6 +- ...=> dl22-passage.unicoil-0shot.cached.yaml} | 6 +- ...2-passage.unicoil-noexp-0shot.cached.yaml} | 6 +- ...oc-segmented.unicoil-0shot-v2.cached.yaml} | 6 +- ...mented.unicoil-noexp-0shot-v2.cached.yaml} | 6 +- ... => dl23-passage.splade-pp-ed.cached.yaml} | 6 +- ... 
=> dl23-passage.splade-pp-sd.cached.yaml} | 6 +- ...=> dl23-passage.unicoil-0shot.cached.yaml} | 6 +- ...3-passage.unicoil-noexp-0shot.cached.yaml} | 6 +- ...1-doc-segmented.unicoil-noexp.cached.yaml} | 0 ...arco-v1-doc-segmented.unicoil.cached.yaml} | 0 ...ge.bge-base-en-v1.5.hnsw-int8.cached.yaml} | 2 +- ...passage.bge-base-en-v1.5.hnsw.cached.yaml} | 2 +- ...-embed-english-v3.0.hnsw-int8.cached.yaml} | 2 +- ...ohere-embed-english-v3.0.hnsw.cached.yaml} | 2 +- ...sage.cos-dpr-distil.hnsw-int8.cached.yaml} | 2 +- ...1-passage.cos-dpr-distil.hnsw.cached.yaml} | 2 +- ...msmarco-v1-passage.deepimpact.cached.yaml} | 2 +- ...v1-passage.distill-splade-max.cached.yaml} | 2 +- ...passage.openai-ada2.hnsw-int8.cached.yaml} | 2 +- ...o-v1-passage.openai-ada2.hnsw.cached.yaml} | 2 +- ...marco-v1-passage.splade-pp-ed.cached.yaml} | 6 +- ...marco-v1-passage.splade-pp-sd.cached.yaml} | 6 +- ...arco-v1-passage.unicoil-noexp.cached.yaml} | 0 ...ssage.unicoil-tilde-expansion.cached.yaml} | 0 ...=> msmarco-v1-passage.unicoil.cached.yaml} | 0 ...oc-segmented.unicoil-0shot-v2.cached.yaml} | 0 ...2-doc-segmented.unicoil-0shot.cached.yaml} | 0 ...mented.unicoil-noexp-0shot-v2.cached.yaml} | 0 ...segmented.unicoil-noexp-0shot.cached.yaml} | 0 ...marco-v2-passage.splade-pp-ed.cached.yaml} | 2 +- ...marco-v2-passage.splade-pp-sd.cached.yaml} | 2 +- ...arco-v2-passage.unicoil-0shot.cached.yaml} | 0 ...2-passage.unicoil-noexp-0shot.cached.yaml} | 0 231 files changed, 1488 insertions(+), 1486 deletions(-) rename docs/regressions/{regressions-dl19-doc-segmented.unicoil-noexp.md => regressions-dl19-doc-segmented.unicoil-noexp.cached.md} (95%) rename docs/regressions/{regressions-dl19-doc-segmented.unicoil.md => regressions-dl19-doc-segmented.unicoil.cached.md} (95%) rename docs/regressions/{regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.md => regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md} (90%) rename docs/regressions/{regressions-dl19-passage.bge-base-en-v1.5.hnsw.md => 
regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md} (89%) rename docs/regressions/{regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.md => regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md} (88%) rename docs/regressions/{regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.md => regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md} (87%) rename docs/regressions/{regressions-dl19-passage.cos-dpr-distil.hnsw-int8.md => regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md} (90%) rename docs/regressions/{regressions-dl19-passage.cos-dpr-distil.hnsw.md => regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md} (89%) rename docs/regressions/{regressions-dl19-passage.openai-ada2.hnsw-int8.md => regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md} (90%) rename docs/regressions/{regressions-dl19-passage.openai-ada2.hnsw.md => regressions-dl19-passage.openai-ada2.hnsw.cached.md} (90%) rename docs/regressions/{regressions-dl19-passage.splade-pp-ed.md => regressions-dl19-passage.splade-pp-ed.cached.md} (88%) rename docs/regressions/{regressions-dl19-passage.splade-pp-sd.md => regressions-dl19-passage.splade-pp-sd.cached.md} (88%) rename docs/regressions/{regressions-dl19-passage.unicoil-noexp.md => regressions-dl19-passage.unicoil-noexp.cached.md} (87%) rename docs/regressions/{regressions-dl19-passage.unicoil.md => regressions-dl19-passage.unicoil.cached.md} (84%) rename docs/regressions/{regressions-dl20-doc-segmented.unicoil-noexp.md => regressions-dl20-doc-segmented.unicoil-noexp.cached.md} (89%) rename docs/regressions/{regressions-dl20-doc-segmented.unicoil.md => regressions-dl20-doc-segmented.unicoil.cached.md} (82%) rename docs/regressions/{regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.md => regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md} (90%) rename docs/regressions/{regressions-dl20-passage.bge-base-en-v1.5.hnsw.md => regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md} (89%) 
rename docs/regressions/{regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.md => regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md} (88%) rename docs/regressions/{regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.md => regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md} (87%) rename docs/regressions/{regressions-dl20-passage.cos-dpr-distil.hnsw-int8.md => regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md} (90%) rename docs/regressions/{regressions-dl20-passage.cos-dpr-distil.hnsw.md => regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md} (89%) rename docs/regressions/{regressions-dl20-passage.openai-ada2.hnsw-int8.md => regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md} (90%) rename docs/regressions/{regressions-dl20-passage.openai-ada2.hnsw.md => regressions-dl20-passage.openai-ada2.hnsw.cached.md} (90%) rename docs/regressions/{regressions-dl20-passage.splade-pp-ed.md => regressions-dl20-passage.splade-pp-ed.cached.md} (89%) rename docs/regressions/{regressions-dl20-passage.splade-pp-sd.md => regressions-dl20-passage.splade-pp-sd.cached.md} (89%) rename docs/regressions/{regressions-dl20-passage.unicoil-noexp.md => regressions-dl20-passage.unicoil-noexp.cached.md} (87%) rename docs/regressions/{regressions-dl20-passage.unicoil.md => regressions-dl20-passage.unicoil.cached.md} (84%) rename docs/regressions/{regressions-dl21-doc-segmented.unicoil-0shot-v2.md => regressions-dl21-doc-segmented.unicoil-0shot-v2.cached.md} (89%) rename docs/regressions/{regressions-dl21-doc-segmented.unicoil-0shot.md => regressions-dl21-doc-segmented.unicoil-0shot.cached.md} (92%) rename docs/regressions/{regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.md => regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.md} (87%) rename docs/regressions/{regressions-dl21-doc-segmented.unicoil-noexp-0shot.md => regressions-dl21-doc-segmented.unicoil-noexp-0shot.cached.md} (92%) rename 
docs/regressions/{regressions-dl21-passage.splade-pp-ed.md => regressions-dl21-passage.splade-pp-ed.cached.md} (88%) rename docs/regressions/{regressions-dl21-passage.splade-pp-sd.md => regressions-dl21-passage.splade-pp-sd.cached.md} (88%) rename docs/regressions/{regressions-dl21-passage.unicoil-0shot.md => regressions-dl21-passage.unicoil-0shot.cached.md} (88%) rename docs/regressions/{regressions-dl21-passage.unicoil-noexp-0shot.md => regressions-dl21-passage.unicoil-noexp-0shot.cached.md} (85%) rename docs/regressions/{regressions-dl22-doc-segmented.unicoil-0shot-v2.md => regressions-dl22-doc-segmented.unicoil-0shot-v2.cached.md} (89%) rename docs/regressions/{regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.md => regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.md} (87%) rename docs/regressions/{regressions-dl22-passage.splade-pp-ed.md => regressions-dl22-passage.splade-pp-ed.cached.md} (88%) rename docs/regressions/{regressions-dl22-passage.splade-pp-sd.md => regressions-dl22-passage.splade-pp-sd.cached.md} (88%) rename docs/regressions/{regressions-dl22-passage.unicoil-0shot.md => regressions-dl22-passage.unicoil-0shot.cached.md} (89%) rename docs/regressions/{regressions-dl22-passage.unicoil-noexp-0shot.md => regressions-dl22-passage.unicoil-noexp-0shot.cached.md} (87%) rename docs/regressions/{regressions-dl23-doc-segmented.unicoil-0shot-v2.md => regressions-dl23-doc-segmented.unicoil-0shot-v2.cached.md} (89%) rename docs/regressions/{regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.md => regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.md} (87%) rename docs/regressions/{regressions-dl23-passage.splade-pp-ed.md => regressions-dl23-passage.splade-pp-ed.cached.md} (88%) rename docs/regressions/{regressions-dl23-passage.splade-pp-sd.md => regressions-dl23-passage.splade-pp-sd.cached.md} (88%) rename docs/regressions/{regressions-dl23-passage.unicoil-0shot.md => regressions-dl23-passage.unicoil-0shot.cached.md} (88%) rename 
docs/regressions/{regressions-dl23-passage.unicoil-noexp-0shot.md => regressions-dl23-passage.unicoil-noexp-0shot.cached.md} (85%) rename docs/regressions/{regressions-msmarco-v1-doc-segmented.unicoil-noexp.md => regressions-msmarco-v1-doc-segmented.unicoil-noexp.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v1-doc-segmented.unicoil.md => regressions-msmarco-v1-doc-segmented.unicoil.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.md => regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md} (88%) rename docs/regressions/{regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.md => regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md} (87%) rename docs/regressions/{regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.md => regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md} (87%) rename docs/regressions/{regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.md => regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md} (86%) rename docs/regressions/{regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.md => regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md} (88%) rename docs/regressions/{regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.md => regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md} (87%) rename docs/regressions/{regressions-msmarco-v1-passage.deepimpact.md => regressions-msmarco-v1-passage.deepimpact.cached.md} (90%) rename docs/regressions/{regressions-msmarco-v1-passage.distill-splade-max.md => regressions-msmarco-v1-passage.distill-splade-max.cached.md} (90%) rename docs/regressions/{regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.md => regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md} (88%) rename docs/regressions/{regressions-msmarco-v1-passage.openai-ada2.hnsw.md => regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md} (86%) rename 
docs/regressions/{regressions-msmarco-v1-passage.splade-pp-ed.md => regressions-msmarco-v1-passage.splade-pp-ed.cached.md} (89%) rename docs/regressions/{regressions-msmarco-v1-passage.splade-pp-sd.md => regressions-msmarco-v1-passage.splade-pp-sd.cached.md} (88%) rename docs/regressions/{regressions-msmarco-v1-passage.unicoil-noexp.md => regressions-msmarco-v1-passage.unicoil-noexp.cached.md} (94%) rename docs/regressions/{regressions-msmarco-v1-passage.unicoil-tilde-expansion.md => regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v1-passage.unicoil.md => regressions-msmarco-v1-passage.unicoil.cached.md} (94%) rename docs/regressions/{regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.md => regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v2-doc-segmented.unicoil-0shot.md => regressions-msmarco-v2-doc-segmented.unicoil-0shot.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.md => regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.md => regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v2-passage.splade-pp-ed.md => regressions-msmarco-v2-passage.splade-pp-ed.cached.md} (88%) rename docs/regressions/{regressions-msmarco-v2-passage.splade-pp-sd.md => regressions-msmarco-v2-passage.splade-pp-sd.cached.md} (88%) rename docs/regressions/{regressions-msmarco-v2-passage.unicoil-0shot.md => regressions-msmarco-v2-passage.unicoil-0shot.cached.md} (95%) rename docs/regressions/{regressions-msmarco-v2-passage.unicoil-noexp-0shot.md => regressions-msmarco-v2-passage.unicoil-noexp-0shot.cached.md} (96%) rename src/main/resources/docgen/templates/{dl19-doc-segmented.unicoil-noexp.template => 
dl19-doc-segmented.unicoil-noexp.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-doc-segmented.unicoil.template => dl19-doc-segmented.unicoil.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.bge-base-en-v1.5.hnsw-int8.template => dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.bge-base-en-v1.5.hnsw.template => dl19-passage.bge-base-en-v1.5.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.cohere-embed-english-v3.0.hnsw-int8.template => dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.cohere-embed-english-v3.0.hnsw.template => dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.cos-dpr-distil.hnsw-int8.template => dl19-passage.cos-dpr-distil.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.cos-dpr-distil.hnsw.template => dl19-passage.cos-dpr-distil.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.openai-ada2.hnsw-int8.template => dl19-passage.openai-ada2.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.openai-ada2.hnsw.template => dl19-passage.openai-ada2.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.splade-pp-ed.template => dl19-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.splade-pp-sd.template => dl19-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.unicoil-noexp.template => dl19-passage.unicoil-noexp.cached.template} (100%) rename src/main/resources/docgen/templates/{dl19-passage.unicoil.template => dl19-passage.unicoil.cached.template} (100%) rename 
src/main/resources/docgen/templates/{dl20-doc-segmented.unicoil-noexp.template => dl20-doc-segmented.unicoil-noexp.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-doc-segmented.unicoil.template => dl20-doc-segmented.unicoil.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.bge-base-en-v1.5.hnsw-int8.template => dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.bge-base-en-v1.5.hnsw.template => dl20-passage.bge-base-en-v1.5.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.cohere-embed-english-v3.0.hnsw-int8.template => dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.cohere-embed-english-v3.0.hnsw.template => dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.cos-dpr-distil.hnsw-int8.template => dl20-passage.cos-dpr-distil.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.cos-dpr-distil.hnsw.template => dl20-passage.cos-dpr-distil.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.openai-ada2.hnsw-int8.template => dl20-passage.openai-ada2.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.openai-ada2.hnsw.template => dl20-passage.openai-ada2.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.splade-pp-ed.template => dl20-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.splade-pp-sd.template => dl20-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.unicoil-noexp.template => dl20-passage.unicoil-noexp.cached.template} (100%) rename src/main/resources/docgen/templates/{dl20-passage.unicoil.template => 
dl20-passage.unicoil.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-doc-segmented.unicoil-0shot-v2.template => dl21-doc-segmented.unicoil-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-doc-segmented.unicoil-0shot.template => dl21-doc-segmented.unicoil-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-doc-segmented.unicoil-noexp-0shot-v2.template => dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-doc-segmented.unicoil-noexp-0shot.template => dl21-doc-segmented.unicoil-noexp-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-passage.splade-pp-ed.template => dl21-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-passage.splade-pp-sd.template => dl21-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-passage.unicoil-0shot.template => dl21-passage.unicoil-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{dl21-passage.unicoil-noexp-0shot.template => dl21-passage.unicoil-noexp-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{dl22-doc-segmented.unicoil-0shot-v2.template => dl22-doc-segmented.unicoil-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{dl22-doc-segmented.unicoil-noexp-0shot-v2.template => dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{dl22-passage.splade-pp-ed.template => dl22-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{dl22-passage.splade-pp-sd.template => dl22-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{dl22-passage.unicoil-0shot.template => dl22-passage.unicoil-0shot.cached.template} (100%) rename 
src/main/resources/docgen/templates/{dl22-passage.unicoil-noexp-0shot.template => dl22-passage.unicoil-noexp-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{dl23-doc-segmented.unicoil-0shot-v2.template => dl23-doc-segmented.unicoil-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{dl23-doc-segmented.unicoil-noexp-0shot-v2.template => dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{dl23-passage.splade-pp-ed.template => dl23-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{dl23-passage.splade-pp-sd.template => dl23-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{dl23-passage.unicoil-0shot.template => dl23-passage.unicoil-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{dl23-passage.unicoil-noexp-0shot.template => dl23-passage.unicoil-noexp-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-doc-segmented.unicoil-noexp.template => msmarco-v1-doc-segmented.unicoil-noexp.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-doc-segmented.unicoil.template => msmarco-v1-doc-segmented.unicoil.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.template => msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.bge-base-en-v1.5.hnsw.template => msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.template => msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.template => msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template} 
(100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.cos-dpr-distil.hnsw-int8.template => msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.cos-dpr-distil.hnsw.template => msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.deepimpact.template => msmarco-v1-passage.deepimpact.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.distill-splade-max.template => msmarco-v1-passage.distill-splade-max.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.openai-ada2.hnsw-int8.template => msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.openai-ada2.hnsw.template => msmarco-v1-passage.openai-ada2.hnsw.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.splade-pp-ed.template => msmarco-v1-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.splade-pp-sd.template => msmarco-v1-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.unicoil-noexp.template => msmarco-v1-passage.unicoil-noexp.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.unicoil-tilde-expansion.template => msmarco-v1-passage.unicoil-tilde-expansion.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v1-passage.unicoil.template => msmarco-v1-passage.unicoil.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-doc-segmented.unicoil-0shot-v2.template => msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-doc-segmented.unicoil-0shot.template => msmarco-v2-doc-segmented.unicoil-0shot.cached.template} (100%) 
rename src/main/resources/docgen/templates/{msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.template => msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-doc-segmented.unicoil-noexp-0shot.template => msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-passage.splade-pp-ed.template => msmarco-v2-passage.splade-pp-ed.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-passage.splade-pp-sd.template => msmarco-v2-passage.splade-pp-sd.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-passage.unicoil-0shot.template => msmarco-v2-passage.unicoil-0shot.cached.template} (100%) rename src/main/resources/docgen/templates/{msmarco-v2-passage.unicoil-noexp-0shot.template => msmarco-v2-passage.unicoil-noexp-0shot.cached.template} (100%) rename src/main/resources/regression/{dl19-doc-segmented.unicoil-noexp.yaml => dl19-doc-segmented.unicoil-noexp.cached.yaml} (100%) rename src/main/resources/regression/{dl19-doc-segmented.unicoil.yaml => dl19-doc-segmented.unicoil.cached.yaml} (100%) rename src/main/resources/regression/{dl19-passage.bge-base-en-v1.5.hnsw-int8.yaml => dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml} (98%) rename src/main/resources/regression/{dl19-passage.bge-base-en-v1.5.hnsw.yaml => dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml} (98%) rename src/main/resources/regression/{dl19-passage.cohere-embed-english-v3.0.hnsw-int8.yaml => dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml} (97%) rename src/main/resources/regression/{dl19-passage.cohere-embed-english-v3.0.hnsw.yaml => dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml} (97%) rename src/main/resources/regression/{dl19-passage.cos-dpr-distil.hnsw-int8.yaml => dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml} (97%) rename 
src/main/resources/regression/{dl19-passage.cos-dpr-distil.hnsw.yaml => dl19-passage.cos-dpr-distil.hnsw.cached.yaml} (97%) rename src/main/resources/regression/{dl19-passage.openai-ada2.hnsw-int8.yaml => dl19-passage.openai-ada2.hnsw-int8.cached.yaml} (98%) rename src/main/resources/regression/{dl19-passage.openai-ada2.hnsw.yaml => dl19-passage.openai-ada2.hnsw.cached.yaml} (98%) rename src/main/resources/regression/{dl19-passage.splade-pp-ed.yaml => dl19-passage.splade-pp-ed.cached.yaml} (95%) rename src/main/resources/regression/{dl19-passage.splade-pp-sd.yaml => dl19-passage.splade-pp-sd.cached.yaml} (95%) rename src/main/resources/regression/{dl19-passage.unicoil-noexp.yaml => dl19-passage.unicoil-noexp.cached.yaml} (94%) rename src/main/resources/regression/{dl19-passage.unicoil.yaml => dl19-passage.unicoil.cached.yaml} (95%) rename src/main/resources/regression/{dl20-doc-segmented.unicoil-noexp.yaml => dl20-doc-segmented.unicoil-noexp.cached.yaml} (95%) rename src/main/resources/regression/{dl20-doc-segmented.unicoil.yaml => dl20-doc-segmented.unicoil.cached.yaml} (96%) rename src/main/resources/regression/{dl20-passage.bge-base-en-v1.5.hnsw-int8.yaml => dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml} (98%) rename src/main/resources/regression/{dl20-passage.bge-base-en-v1.5.hnsw.yaml => dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml} (98%) rename src/main/resources/regression/{dl20-passage.cohere-embed-english-v3.0.hnsw-int8.yaml => dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml} (97%) rename src/main/resources/regression/{dl20-passage.cohere-embed-english-v3.0.hnsw.yaml => dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml} (97%) rename src/main/resources/regression/{dl20-passage.cos-dpr-distil.hnsw-int8.yaml => dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml} (97%) rename src/main/resources/regression/{dl20-passage.cos-dpr-distil.hnsw.yaml => dl20-passage.cos-dpr-distil.hnsw.cached.yaml} (97%) rename 
src/main/resources/regression/{dl20-passage.openai-ada2.hnsw-int8.yaml => dl20-passage.openai-ada2.hnsw-int8.cached.yaml} (98%) rename src/main/resources/regression/{dl20-passage.openai-ada2.hnsw.yaml => dl20-passage.openai-ada2.hnsw.cached.yaml} (98%) rename src/main/resources/regression/{dl20-passage.splade-pp-ed.yaml => dl20-passage.splade-pp-ed.cached.yaml} (94%) rename src/main/resources/regression/{dl20-passage.splade-pp-sd.yaml => dl20-passage.splade-pp-sd.cached.yaml} (94%) rename src/main/resources/regression/{dl20-passage.unicoil-noexp.yaml => dl20-passage.unicoil-noexp.cached.yaml} (94%) rename src/main/resources/regression/{dl20-passage.unicoil.yaml => dl20-passage.unicoil.cached.yaml} (95%) rename src/main/resources/regression/{dl21-doc-segmented.unicoil-0shot-v2.yaml => dl21-doc-segmented.unicoil-0shot-v2.cached.yaml} (96%) rename src/main/resources/regression/{dl21-doc-segmented.unicoil-0shot.yaml => dl21-doc-segmented.unicoil-0shot.cached.yaml} (98%) rename src/main/resources/regression/{dl21-doc-segmented.unicoil-noexp-0shot-v2.yaml => dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml} (95%) rename src/main/resources/regression/{dl21-doc-segmented.unicoil-noexp-0shot.yaml => dl21-doc-segmented.unicoil-noexp-0shot.cached.yaml} (98%) rename src/main/resources/regression/{dl21-passage.splade-pp-ed.yaml => dl21-passage.splade-pp-ed.cached.yaml} (95%) rename src/main/resources/regression/{dl21-passage.splade-pp-sd.yaml => dl21-passage.splade-pp-sd.cached.yaml} (95%) rename src/main/resources/regression/{dl21-passage.unicoil-0shot.yaml => dl21-passage.unicoil-0shot.cached.yaml} (95%) rename src/main/resources/regression/{dl21-passage.unicoil-noexp-0shot.yaml => dl21-passage.unicoil-noexp-0shot.cached.yaml} (95%) rename src/main/resources/regression/{dl22-doc-segmented.unicoil-0shot-v2.yaml => dl22-doc-segmented.unicoil-0shot-v2.cached.yaml} (96%) rename src/main/resources/regression/{dl22-doc-segmented.unicoil-noexp-0shot-v2.yaml => 
dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml} (95%) rename src/main/resources/regression/{dl22-passage.splade-pp-ed.yaml => dl22-passage.splade-pp-ed.cached.yaml} (95%) rename src/main/resources/regression/{dl22-passage.splade-pp-sd.yaml => dl22-passage.splade-pp-sd.cached.yaml} (95%) rename src/main/resources/regression/{dl22-passage.unicoil-0shot.yaml => dl22-passage.unicoil-0shot.cached.yaml} (95%) rename src/main/resources/regression/{dl22-passage.unicoil-noexp-0shot.yaml => dl22-passage.unicoil-noexp-0shot.cached.yaml} (95%) rename src/main/resources/regression/{dl23-doc-segmented.unicoil-0shot-v2.yaml => dl23-doc-segmented.unicoil-0shot-v2.cached.yaml} (96%) rename src/main/resources/regression/{dl23-doc-segmented.unicoil-noexp-0shot-v2.yaml => dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml} (95%) rename src/main/resources/regression/{dl23-passage.splade-pp-ed.yaml => dl23-passage.splade-pp-ed.cached.yaml} (95%) rename src/main/resources/regression/{dl23-passage.splade-pp-sd.yaml => dl23-passage.splade-pp-sd.cached.yaml} (95%) rename src/main/resources/regression/{dl23-passage.unicoil-0shot.yaml => dl23-passage.unicoil-0shot.cached.yaml} (95%) rename src/main/resources/regression/{dl23-passage.unicoil-noexp-0shot.yaml => dl23-passage.unicoil-noexp-0shot.cached.yaml} (95%) rename src/main/resources/regression/{msmarco-v1-doc-segmented.unicoil-noexp.yaml => msmarco-v1-doc-segmented.unicoil-noexp.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v1-doc-segmented.unicoil.yaml => msmarco-v1-doc-segmented.unicoil.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.yaml => msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v1-passage.bge-base-en-v1.5.hnsw.yaml => msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.yaml => 
msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml} (97%) rename src/main/resources/regression/{msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.yaml => msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml} (97%) rename src/main/resources/regression/{msmarco-v1-passage.cos-dpr-distil.hnsw-int8.yaml => msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml} (97%) rename src/main/resources/regression/{msmarco-v1-passage.cos-dpr-distil.hnsw.yaml => msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml} (97%) rename src/main/resources/regression/{msmarco-v1-passage.deepimpact.yaml => msmarco-v1-passage.deepimpact.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v1-passage.distill-splade-max.yaml => msmarco-v1-passage.distill-splade-max.cached.yaml} (97%) rename src/main/resources/regression/{msmarco-v1-passage.openai-ada2.hnsw-int8.yaml => msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v1-passage.openai-ada2.hnsw.yaml => msmarco-v1-passage.openai-ada2.hnsw.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v1-passage.splade-pp-ed.yaml => msmarco-v1-passage.splade-pp-ed.cached.yaml} (95%) rename src/main/resources/regression/{msmarco-v1-passage.splade-pp-sd.yaml => msmarco-v1-passage.splade-pp-sd.cached.yaml} (95%) rename src/main/resources/regression/{msmarco-v1-passage.unicoil-noexp.yaml => msmarco-v1-passage.unicoil-noexp.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v1-passage.unicoil-tilde-expansion.yaml => msmarco-v1-passage.unicoil-tilde-expansion.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v1-passage.unicoil.yaml => msmarco-v1-passage.unicoil.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v2-doc-segmented.unicoil-0shot-v2.yaml => msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v2-doc-segmented.unicoil-0shot.yaml => 
msmarco-v2-doc-segmented.unicoil-0shot.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.yaml => msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v2-doc-segmented.unicoil-noexp-0shot.yaml => msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v2-passage.splade-pp-ed.yaml => msmarco-v2-passage.splade-pp-ed.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v2-passage.splade-pp-sd.yaml => msmarco-v2-passage.splade-pp-sd.cached.yaml} (98%) rename src/main/resources/regression/{msmarco-v2-passage.unicoil-0shot.yaml => msmarco-v2-passage.unicoil-0shot.cached.yaml} (100%) rename src/main/resources/regression/{msmarco-v2-passage.unicoil-noexp-0shot.yaml => msmarco-v2-passage.unicoil-noexp-0shot.cached.yaml} (100%) diff --git a/README.md b/README.md index fa77fdc4b4..5bbed8b75e 100644 --- a/README.md +++ b/README.md @@ -104,43 +104,43 @@ See individual pages for details. 
### MS MARCO V1 Passage Regressions -| | dev | DL19 | DL20 | -|------------------------------------------------------|:-------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------:| -| **Unsupervised Sparse** | | | | -| Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-passage.md) | [+](docs/regressions/regressions-dl19-passage.md) | [+](docs/regressions/regressions-dl20-passage.md) | -| Quantized BM25 | [✓](docs/regressions/regressions-msmarco-v1-passage.bm25-b8.md) | [✓](docs/regressions/regressions-dl19-passage.bm25-b8.md) | [✓](docs/regressions/regressions-dl20-passage.bm25-b8.md) | -| WordPiece baselines (pre-tokenized) | [+](docs/regressions/regressions-msmarco-v1-passage.wp-tok.md) | [+](docs/regressions/regressions-dl19-passage.wp-tok.md) | [+](docs/regressions/regressions-dl20-passage.wp-tok.md) | -| WordPiece baselines (Huggingface tokenizer) | [+](docs/regressions/regressions-msmarco-v1-passage.wp-hgf.md) | [+](docs/regressions/regressions-dl19-passage.wp-hgf.md) | [+](docs/regressions/regressions-dl20-passage.wp-hgf.md) | -| WordPiece + Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-passage.wp-ca.md) | [+](docs/regressions/regressions-dl19-passage.wp-ca.md) | [+](docs/regressions/regressions-dl20-passage.wp-ca.md) | -| doc2query | [+](docs/regressions/regressions-msmarco-v1-passage.doc2query.md) | | | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v1-passage.docTTTTTquery.md) | [+](docs/regressions/regressions-dl19-passage.docTTTTTquery.md) | [+](docs/regressions/regressions-dl20-passage.docTTTTTquery.md) | -| **Learned Sparse (uniCOIL family)** | | | | -| uniCOIL noexp | [✓](docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.md) | 
[✓](docs/regressions/regressions-dl19-passage.unicoil-noexp.md) | [✓](docs/regressions/regressions-dl20-passage.unicoil-noexp.md) | -| uniCOIL with doc2query-T5 | [✓](docs/regressions/regressions-msmarco-v1-passage.unicoil.md) | [✓](docs/regressions/regressions-dl19-passage.unicoil.md) | [✓](docs/regressions/regressions-dl20-passage.unicoil.md) | -| uniCOIL with TILDE | [✓](docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.md) | | | -| **Learned Sparse (other)** | | | | -| DeepImpact | [✓](docs/regressions/regressions-msmarco-v1-passage.deepimpact.md) | | | -| SPLADEv2 | [✓](docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.md) | | | -| SPLADE++ CoCondenser-EnsembleDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-ed.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-ed.md) | -| SPLADE++ CoCondenser-EnsembleDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-ed.onnx.md) | -| SPLADE++ CoCondenser-SelfDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-sd.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-sd.md) | -| SPLADE++ CoCondenser-SelfDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-sd.onnx.md) | -| **Learned Dense** (HNSW) | | | | -| cosDPR-distil w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.md) | 
[✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.md) | -| cosDPR-distil w/ HSNW fp32 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md) | -| cosDPR-distil w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.md) | -| cosDPR-distil w/ HSNW int8 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md) | -| BGE-base-en-v1.5 w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.md) | -| BGE-base-en-v1.5 w/ HNSW fp32 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md) | -| BGE-base-en-v1.5 w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.md) | -| BGE-base-en-v1.5 w/ HNSW int8 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) 
| [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | -| OpenAI Ada2 w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.md) | [✓](docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.md) | [✓](docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.md) | -| OpenAI Ada2 w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.md) | [✓](docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.md) | [✓](docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.md) | -| Cohere English v3.0 w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.md) | [✓](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.md) | [✓](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.md) | -| Cohere English v3.0 w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.md) | [✓](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.md) | [✓](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.md) | -| **Learned Dense** (Inverted; experimental) | | | | -| cosDPR-distil w/ "fake words" (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.fw.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.fw.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.fw.md) | -| cosDPR-distil w/ "LexLSH" (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.lexlsh.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.lexlsh.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.lexlsh.md) | +| | dev | DL19 | DL20 | 
+|------------------------------------------------------|:--------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------:| +| **Unsupervised Sparse** | | | | +| Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-passage.md) | [+](docs/regressions/regressions-dl19-passage.md) | [+](docs/regressions/regressions-dl20-passage.md) | +| Quantized BM25 | [✓](docs/regressions/regressions-msmarco-v1-passage.bm25-b8.md) | [✓](docs/regressions/regressions-dl19-passage.bm25-b8.md) | [✓](docs/regressions/regressions-dl20-passage.bm25-b8.md) | +| WordPiece baselines (pre-tokenized) | [+](docs/regressions/regressions-msmarco-v1-passage.wp-tok.md) | [+](docs/regressions/regressions-dl19-passage.wp-tok.md) | [+](docs/regressions/regressions-dl20-passage.wp-tok.md) | +| WordPiece baselines (Huggingface tokenizer) | [+](docs/regressions/regressions-msmarco-v1-passage.wp-hgf.md) | [+](docs/regressions/regressions-dl19-passage.wp-hgf.md) | [+](docs/regressions/regressions-dl20-passage.wp-hgf.md) | +| WordPiece + Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-passage.wp-ca.md) | [+](docs/regressions/regressions-dl19-passage.wp-ca.md) | [+](docs/regressions/regressions-dl20-passage.wp-ca.md) | +| doc2query | [+](docs/regressions/regressions-msmarco-v1-passage.doc2query.md) | | | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v1-passage.docTTTTTquery.md) | [+](docs/regressions/regressions-dl19-passage.docTTTTTquery.md) | [+](docs/regressions/regressions-dl20-passage.docTTTTTquery.md) | +| **Learned Sparse (uniCOIL family)** | | | | +| uniCOIL noexp | [✓](docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.cached.md) | [✓](docs/regressions/regressions-dl19-passage.unicoil-noexp.cached.md) | 
[✓](docs/regressions/regressions-dl20-passage.unicoil-noexp.cached.md) | +| uniCOIL with doc2query-T5 | [✓](docs/regressions/regressions-msmarco-v1-passage.unicoil.cached.md) | [✓](docs/regressions/regressions-dl19-passage.unicoil.cached.md) | [✓](docs/regressions/regressions-dl20-passage.unicoil.cached.md) | +| uniCOIL with TILDE | [✓](docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md) | | | +| **Learned Sparse (other)** | | | | +| DeepImpact | [✓](docs/regressions/regressions-msmarco-v1-passage.deepimpact.cached.md) | | | +| SPLADEv2 | [✓](docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.cached.md) | | | +| SPLADE++ CoCondenser-EnsembleDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md) | +| SPLADE++ CoCondenser-EnsembleDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-ed.onnx.md) | +| SPLADE++ CoCondenser-SelfDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md) | +| SPLADE++ CoCondenser-SelfDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.splade-pp-sd.onnx.md) | +| **Learned Dense** (HNSW) | | | | +| cosDPR-distil w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md) | 
[✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md) | +| cosDPR-distil w/ HNSW fp32 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.onnx.md) | +| cosDPR-distil w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md) | +| cosDPR-distil w/ HNSW int8 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.onnx.md) | +| BGE-base-en-v1.5 w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md) | +| BGE-base-en-v1.5 w/ HNSW fp32 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.onnx.md) | +| BGE-base-en-v1.5 w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md) | +| BGE-base-en-v1.5 w/ HNSW int8 (ONNX) | [✓](docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | 
[✓](docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | [✓](docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.md) | +| OpenAI Ada2 w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md) | [✓](docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md) | [✓](docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md) | +| OpenAI Ada2 w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md) | +| Cohere English v3.0 w/ HNSW fp32 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md) | [✓](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md) | [✓](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md) | +| Cohere English v3.0 w/ HNSW int8 (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md) | [✓](docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md) | +| **Learned Dense** (Inverted; experimental) | | | | +| cosDPR-distil w/ "fake words" (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.fw.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.fw.md) | [✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.fw.md) | +| cosDPR-distil w/ "LexLSH" (cached queries) | [✓](docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.lexlsh.md) | [✓](docs/regressions/regressions-dl19-passage.cos-dpr-distil.lexlsh.md) | 
[✓](docs/regressions/regressions-dl20-passage.cos-dpr-distil.lexlsh.md) | ### Available Corpora for Download @@ -165,22 +165,22 @@ See individual pages for details. ### MS MARCO V1 Document Regressions -| | dev | DL19 | DL20 | -|-----------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------:|:---------------------------------------------------------------------:|:---------------------------------------------------------------------:| +| | dev | DL19 | DL20 | +|-----------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------:|:----------------------------------------------------------------------------:|:----------------------------------------------------------------------------:| | **Unsupervised Lexical, Complete Doc**[*](docs/experiments-msmarco-doc-doc2query-details.md) | -| Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc.md) | [+](docs/regressions/regressions-dl19-doc.md) | [+](docs/regressions/regressions-dl20-doc.md) | -| WordPiece baselines (pre-tokenized) | [+](docs/regressions/regressions-msmarco-v1-doc.wp-tok.md) | [+](docs/regressions/regressions-dl19-doc.wp-tok.md) | [+](docs/regressions/regressions-dl20-doc.wp-tok.md) | -| WordPiece baselines (Huggingface tokenizer) | [+](docs/regressions/regressions-msmarco-v1-doc.wp-hgf.md) | [+](docs/regressions/regressions-dl19-doc.wp-hgf.md) | [+](docs/regressions/regressions-dl20-doc.wp-hgf.md) | -| WordPiece + Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc.wp-ca.md) | [+](docs/regressions/regressions-dl19-doc.wp-ca.md) | [+](docs/regressions/regressions-dl20-doc.wp-ca.md) | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v1-doc.docTTTTTquery.md) | [+](docs/regressions/regressions-dl19-doc.docTTTTTquery.md) | 
[+](docs/regressions/regressions-dl20-doc.docTTTTTquery.md) | +| Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc.md) | [+](docs/regressions/regressions-dl19-doc.md) | [+](docs/regressions/regressions-dl20-doc.md) | +| WordPiece baselines (pre-tokenized) | [+](docs/regressions/regressions-msmarco-v1-doc.wp-tok.md) | [+](docs/regressions/regressions-dl19-doc.wp-tok.md) | [+](docs/regressions/regressions-dl20-doc.wp-tok.md) | +| WordPiece baselines (Huggingface tokenizer) | [+](docs/regressions/regressions-msmarco-v1-doc.wp-hgf.md) | [+](docs/regressions/regressions-dl19-doc.wp-hgf.md) | [+](docs/regressions/regressions-dl20-doc.wp-hgf.md) | +| WordPiece + Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc.wp-ca.md) | [+](docs/regressions/regressions-dl19-doc.wp-ca.md) | [+](docs/regressions/regressions-dl20-doc.wp-ca.md) | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v1-doc.docTTTTTquery.md) | [+](docs/regressions/regressions-dl19-doc.docTTTTTquery.md) | [+](docs/regressions/regressions-dl20-doc.docTTTTTquery.md) | | **Unsupervised Lexical, Segmented Doc**[*](docs/experiments-msmarco-doc-doc2query-details.md) | -| Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.md) | [+](docs/regressions/regressions-dl19-doc-segmented.md) | [+](docs/regressions/regressions-dl20-doc-segmented.md) | -| WordPiece baselines (pre-tokenized) | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.wp-tok.md) | [+](docs/regressions/regressions-dl19-doc-segmented.wp-tok.md) | [+](docs/regressions/regressions-dl20-doc-segmented.wp-tok.md) | -| WordPiece + Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.wp-ca.md) | [+](docs/regressions/regressions-dl19-doc-segmented.wp-ca.md) | [+](docs/regressions/regressions-dl20-doc-segmented.wp-ca.md) | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.docTTTTTquery.md) | 
[+](docs/regressions/regressions-dl19-doc-segmented.docTTTTTquery.md) | [+](docs/regressions/regressions-dl20-doc-segmented.docTTTTTquery.md) | +| Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.md) | [+](docs/regressions/regressions-dl19-doc-segmented.md) | [+](docs/regressions/regressions-dl20-doc-segmented.md) | +| WordPiece baselines (pre-tokenized) | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.wp-tok.md) | [+](docs/regressions/regressions-dl19-doc-segmented.wp-tok.md) | [+](docs/regressions/regressions-dl20-doc-segmented.wp-tok.md) | +| WordPiece + Lucene BoW baselines | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.wp-ca.md) | [+](docs/regressions/regressions-dl19-doc-segmented.wp-ca.md) | [+](docs/regressions/regressions-dl20-doc-segmented.wp-ca.md) | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v1-doc-segmented.docTTTTTquery.md) | [+](docs/regressions/regressions-dl19-doc-segmented.docTTTTTquery.md) | [+](docs/regressions/regressions-dl20-doc-segmented.docTTTTTquery.md) | | **Learned Sparse Lexical** | -| uniCOIL noexp | [✓](docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.md) | [✓](docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.md) | [✓](docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.md) | -| uniCOIL with doc2query-T5 | [✓](docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.md) | [✓](docs/regressions/regressions-dl19-doc-segmented.unicoil.md) | [✓](docs/regressions/regressions-dl20-doc-segmented.unicoil.md) | +| uniCOIL noexp | [✓](docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.cached.md) | [✓](docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.cached.md) | [✓](docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.cached.md) | +| uniCOIL with doc2query-T5 | [✓](docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.cached.md) | 
[✓](docs/regressions/regressions-dl19-doc-segmented.unicoil.cached.md) | [✓](docs/regressions/regressions-dl20-doc-segmented.unicoil.cached.md) | ### Available Corpora for Download @@ -195,21 +195,21 @@ See individual pages for details. ### MS MARCO V2 Passage Regressions -| | dev | DL21 | DL22 | DL23 | -|------------------------------------------------------|:---------------------------------------------------------------------------:|:---------------------------------------------------------------------:|:---------------------------------------------------------------------:|:---------------------------------------------------------------------:| -| **Unsupervised Lexical, Original Corpus** | | | | | -| baselines | [+](docs/regressions/regressions-msmarco-v2-passage.md) | [+](docs/regressions/regressions-dl21-passage.md) | [+](docs/regressions/regressions-dl22-passage.md) | [+](docs/regressions/regressions-dl23-passage.md) | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-passage.d2q-t5.md) | [+](docs/regressions/regressions-dl21-passage.d2q-t5.md) | [+](docs/regressions/regressions-dl22-passage.d2q-t5.md) | [+](docs/regressions/regressions-dl23-passage.d2q-t5.md) | -| **Unsupervised Lexical, Augmented Corpus** | | | | | -| baselines | [+](docs/regressions/regressions-msmarco-v2-passage-augmented.md) | [+](docs/regressions/regressions-dl21-passage-augmented.md) | [+](docs/regressions/regressions-dl22-passage-augmented.md) | [+](docs/regressions/regressions-dl23-passage-augmented.md) | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-passage-augmented.d2q-t5.md) | [+](docs/regressions/regressions-dl21-passage-augmented.d2q-t5.md) | [+](docs/regressions/regressions-dl22-passage-augmented.d2q-t5.md) | [+](docs/regressions/regressions-dl23-passage-augmented.d2q-t5.md) | -| **Learned Sparse Lexical** | | | | | -| uniCOIL noexp zero-shot | [✓](docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.md) | 
[✓](docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.md) | [✓](docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.md) | [✓](docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.md) | -| uniCOIL with doc2query-T5 zero-shot | [✓](docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.md) | [✓](docs/regressions/regressions-dl21-passage.unicoil-0shot.md) | [✓](docs/regressions/regressions-dl22-passage.unicoil-0shot.md) | [✓](docs/regressions/regressions-dl23-passage.unicoil-0shot.md) | -| SPLADE++ CoCondenser-EnsembleDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-ed.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-ed.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-ed.md) | -| SPLADE++ CoCondenser-EnsembleDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-ed.onnx.md) | -| SPLADE++ CoCondenser-SelfDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-sd.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-sd.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-sd.md) | -| SPLADE++ CoCondenser-SelfDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-sd.onnx.md) | +| | dev | DL21 | DL22 | DL23 | 
+|------------------------------------------------------|:----------------------------------------------------------------------------------:|:----------------------------------------------------------------------------:|:----------------------------------------------------------------------------:|:----------------------------------------------------------------------------:| +| **Unsupervised Lexical, Original Corpus** | | | | | +| baselines | [+](docs/regressions/regressions-msmarco-v2-passage.md) | [+](docs/regressions/regressions-dl21-passage.md) | [+](docs/regressions/regressions-dl22-passage.md) | [+](docs/regressions/regressions-dl23-passage.md) | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-passage.d2q-t5.md) | [+](docs/regressions/regressions-dl21-passage.d2q-t5.md) | [+](docs/regressions/regressions-dl22-passage.d2q-t5.md) | [+](docs/regressions/regressions-dl23-passage.d2q-t5.md) | +| **Unsupervised Lexical, Augmented Corpus** | | | | | +| baselines | [+](docs/regressions/regressions-msmarco-v2-passage-augmented.md) | [+](docs/regressions/regressions-dl21-passage-augmented.md) | [+](docs/regressions/regressions-dl22-passage-augmented.md) | [+](docs/regressions/regressions-dl23-passage-augmented.md) | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-passage-augmented.d2q-t5.md) | [+](docs/regressions/regressions-dl21-passage-augmented.d2q-t5.md) | [+](docs/regressions/regressions-dl22-passage-augmented.d2q-t5.md) | [+](docs/regressions/regressions-dl23-passage-augmented.d2q-t5.md) | +| **Learned Sparse Lexical** | | | | | +| uniCOIL noexp zero-shot | [✓](docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.cached.md) | [✓](docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.cached.md) | [✓](docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.cached.md) | [✓](docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.cached.md) | +| uniCOIL with doc2query-T5 zero-shot | 
[✓](docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.cached.md) | [✓](docs/regressions/regressions-dl21-passage.unicoil-0shot.cached.md) | [✓](docs/regressions/regressions-dl22-passage.unicoil-0shot.cached.md) | [✓](docs/regressions/regressions-dl23-passage.unicoil-0shot.cached.md) | +| SPLADE++ CoCondenser-EnsembleDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.cached.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-ed.cached.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-ed.cached.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-ed.cached.md) | +| SPLADE++ CoCondenser-EnsembleDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-ed.onnx.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-ed.onnx.md) | +| SPLADE++ CoCondenser-SelfDistil (cached queries) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.cached.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-sd.cached.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-sd.cached.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-sd.cached.md) | +| SPLADE++ CoCondenser-SelfDistil (ONNX) | [✓](docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl21-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl22-passage.splade-pp-sd.onnx.md) | [✓](docs/regressions/regressions-dl23-passage.splade-pp-sd.onnx.md) | ### Available Corpora for Download @@ -226,17 +226,17 @@ See individual pages for details. 
### MS MARCO V2 Document Regressions -| | dev | DL21 | DL22 | DL23 | -|-----------------------------------------|:------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------:|:------------------------------------------------------------------------------:|:------------------------------------------------------------------------------:| -| **Unsupervised Lexical, Complete Doc** | | | | | -| baselines | [+](docs/regressions/regressions-msmarco-v2-doc.md) | [+](docs/regressions/regressions-dl21-doc.md) | [+](docs/regressions/regressions-dl22-doc.md) | [+](docs/regressions/regressions-dl23-doc.md) | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-doc.d2q-t5.md) | [+](docs/regressions/regressions-dl21-doc.d2q-t5.md) | [+](docs/regressions/regressions-dl22-doc.d2q-t5.md) | [+](docs/regressions/regressions-dl23-doc.d2q-t5.md) | -| **Unsupervised Lexical, Segmented Doc** | | | | | -| baselines | [+](docs/regressions/regressions-msmarco-v2-doc-segmented.md) | [+](docs/regressions/regressions-dl21-doc-segmented.md) | [+](docs/regressions/regressions-dl22-doc-segmented.md) | [+](docs/regressions/regressions-dl23-doc-segmented.md) | -| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-doc-segmented.d2q-t5.md) | [+](docs/regressions/regressions-dl21-doc-segmented.d2q-t5.md) | [+](docs/regressions/regressions-dl22-doc-segmented.d2q-t5.md) | [+](docs/regressions/regressions-dl23-doc-segmented.d2q-t5.md) | -| **Learned Sparse Lexical** | | | | | -| uniCOIL noexp zero-shot | [✓](docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.md) | [✓](docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.md) | [✓](docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.md) | [✓](docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.md) | -| uniCOIL with doc2query-T5 zero-shot | 
[✓](docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.md) | [✓](docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.md) | [✓](docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.md) | [✓](docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.md) | +| | dev | DL21 | DL22 | DL23 | +|-----------------------------------------|:-------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------:| +| **Unsupervised Lexical, Complete Doc** | | | | | +| baselines | [+](docs/regressions/regressions-msmarco-v2-doc.md) | [+](docs/regressions/regressions-dl21-doc.md) | [+](docs/regressions/regressions-dl22-doc.md) | [+](docs/regressions/regressions-dl23-doc.md) | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-doc.d2q-t5.md) | [+](docs/regressions/regressions-dl21-doc.d2q-t5.md) | [+](docs/regressions/regressions-dl22-doc.d2q-t5.md) | [+](docs/regressions/regressions-dl23-doc.d2q-t5.md) | +| **Unsupervised Lexical, Segmented Doc** | | | | | +| baselines | [+](docs/regressions/regressions-msmarco-v2-doc-segmented.md) | [+](docs/regressions/regressions-dl21-doc-segmented.md) | [+](docs/regressions/regressions-dl22-doc-segmented.md) | [+](docs/regressions/regressions-dl23-doc-segmented.md) | +| doc2query-T5 | [+](docs/regressions/regressions-msmarco-v2-doc-segmented.d2q-t5.md) | [+](docs/regressions/regressions-dl21-doc-segmented.d2q-t5.md) | [+](docs/regressions/regressions-dl22-doc-segmented.d2q-t5.md) | [+](docs/regressions/regressions-dl23-doc-segmented.d2q-t5.md) | +| **Learned Sparse Lexical** | | | | | +| uniCOIL noexp zero-shot | 
[✓](docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.md) | [✓](docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.md) | [✓](docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.md) | [✓](docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.md) | +| uniCOIL with doc2query-T5 zero-shot | [✓](docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.md) | [✓](docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.cached.md) | [✓](docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.cached.md) | [✓](docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.cached.md) | ### Available Corpora for Download diff --git a/docs/regressions.md b/docs/regressions.md index 9d7e265481..c0765ddee7 100644 --- a/docs/regressions.md +++ b/docs/regressions.md @@ -42,29 +42,29 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.wp-ca >& logs/log.msmarco-v1-passage.wp-ca.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.doc2query >& logs/log.msmarco-v1-passage.doc2query.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.docTTTTTquery >& logs/log.msmarco-v1-passage.docTTTTTquery.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact >& logs/log.msmarco-v1-passage.deepimpact.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil >& logs/log.msmarco-v1-passage.unicoil.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp >& logs/log.msmarco-v1-passage.unicoil-noexp.txt & -nohup 
python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion >& logs/log.msmarco-v1-passage.unicoil-tilde-expansion.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max >& logs/log.msmarco-v1-passage.distill-splade-max.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed >& logs/log.msmarco-v1-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact.cached >& logs/log.msmarco-v1-passage.deepimpact.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil.cached >& logs/log.msmarco-v1-passage.unicoil.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp.cached >& logs/log.msmarco-v1-passage.unicoil-noexp.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion.cached >& logs/log.msmarco-v1-passage.unicoil-tilde-expansion.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max.cached >& logs/log.msmarco-v1-passage.distill-splade-max.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed.cached >& logs/log.msmarco-v1-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed.onnx >& logs/log.msmarco-v1-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd >& logs/log.msmarco-v1-passage.splade-pp-sd.txt 
& +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd.cached >& logs/log.msmarco-v1-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd.onnx >& logs/log.msmarco-v1-passage.splade-pp-sd.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw >& logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.cached >& logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.onnx >& logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8 >& logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached >& logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx >& logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.fw >& logs/log.msmarco-v1-passage.cos-dpr-distil.fw.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.lexlsh >& logs/log.msmarco-v1-passage.cos-dpr-distil.lexlsh.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression 
msmarco-v1-passage.bge-base-en-v1.5.hnsw >& logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached >& logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx >& logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8 >& logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached >& logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx >& logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw >& logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8 >& logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw >& logs/log.msmarco-v1-passage.openai-ada2.hnsw.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8 >& logs/log.msmarco-v1-passage.openai-ada2.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression 
msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached >& logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached >& logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw.cached >& logs/log.msmarco-v1-passage.openai-ada2.hnsw.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8.cached >& logs/log.msmarco-v1-passage.openai-ada2.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc >& logs/log.msmarco-v1-doc.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc.wp-tok >& logs/log.msmarco-v1-doc.wp-tok.txt & @@ -75,8 +75,8 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.wp-tok >& logs/log.msmarco-v1-doc-segmented.wp-tok.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.wp-ca >& logs/log.msmarco-v1-doc-segmented.wp-ca.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.docTTTTTquery >& logs/log.msmarco-v1-doc-segmented.docTTTTTquery.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil >& logs/log.msmarco-v1-doc-segmented.unicoil.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp >& 
logs/log.msmarco-v1-doc-segmented.unicoil-noexp.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil.cached >& logs/log.msmarco-v1-doc-segmented.unicoil.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp.cached >& logs/log.msmarco-v1-doc-segmented.unicoil-noexp.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage >& logs/log.dl19-passage.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bm25-b8 >& logs/log.dl19-passage.bm25-b8.txt & @@ -84,26 +84,26 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.wp-hgf >& logs/log.dl19-passage.wp-hgf.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.wp-ca >& logs/log.dl19-passage.wp-ca.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.docTTTTTquery >& logs/log.dl19-passage.docTTTTTquery.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil >& logs/log.dl19-passage.unicoil.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil-noexp >& logs/log.dl19-passage.unicoil-noexp.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed >& logs/log.dl19-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil.cached >& logs/log.dl19-passage.unicoil.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression 
dl19-passage.unicoil-noexp.cached >& logs/log.dl19-passage.unicoil-noexp.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed.cached >& logs/log.dl19-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed.onnx >& logs/log.dl19-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd >& logs/log.dl19-passage.splade-pp-sd.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd.cached >& logs/log.dl19-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd.onnx >& logs/log.dl19-passage.splade-pp-sd.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw >& logs/log.dl19-passage.cos-dpr-distil.hnsw.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.cached >& logs/log.dl19-passage.cos-dpr-distil.hnsw.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.onnx >& logs/log.dl19-passage.cos-dpr-distil.hnsw.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8 >& logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached >& logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.onnx >& 
logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.fw >& logs/log.dl19-passage.cos-dpr-distil.fw.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.lexlsh >& logs/log.dl19-passage.cos-dpr-distil.lexlsh.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw >& logs/log.dl19-passage.bge-base-en-v1.5.hnsw.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached >& logs/log.dl19-passage.bge-base-en-v1.5.hnsw.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.onnx >& logs/log.dl19-passage.bge-base-en-v1.5.hnsw.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8 >& logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached >& logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx >& logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw >& logs/log.dl19-passage.openai-ada2.hnsw.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8 >& logs/log.dl19-passage.openai-ada2.hnsw-int8.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw >& 
logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8 >& logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw.cached >& logs/log.dl19-passage.openai-ada2.hnsw.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8.cached >& logs/log.dl19-passage.openai-ada2.hnsw-int8.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached >& logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached >& logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc >& logs/log.dl19-doc.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc.wp-tok >& logs/log.dl19-doc.wp-tok.txt & @@ -114,8 +114,8 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.wp-tok >& logs/log.dl19-doc-segmented.wp-tok.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.wp-ca >& logs/log.dl19-doc-segmented.wp-ca.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.docTTTTTquery >& logs/log.dl19-doc-segmented.docTTTTTquery.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression 
dl19-doc-segmented.unicoil >& logs/log.dl19-doc-segmented.unicoil.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil-noexp >& logs/log.dl19-doc-segmented.unicoil-noexp.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil.cached >& logs/log.dl19-doc-segmented.unicoil.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil-noexp.cached >& logs/log.dl19-doc-segmented.unicoil-noexp.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage >& logs/log.dl20-passage.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bm25-b8 >& logs/log.dl20-passage.bm25-b8.txt & @@ -123,26 +123,26 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.wp-hgf >& logs/log.dl20-passage.wp-hgf.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.wp-ca >& logs/log.dl20-passage.wp-ca.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.docTTTTTquery >& logs/log.dl20-passage.docTTTTTquery.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil >& logs/log.dl20-passage.unicoil.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil-noexp >& logs/log.dl20-passage.unicoil-noexp.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed >& logs/log.dl20-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression 
dl20-passage.unicoil.cached >& logs/log.dl20-passage.unicoil.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil-noexp.cached >& logs/log.dl20-passage.unicoil-noexp.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed.cached >& logs/log.dl20-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed.onnx >& logs/log.dl20-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd >& logs/log.dl20-passage.splade-pp-sd.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd.cached >& logs/log.dl20-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd.onnx >& logs/log.dl20-passage.splade-pp-sd.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw >& logs/log.dl20-passage.cos-dpr-distil.hnsw.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.cached >& logs/log.dl20-passage.cos-dpr-distil.hnsw.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.onnx >& logs/log.dl20-passage.cos-dpr-distil.hnsw.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8 >& logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached >& logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.cached.txt & nohup 
python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.onnx >& logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.fw >& logs/log.dl20-passage.cos-dpr-distil.fw.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.lexlsh >& logs/log.dl20-passage.cos-dpr-distil.lexlsh.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw >& logs/log.dl20-passage.bge-base-en-v1.5.hnsw.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached >& logs/log.dl20-passage.bge-base-en-v1.5.hnsw.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.onnx >& logs/log.dl20-passage.bge-base-en-v1.5.hnsw.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8 >& logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached >& logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx >& logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw >& logs/log.dl20-passage.openai-ada2.hnsw.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8 >& logs/log.dl20-passage.openai-ada2.hnsw-int8.txt & -nohup 
python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw >& logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8 >& logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw-int8.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw.cached >& logs/log.dl20-passage.openai-ada2.hnsw.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8.cached >& logs/log.dl20-passage.openai-ada2.hnsw-int8.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached >& logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached >& logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc >& logs/log.dl20-doc.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc.wp-tok >& logs/log.dl20-doc.wp-tok.txt & @@ -153,8 +153,8 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.wp-tok >& logs/log.dl20-doc-segmented.wp-tok.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.wp-ca >& logs/log.dl20-doc-segmented.wp-ca.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.docTTTTTquery >& 
logs/log.dl20-doc-segmented.docTTTTTquery.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil >& logs/log.dl20-doc-segmented.unicoil.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil-noexp >& logs/log.dl20-doc-segmented.unicoil-noexp.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil.cached >& logs/log.dl20-doc-segmented.unicoil.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil-noexp.cached >& logs/log.dl20-doc-segmented.unicoil-noexp.cached.txt & ``` @@ -164,11 +164,11 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre ```bash nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage >& logs/log.msmarco-v2-passage.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.d2q-t5 >& logs/log.msmarco-v2-passage.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot >& logs/log.msmarco-v2-passage.unicoil-noexp-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot >& logs/log.msmarco-v2-passage.unicoil-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed >& logs/log.msmarco-v2-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot.cached >& logs/log.msmarco-v2-passage.unicoil-noexp-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot.cached >& 
logs/log.msmarco-v2-passage.unicoil-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed.cached >& logs/log.msmarco-v2-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed.onnx >& logs/log.msmarco-v2-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd >& logs/log.msmarco-v2-passage.splade-pp-sd.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd.cached >& logs/log.msmarco-v2-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd.onnx >& logs/log.msmarco-v2-passage.splade-pp-sd.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-augmented >& logs/log.msmarco-v2-passage-augmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-augmented.d2q-t5 >& logs/log.msmarco-v2-passage-augmented.d2q-t5.txt & @@ -177,18 +177,18 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc.d2q-t5 >& logs/log.msmarco-v2-doc.d2q-t5.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented >& logs/log.msmarco-v2-doc-segmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.d2q-t5 >& logs/log.msmarco-v2-doc-segmented.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot >& 
logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2 >& logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot >& logs/log.msmarco-v2-doc-segmented.unicoil-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2 >& logs/log.msmarco-v2-doc-segmented.unicoil-0shot-v2.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached >& logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached >& logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot.cached >& logs/log.msmarco-v2-doc-segmented.unicoil-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2.cached >& logs/log.msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage >& logs/log.dl21-passage.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.d2q-t5 >& logs/log.dl21-passage.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-noexp-0shot >& logs/log.dl21-passage.unicoil-noexp-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression 
dl21-passage.unicoil-0shot >& logs/log.dl21-passage.unicoil-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed >& logs/log.dl21-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-noexp-0shot.cached >& logs/log.dl21-passage.unicoil-noexp-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-0shot.cached >& logs/log.dl21-passage.unicoil-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed.cached >& logs/log.dl21-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed.onnx >& logs/log.dl21-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd >& logs/log.dl21-passage.splade-pp-sd.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd.cached >& logs/log.dl21-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd.onnx >& logs/log.dl21-passage.splade-pp-sd.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage-augmented >& logs/log.dl21-passage-augmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-passage-augmented.d2q-t5 >& logs/log.dl21-passage-augmented.d2q-t5.txt & @@ -197,18 +197,18 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc.d2q-t5 >& logs/log.dl21-doc.d2q-t5.txt & nohup python 
src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented >& logs/log.dl21-doc-segmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.d2q-t5 >& logs/log.dl21-doc-segmented.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot >& logs/log.dl21-doc-segmented.unicoil-noexp-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2 >& logs/log.dl21-doc-segmented.unicoil-noexp-0shot-v2.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot >& logs/log.dl21-doc-segmented.unicoil-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2 >& logs/log.dl21-doc-segmented.unicoil-0shot-v2.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached >& logs/log.dl21-doc-segmented.unicoil-noexp-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached >& logs/log.dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot.cached >& logs/log.dl21-doc-segmented.unicoil-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached >& logs/log.dl21-doc-segmented.unicoil-0shot-v2.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage >& logs/log.dl22-passage.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression 
dl22-passage.d2q-t5 >& logs/log.dl22-passage.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-noexp-0shot >& logs/log.dl22-passage.unicoil-noexp-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-0shot >& logs/log.dl22-passage.unicoil-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed >& logs/log.dl22-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-noexp-0shot.cached >& logs/log.dl22-passage.unicoil-noexp-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-0shot.cached >& logs/log.dl22-passage.unicoil-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed.cached >& logs/log.dl22-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed.onnx >& logs/log.dl22-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd >& logs/log.dl22-passage.splade-pp-sd.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd.cached >& logs/log.dl22-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd.onnx >& logs/log.dl22-passage.splade-pp-sd.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage-augmented >& logs/log.dl22-passage-augmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-passage-augmented.d2q-t5 
>& logs/log.dl22-passage-augmented.d2q-t5.txt & @@ -217,16 +217,16 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc.d2q-t5 >& logs/log.dl22-doc.d2q-t5.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented >& logs/log.dl22-doc-segmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.d2q-t5 >& logs/log.dl22-doc-segmented.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2 >& logs/log.dl22-doc-segmented.unicoil-noexp-0shot-v2.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2 >& logs/log.dl22-doc-segmented.unicoil-0shot-v2.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached >& logs/log.dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached >& logs/log.dl22-doc-segmented.unicoil-0shot-v2.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage >& logs/log.dl23-passage.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.d2q-t5 >& logs/log.dl23-passage.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-noexp-0shot >& logs/log.dl23-passage.unicoil-noexp-0shot.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-0shot >& logs/log.dl23-passage.unicoil-0shot.txt & -nohup python src/main/python/run_regression.py 
--index --verify --search --regression dl23-passage.splade-pp-ed >& logs/log.dl23-passage.splade-pp-ed.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-noexp-0shot.cached >& logs/log.dl23-passage.unicoil-noexp-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-0shot.cached >& logs/log.dl23-passage.unicoil-0shot.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-ed.cached >& logs/log.dl23-passage.splade-pp-ed.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-ed.onnx >& logs/log.dl23-passage.splade-pp-ed.onnx.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd >& logs/log.dl23-passage.splade-pp-sd.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd.cached >& logs/log.dl23-passage.splade-pp-sd.cached.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd.onnx >& logs/log.dl23-passage.splade-pp-sd.onnx.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage-augmented >& logs/log.dl23-passage-augmented.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-passage-augmented.d2q-t5 >& logs/log.dl23-passage-augmented.d2q-t5.txt & @@ -235,8 +235,8 @@ nohup python src/main/python/run_regression.py --index --verify --search --regre nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-doc.d2q-t5 >& logs/log.dl23-doc.d2q-t5.txt & nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented >& logs/log.dl23-doc-segmented.txt & nohup 
python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.d2q-t5 >& logs/log.dl23-doc-segmented.d2q-t5.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2 >& logs/log.dl23-doc-segmented.unicoil-noexp-0shot-v2.txt & -nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2 >& logs/log.dl23-doc-segmented.unicoil-0shot-v2.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached >& logs/log.dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.txt & +nohup python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached >& logs/log.dl23-doc-segmented.unicoil-0shot-v2.cached.txt & ``` diff --git a/docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.md b/docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.cached.md similarity index 95% rename from docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.md rename to docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.cached.md index d2da6b4f2e..035b18015d 100644 --- a/docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.md +++ b/docs/regressions/regressions-dl19-doc-segmented.unicoil-noexp.cached.md @@ -11,13 +11,13 @@ The experiments on this page are not actually reported in the paper. However, the model is the same, applied to the MS MARCO _segmented_ document corpus (without any expansions). Retrieval uses MaxP technique, where we select the score of the highest-scoring passage from a document as the score for that document to produce a document ranking. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-doc-segmented.unicoil-noexp.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-doc-segmented.unicoil-noexp.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil-noexp +python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil-noexp.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-doc-segmented.unicoil-noexp +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-doc-segmented.unicoil-noexp.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -44,7 +44,7 @@ To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil-noexp \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil-noexp.cached \ --corpus-path collections/msmarco-doc-segmented-unicoil-noexp ``` @@ -152,7 +152,7 @@ However, for these topics, we get the same effectiveness results; that is, the t ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@mayankanand007](https://github.com/mayankanand007) on 2022-02-28 (commit [`950d7fd`](https://github.com/castorini/anserini/commit/950d7fd88dbb87f39e9c1f6ccf9e41cbb6f04f36)) diff --git a/docs/regressions/regressions-dl19-doc-segmented.unicoil.md b/docs/regressions/regressions-dl19-doc-segmented.unicoil.cached.md similarity index 95% rename from docs/regressions/regressions-dl19-doc-segmented.unicoil.md rename to docs/regressions/regressions-dl19-doc-segmented.unicoil.cached.md index 1594af3315..a8dabfa85f 100644 --- a/docs/regressions/regressions-dl19-doc-segmented.unicoil.md +++ b/docs/regressions/regressions-dl19-doc-segmented.unicoil.cached.md @@ -11,13 +11,13 @@ The experiments on this page are not actually reported in the paper. However, the model is the same, applied to the MS MARCO _segmented_ document corpus (with doc2query-T5 expansions). Retrieval uses MaxP technique, where we select the score of the highest-scoring passage from a document as the score for that document to produce a document ranking. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-doc-segmented.unicoil.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-doc-segmented.unicoil.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil +python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-doc-segmented.unicoil +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-doc-segmented.unicoil.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -44,7 +44,7 @@ To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented.unicoil.cached \ --corpus-path collections/msmarco-doc-segmented-unicoil ``` @@ -152,7 +152,7 @@ However, for these topics, we get the same effectiveness results; that is, the t ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@mayankanand007](https://github.com/mayankanand007) on 2022-02-28 (commit [`950d7fd`](https://github.com/castorini/anserini/commit/950d7fd88dbb87f39e9c1f6ccf9e41cbb6f04f36)) diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md similarity index 90% rename from docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.md rename to docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md index f9fc6e2c09..8963b4b576 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -110,7 +110,7 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.843 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). 
@@ -118,4 +118,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md index b391c9922f..7e397e8d73 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.md @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 
2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md similarity index 89% rename from docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.md rename to docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md index a88dc01e70..e17037c97e 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached \ --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl19-passage.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -108,7 +108,7 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.842 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). 
@@ -116,4 +116,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md index af9ebe8310..fdd3e54987 100644 --- a/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.bge-base-en-v1.5.hnsw.onnx.md @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md similarity index 88% rename from docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.md rename to docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index 56d1bb6d3a..bc2806a03c 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -6,13 +6,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
@@ -20,7 +20,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -38,7 +38,7 @@ To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m 
map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness @@ -103,8 +103,8 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.850 | Note that due to the non-deterministic nature of HNSW 
indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.md b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md similarity index 87% rename from docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.md rename to docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md index 5ef2f503a5..e3878f97cd 100644 --- a/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.md +++ b/docs/regressions/regressions-dl19-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -6,13 +6,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
@@ -20,7 +20,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -38,7 +38,7 @@ To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached \ --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl19-passage.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness @@ -103,8 +103,8 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.851 | Note that due to the non-deterministic nature of HNSW indexing, 
results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md similarity index 90% rename from docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.md rename to docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md index 6b988049b1..b63e23f07a 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-cos-dpr-distil ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt ``` ## Effectiveness @@ -110,7 +110,7 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.805 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.yaml). 
+Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -118,4 +118,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md index c939cf8e24..4536936fe2 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw-int8.onnx.md @@ -82,7 +82,7 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil & ``` @@ -91,10 +91,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md similarity index 89% rename from docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.md rename to docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md index 593ecd3275..8a453aef96 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.cos-dpr-distil.hnsw.cached \ --corpus-path collections/msmarco-passage-cos-dpr-distil ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl19-passage.cos-dpr-distil.jsonl.txt ``` ## Effectiveness @@ -108,7 +108,7 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.805 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). 
@@ -116,4 +116,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md index 2f363f2ba1..6433867753 100644 --- a/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md +++ b/docs/regressions/regressions-dl19-passage.cos-dpr-distil.hnsw.onnx.md @@ -80,7 +80,7 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.txt \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt \ -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil & ``` @@ -89,10 +89,10 @@ Note that we are performing query inference "on-the-fly" with ONNX in these expe Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw.topics.dl19-passage.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-onnx.topics.dl19-passage.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md similarity index 90% rename from docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.md rename to docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md index 62a75e5383..eb93add171 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw-int8.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-openai-ada2 ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m 
ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt ``` ## Effectiveness @@ -110,7 +110,7 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.853 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml). 
Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -118,4 +118,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.md b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md similarity index 90% rename from docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.md rename to docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md index 51695ca5db..8296c8220d 100644 --- a/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.md +++ b/docs/regressions/regressions-dl19-passage.openai-ada2.hnsw.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.openai-ada2.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.openai-ada2.hnsw.cached \ --corpus-path collections/msmarco-passage-openai-ada2 ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl19-passage.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt 
+bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl19-passage.openai-ada2.jsonl.txt ``` ## Effectiveness @@ -108,7 +108,7 @@ With the above commands, you should be able to reproduce the following results: | [DL19 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.857 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -116,4 +116,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.template) and run `bin/build.sh` to rebuild the documentation. 
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl19-passage.splade-pp-ed.md b/docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md similarity index 88% rename from docs/regressions/regressions-dl19-passage.splade-pp-ed.md rename to docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md index 03a4597e17..55595b42a4 100644 --- a/docs/regressions/regressions-dl19-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-dl19-passage.splade-pp-ed.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.splade-pp-ed.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.splade-pp-ed.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-splade-pp-ed.tar` is 4.2 GB and has MD5 checksum `e With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco-passage-splade-pp-ed ``` @@ -81,41 +81,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl19-passage.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl19-passage.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl19-passage.splade-pp-ed.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl19-passage.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl19-passage.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl19-passage.splade-pp-ed.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl19-passage.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl19-passage.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl19-passage.splade-pp-ed.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl19-passage.splade-pp-ed.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl19-passage.splade-pp-ed.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl19-passage.splade-pp-ed.txt -bin/trec_eval -m recall.1000 -c -l 
2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl19-passage.splade-pp-ed.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl19-passage.splade-pp-ed.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m recall.100 -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl19-passage.splade-pp-ed.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl19-passage.splade-pp-ed.txt ``` ## Effectiveness @@ -138,7 +138,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@justram](https://github.com/justram) on 2023-03-08 (commit [`03f95a8`](https://github.com/castorini/anserini/commit/03f95a8e1ae09ab09efe046bfcbd3a4cdda691b4)) + Results reproduced by [@ArthurChen189](https://github.com/ArthurChen189) on 2023-06-01 (commit [`a403a2a`](https://github.com/castorini/anserini/commit/a403a2a44af9322c7a2dbdb5240180a62398ab06)) \ No newline at end of file diff --git a/docs/regressions/regressions-dl19-passage.splade-pp-sd.md b/docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md similarity index 88% rename from docs/regressions/regressions-dl19-passage.splade-pp-sd.md rename to docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md index 988f6c75e9..9741f8a579 100644 --- a/docs/regressions/regressions-dl19-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-dl19-passage.splade-pp-sd.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments 
provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.splade-pp-sd.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-splade-pp-sd.tar` is 4.8 GB and has MD5 checksum `c With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco-passage-splade-pp-sd ``` @@ -81,41 +81,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl19-passage.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl19-passage.splade-pp-sd.txt \ + -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl19-passage.splade-pp-sd.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl19-passage.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl19-passage.splade-pp-sd.txt \ + -output 
runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl19-passage.splade-pp-sd.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl19-passage.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl19-passage.splade-pp-sd.txt \ + -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl19-passage.splade-pp-sd.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl19-passage.splade-pp-sd.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m recall.1000 -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl19-passage.splade-pp-sd.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl19-passage.splade-pp-sd.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl19-passage.splade-pp-sd.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m recall.100 -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl19-passage.splade-pp-sd.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl19-passage.splade-pp-sd.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl19-passage.splade-pp-sd.txt ``` ## Effectiveness @@ -138,7 +138,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@justram](https://github.com/justram) on 2023-03-08 (commit [`03f95a8`](https://github.com/castorini/anserini/commit/03f95a8e1ae09ab09efe046bfcbd3a4cdda691b4)) + Results reproduced by [@ArthurChen189](https://github.com/ArthurChen189) on 2023-06-01 (commit [`a403a2a`](https://github.com/castorini/anserini/commit/a403a2a44af9322c7a2dbdb5240180a62398ab06)) diff --git a/docs/regressions/regressions-dl19-passage.unicoil-noexp.md b/docs/regressions/regressions-dl19-passage.unicoil-noexp.cached.md similarity index 87% rename from docs/regressions/regressions-dl19-passage.unicoil-noexp.md rename to docs/regressions/regressions-dl19-passage.unicoil-noexp.cached.md index d2ecbc7da4..71064831a1 100644 --- a/docs/regressions/regressions-dl19-passage.unicoil-noexp.md +++ b/docs/regressions/regressions-dl19-passage.unicoil-noexp.cached.md @@ -13,13 +13,13 @@ Here, a variant model without expansion is used. Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.unicoil-noexp.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.unicoil-noexp.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil-noexp +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil-noexp.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.unicoil-noexp +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.unicoil-noexp.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -46,7 +46,7 @@ To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil-noexp \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil-noexp.cached \ --corpus-path collections/msmarco-passage-unicoil-noexp ``` @@ -84,41 +84,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl19-passage.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl19-passage.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl19-passage.unicoil-noexp.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl19-passage.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl19-passage.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be 
performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl19-passage.unicoil-noexp.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 
tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl19-passage.unicoil-noexp.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl19-passage.unicoil-noexp.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl19-passage.unicoil-noexp.0shot.txt ``` ## Effectiveness @@ -141,7 +141,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@mayankanand007](https://github.com/mayankanand007) on 2022-02-28 (commit [`950d7fd`](https://github.com/castorini/anserini/commit/950d7fd88dbb87f39e9c1f6ccf9e41cbb6f04f36)) diff --git a/docs/regressions/regressions-dl19-passage.unicoil.md b/docs/regressions/regressions-dl19-passage.unicoil.cached.md similarity index 84% rename from docs/regressions/regressions-dl19-passage.unicoil.md rename to docs/regressions/regressions-dl19-passage.unicoil.cached.md index 9afc9adea8..873b939bf3 100644 --- a/docs/regressions/regressions-dl19-passage.unicoil.md +++ b/docs/regressions/regressions-dl19-passage.unicoil.cached.md @@ -13,13 +13,13 @@ However, the model is the same. Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.unicoil.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl19-passage.unicoil.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.unicoil +python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage.unicoil.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -46,7 +46,7 @@ To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef7 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil \ +python src/main/python/run_regression.py --index --verify --search --regression dl19-passage.unicoil.cached \ --corpus-path collections/msmarco-passage-unicoil ``` @@ -84,41 +84,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl19-passage.unicoil.0shot.txt \ + -output runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl19-passage.unicoil.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl19-passage.unicoil.0shot.txt \ + -output runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl19-passage.unicoil.0shot.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil/ \ -topics tools/topics-and-qrels/topics.dl19-passage.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl19-passage.unicoil.0shot.txt \ + -output runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl19-passage.unicoil.0shot.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl19-passage.unicoil.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl19-passage.unicoil.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl19-passage.unicoil.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl19-passage.unicoil.0shot.txt 
+bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl19-passage.unicoil.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl19-passage.unicoil.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl19-passage.unicoil.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl19-passage.txt 
runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl19-passage.unicoil.0shot.txt ``` ## Effectiveness @@ -141,7 +141,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl19-passage.unicoil.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@mayankanand007](https://github.com/mayankanand007) on 2022-02-28 (commit [`950d7fd`](https://github.com/castorini/anserini/commit/950d7fd88dbb87f39e9c1f6ccf9e41cbb6f04f36)) diff --git a/docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.md b/docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.cached.md similarity index 89% rename from docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.md rename to docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.cached.md index 9ebc78d589..30c067753a 100644 --- a/docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.md +++ b/docs/regressions/regressions-dl20-doc-segmented.unicoil-noexp.cached.md @@ -11,13 +11,13 @@ The experiments on this page are not actually reported in the paper. However, the model is the same, applied to the MS MARCO _segmented_ document corpus (without any expansions). Retrieval uses MaxP technique, where we select the score of the highest-scoring passage from a document as the score for that document to produce a document ranking. 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil-noexp +python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil-noexp.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-doc-segmented.unicoil-noexp +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-doc-segmented.unicoil-noexp.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -44,7 +44,7 @@ To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil-noexp \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil-noexp.cached \ --corpus-path collections/msmarco-doc-segmented-unicoil-noexp ``` @@ -82,41 +82,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-doc-segmented.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt \ -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-doc-segmented.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt \ 
+ -output runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt \ -impact -pretokenized -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-doc-segmented.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt \ -impact -pretokenized -rocchio -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m 
ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt ``` ## Effectiveness @@ -152,7 +152,7 @@ However, for these topics, we get the same effectiveness results; that is, the t ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-dl20-doc-segmented.unicoil.md b/docs/regressions/regressions-dl20-doc-segmented.unicoil.cached.md similarity index 82% rename from docs/regressions/regressions-dl20-doc-segmented.unicoil.md rename to docs/regressions/regressions-dl20-doc-segmented.unicoil.cached.md index cec36e6dab..877fc32f47 100644 --- a/docs/regressions/regressions-dl20-doc-segmented.unicoil.md +++ b/docs/regressions/regressions-dl20-doc-segmented.unicoil.cached.md @@ -11,13 +11,13 @@ The experiments on this page are not actually reported in the paper. However, the model is the same, applied to the MS MARCO _segmented_ document corpus (with doc2query-T5 expansions). Retrieval uses MaxP technique, where we select the score of the highest-scoring passage from a document as the score for that document to produce a document ranking. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-doc-segmented.unicoil.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-doc-segmented.unicoil.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil +python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-doc-segmented.unicoil +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-doc-segmented.unicoil.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -44,7 +44,7 @@ To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented.unicoil.cached \ --corpus-path collections/msmarco-doc-segmented-unicoil ``` @@ -82,41 +82,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-doc-segmented.unicoil/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt \ + -output runs/run.msmarco-doc-segmented-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt \ -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-doc-segmented.unicoil/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt \ + -output runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt \ -impact -pretokenized -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-doc-segmented.unicoil/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt \ + -output runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt \ -impact -pretokenized -rocchio -hits 10000 
-selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl20-doc.txt runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl20-doc.txt 
runs/run.msmarco-doc-segmented-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt ``` ## Effectiveness @@ -152,7 +152,7 @@ However, for these topics, we get the same effectiveness results; that is, the t ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md similarity index 90% rename from docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.md rename to docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md index 81abaaf2e1..fd1587f9a5 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with BGE-base-en-v1.5. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt 
-bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -110,7 +110,7 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.840 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -118,4 +118,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.md b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md similarity index 89% rename from docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.md rename to docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md index b7836f2a14..b2ddfc8642 100644 --- a/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.md +++ b/docs/regressions/regressions-dl20-passage.bge-base-en-v1.5.hnsw.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached \ --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.dl20.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m 
ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.dl20.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -108,7 +108,7 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.840 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml). 
Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -116,4 +116,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md similarity index 88% rename from docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.md rename to docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index 545ba20561..853bdf96d7 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -6,13 +6,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
@@ -20,7 +20,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -38,7 +38,7 @@ To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 
tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness @@ -103,8 +103,8 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.858 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.md b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md similarity index 87% rename from docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.md rename to docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md index 3c749cfb33..3a55193090 100644 --- a/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.md +++ b/docs/regressions/regressions-dl20-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -6,13 +6,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
@@ -20,7 +20,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -38,7 +38,7 @@ To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached \ --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` @@ -75,17 +75,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.dl20.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 
tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.dl20.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness @@ -103,8 +103,8 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.864 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. 
-Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md similarity index 90% rename from docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.md rename to docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md index ffcc827b44..3f5731e703 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-cos-dpr-distil ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt ``` ## Effectiveness @@ -110,7 +110,7 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.843 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.yaml). 
+Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -118,4 +118,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.md b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md similarity index 89% rename from docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.md rename to docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md index 3db35dbace..518c43bc4a 100644 --- a/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.md +++ b/docs/regressions/regressions-dl20-passage.cos-dpr-distil.hnsw.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.cos-dpr-distil.hnsw.cached \ --corpus-path collections/msmarco-passage-cos-dpr-distil ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.dl20.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m 
ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.dl20.cos-dpr-distil.jsonl.txt ``` ## Effectiveness @@ -108,7 +108,7 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.843 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml). 
Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -116,4 +116,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md similarity index 90% rename from docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.md rename to docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md index dc0f17ba63..52013d0b68 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw-int8.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-openai-ada2 ``` @@ -82,17 +82,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c 
tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt ``` ## Effectiveness @@ -110,7 +110,7 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.866 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml). 
Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -118,4 +118,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.md b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md similarity index 90% rename from docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.md rename to docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md index 658892a2ec..a8ffbd836f 100644 --- a/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.md +++ b/docs/regressions/regressions-dl20-passage.openai-ada2.hnsw.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.openai-ada2.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.openai-ada2.hnsw.cached \ --corpus-path collections/msmarco-passage-openai-ada2 ``` @@ -80,17 +80,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.dl20.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m ndcg_cut.10 -c 
tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.dl20.openai-ada2.jsonl.txt ``` ## Effectiveness @@ -108,7 +108,7 @@ With the above commands, you should be able to reproduce the following results: | [DL20 (Passage)](https://trec.nist.gov/data/deep2020.html) | 0.867 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml). Also note that retrieval metrics are computed to depth 1000 hits per query (as opposed to 100 hits per query for document ranking). Also, for computing nDCG, remember that we keep qrels of _all_ relevance grades, whereas for other metrics (e.g., AP), relevance grade 1 is considered not relevant (i.e., use the `-l 2` option in `trec_eval`). @@ -116,4 +116,4 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.template) and run `bin/build.sh` to rebuild the documentation. 
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-dl20-passage.splade-pp-ed.md b/docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md similarity index 89% rename from docs/regressions/regressions-dl20-passage.splade-pp-ed.md rename to docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md index b964ee4d04..92b7822d83 100644 --- a/docs/regressions/regressions-dl20-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-dl20-passage.splade-pp-ed.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.splade-pp-ed.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.splade-pp-ed.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-splade-pp-ed.tar` is 4.2 GB and has MD5 checksum `e With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco-passage-splade-pp-ed ``` @@ -81,41 +81,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl20.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl20.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl20.splade-pp-ed.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl20.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl20.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl20.splade-pp-ed.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl20.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl20.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl20.splade-pp-ed.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt 
runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl20.splade-pp-ed.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl20.splade-pp-ed.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl20.splade-pp-ed.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt 
runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl20.splade-pp-ed.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl20.splade-pp-ed.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl20.splade-pp-ed.txt +bin/trec_eval -m 
recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl20.splade-pp-ed.txt ``` ## Effectiveness @@ -138,7 +138,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@justram](https://github.com/justram) on 2023-03-08 (commit [`03f95a8`](https://github.com/castorini/anserini/commit/03f95a8e1ae09ab09efe046bfcbd3a4cdda691b4)) + Results reproduced by [@ArthurChen189](https://github.com/ArthurChen189) on 2023-06-01 (commit [`a403a2a`](https://github.com/castorini/anserini/commit/a403a2a44af9322c7a2dbdb5240180a62398ab06)) \ No newline at end of file diff --git a/docs/regressions/regressions-dl20-passage.splade-pp-sd.md b/docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md similarity index 89% rename from docs/regressions/regressions-dl20-passage.splade-pp-sd.md rename to docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md index 38d2462f6a..c943e9882e 100644 --- a/docs/regressions/regressions-dl20-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-dl20-passage.splade-pp-sd.cached.md @@ -11,13 +11,13 @@ In these experiments, we are using pre-encoded queries (i.e., cached results of Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.splade-pp-sd.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. 
@@ -25,7 +25,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -43,7 +43,7 @@ To confirm, `msmarco-passage-splade-pp-sd.tar` is 4.8 GB and has MD5 checksum `c With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco-passage-splade-pp-sd ``` @@ -81,41 +81,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl20.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl20.splade-pp-sd.txt \ + -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl20.splade-pp-sd.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl20.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl20.splade-pp-sd.txt \ + -output 
runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl20.splade-pp-sd.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl20.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl20.splade-pp-sd.txt \ + -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl20.splade-pp-sd.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl20.splade-pp-sd.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt 
runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl20.splade-pp-sd.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl20.splade-pp-sd.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl20.splade-pp-sd.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl20.splade-pp-sd.txt 
+bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl20.splade-pp-sd.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl20.splade-pp-sd.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl20.splade-pp-sd.txt ``` ## Effectiveness @@ -138,7 +138,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@justram](https://github.com/justram) on 2023-03-08 (commit [`03f95a8`](https://github.com/castorini/anserini/commit/03f95a8e1ae09ab09efe046bfcbd3a4cdda691b4)) + Results reproduced by [@ArthurChen189](https://github.com/ArthurChen189) on 2023-06-01 (commit [`a403a2a`](https://github.com/castorini/anserini/commit/a403a2a44af9322c7a2dbdb5240180a62398ab06)) \ No newline at end of file diff --git a/docs/regressions/regressions-dl20-passage.unicoil-noexp.md b/docs/regressions/regressions-dl20-passage.unicoil-noexp.cached.md similarity index 87% rename from docs/regressions/regressions-dl20-passage.unicoil-noexp.md rename to docs/regressions/regressions-dl20-passage.unicoil-noexp.cached.md index 73ef6f0b0d..23a36df0cd 100644 --- a/docs/regressions/regressions-dl20-passage.unicoil-noexp.md +++ b/docs/regressions/regressions-dl20-passage.unicoil-noexp.cached.md @@ -13,13 +13,13 @@ Here, a variant model without expansion is used. Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.unicoil-noexp.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.unicoil-noexp.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil-noexp +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil-noexp.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.unicoil-noexp +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.unicoil-noexp.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -46,7 +46,7 @@ To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil-noexp \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil-noexp.cached \ --corpus-path collections/msmarco-passage-unicoil-noexp ``` @@ -84,41 +84,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil-noexp/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 
tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q.topics.dl20.unicoil-noexp.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rm3.topics.dl20.unicoil-noexp.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt -bin/trec_eval -m 
recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached_q+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached.topics.dl20.unicoil-noexp.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rm3.topics.dl20.unicoil-noexp.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.100 -c 
-l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil-noexp.unicoil-noexp-cached+rocchio.topics.dl20.unicoil-noexp.0shot.txt ``` ## Effectiveness @@ -141,7 +141,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-dl20-passage.unicoil.md b/docs/regressions/regressions-dl20-passage.unicoil.cached.md similarity index 84% rename from docs/regressions/regressions-dl20-passage.unicoil.md rename to docs/regressions/regressions-dl20-passage.unicoil.cached.md index ade86e36e5..ad5df6c21c 100644 --- a/docs/regressions/regressions-dl20-passage.unicoil.md +++ b/docs/regressions/regressions-dl20-passage.unicoil.cached.md @@ -13,13 +13,13 @@ However, the model is the same. Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). 
For additional instructions on working with MS MARCO passage collection, refer to [this page](../../docs/experiments-msmarco-passage.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.unicoil.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl20-passage.unicoil.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.unicoil +python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage.unicoil.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -46,7 +46,7 @@ To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef7 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil \ +python src/main/python/run_regression.py --index --verify --search --regression dl20-passage.unicoil.cached \ --corpus-path collections/msmarco-passage-unicoil ``` @@ -84,41 +84,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt \ + -output runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil/ \ -topics tools/topics-and-qrels/topics.dl20.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt \ + -output runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt \ -impact -pretokenized -rm3 & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.unicoil/ \ -topics 
tools/topics-and-qrels/topics.dl20.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt \ + -output runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt \ -impact -pretokenized -rocchio & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q.topics.dl20.unicoil.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rm3.topics.dl20.unicoil.0shot.txt - -bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt 
runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt -bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached_q+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached.topics.dl20.unicoil.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rm3.topics.dl20.unicoil.0shot.txt + +bin/trec_eval -m map -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m ndcg_cut.10 -c tools/topics-and-qrels/qrels.dl20-passage.txt 
runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m recall.100 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt +bin/trec_eval -m recall.1000 -c -l 2 tools/topics-and-qrels/qrels.dl20-passage.txt runs/run.msmarco-passage-unicoil.unicoil-cached+rocchio.topics.dl20.unicoil.0shot.txt ``` ## Effectiveness @@ -141,7 +141,7 @@ The experimental results reported here are directly comparable to the results re ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/dl20-passage.unicoil.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@manveertamber](https://github.com/manveertamber) on 2022-02-25 (commit [`7472d86`](https://github.com/castorini/anserini/commit/7472d862c7311bc8bbd30655c940d6396e27c223)) + Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.md b/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.cached.md similarity index 89% rename from docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.md rename to docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.cached.md index 2ff3298397..29725a24d1 100644 --- a/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.md +++ b/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot-v2.cached.md @@ -19,13 +19,13 @@ For additional instructions on working with the MS MARCO V2 document corpus, ref Note 
that the NIST relevance judgments provide far more relevant documents per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -34,7 +34,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -58,7 +58,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot-v2 ``` @@ -95,41 +95,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rm3.topics.dl21.unicoil.0shot.txt 
\ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rocchio.topics.dl21.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rm3.topics.dl21.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rocchio.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rocchio.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rocchio.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map 
tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot.md b/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot.cached.md similarity index 92% rename from docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot.md rename to docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot.cached.md index 6dd04947a3..be499f65c3 100644 --- a/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot.md +++ 
b/docs/regressions/regressions-dl21-doc-segmented.unicoil-0shot.cached.md @@ -18,13 +18,13 @@ For additional instructions on working with the MS MARCO V2 document corpus, ref Note that the NIST relevance judgments provide far more relevant documents per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -33,7 +33,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -57,7 +57,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_0shot.tar` is 62 GB and has an MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-0shot.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot ``` @@ -94,17 +94,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.md b/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.md similarity index 87% rename from docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.md rename to docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.md index 7bae2f07b5..12c9da7379 100644 --- a/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.md +++ b/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.md @@ -19,13 +19,13 @@ For additional instructions on working with the MS MARCO V2 document corpus, ref Note that the NIST relevance judgments provide far more relevant documents per topic, unlike the "sparse" 
judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -34,7 +34,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -58,7 +58,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and h With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 ``` @@ -95,41 +95,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output 
runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rm3.topics.dl21.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rocchio.topics.dl21.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map 
tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rm3.topics.dl21.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 
tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot.md b/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot.cached.md similarity index 92% rename from docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot.md rename to docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot.cached.md index 6d208d6e4a..ce31c82e16 100644 --- a/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot.md +++ b/docs/regressions/regressions-dl21-doc-segmented.unicoil-noexp-0shot.cached.md @@ -18,13 +18,13 @@ For additional instructions on working with the MS MARCO V2 document corpus, ref Note that the NIST relevance judgments provide far more relevant documents per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -33,7 +33,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -57,7 +57,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar` is 54 GB and has With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot ``` @@ -94,17 +94,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt 
+bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-passage.splade-pp-ed.md b/docs/regressions/regressions-dl21-passage.splade-pp-ed.cached.md similarity index 88% rename from docs/regressions/regressions-dl21-passage.splade-pp-ed.md rename to docs/regressions/regressions-dl21-passage.splade-pp-ed.cached.md index ade97dc857..b6e8122bd7 100644 --- a/docs/regressions/regressions-dl21-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-dl21-passage.splade-pp-ed.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.splade-pp-ed.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.splade-pp-ed.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -27,7 +27,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -45,7 +45,7 @@ To confirm, `msmarco_v2_passage_splade_pp_ed.tar` is 66 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_ed ``` @@ -82,44 +82,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl21.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl21.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl21.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl21.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl21.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl21.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl21.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl21.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl21.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using 
`trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl21.splade-pp-ed.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl21.splade-pp-ed.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl21.splade-pp-ed.txt 
-bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl21.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl21.splade-pp-ed.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m 
ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl21.splade-pp-ed.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl21.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl21.splade-pp-ed.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-passage.splade-pp-sd.md b/docs/regressions/regressions-dl21-passage.splade-pp-sd.cached.md similarity index 88% rename from docs/regressions/regressions-dl21-passage.splade-pp-sd.md rename to docs/regressions/regressions-dl21-passage.splade-pp-sd.cached.md index fea9182f8b..e8caea1b86 100644 --- a/docs/regressions/regressions-dl21-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-dl21-passage.splade-pp-sd.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 
passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.splade-pp-sd.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. 
@@ -27,7 +27,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -45,7 +45,7 @@ To confirm, `msmarco_v2_passage_splade_pp_sd.tar` is 76 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_sd ``` @@ -82,44 +82,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl21.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl21.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl21.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl21.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl21.splade-pp-sd.txt \ + -output 
runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl21.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl21.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl21.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl21.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl21.splade-pp-sd.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl21.splade-pp-sd.txt -bin/trec_eval 
-c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl21.splade-pp-sd.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl21.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 
tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl21.splade-pp-sd.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl21.splade-pp-sd.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl21.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 
tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl21.splade-pp-sd.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-passage.unicoil-0shot.md b/docs/regressions/regressions-dl21-passage.unicoil-0shot.cached.md similarity index 88% rename from docs/regressions/regressions-dl21-passage.unicoil-0shot.md rename to docs/regressions/regressions-dl21-passage.unicoil-0shot.cached.md index 7f695d3f90..1a60846a88 100644 --- a/docs/regressions/regressions-dl21-passage.unicoil-0shot.md +++ b/docs/regressions/regressions-dl21-passage.unicoil-0shot.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.unicoil-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.unicoil-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.unicoil-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.unicoil-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.unicoil-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.unicoil-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -52,7 +52,7 @@ To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-0shot ``` @@ -89,44 +89,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl21.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt \ -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl21.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt \ -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash 
-bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl21.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl21.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl21.unicoil.0shot.txt 
-bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl21.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl21.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt 
runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl21.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl21.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.md b/docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.cached.md similarity index 85% rename from docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.md rename to docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.cached.md index 2a4f9eb0fd..d884f37c59 100644 --- 
a/docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.md +++ b/docs/regressions/regressions-dl21-passage.unicoil-noexp-0shot.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.unicoil-noexp-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl21-passage.unicoil-noexp-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-noexp-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage.unicoil-noexp-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -52,7 +52,7 @@ To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-noexp-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl21-passage.unicoil-noexp-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-noexp-0shot ``` @@ -89,44 +89,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl21.unicoil-noexp.0shot.txt \ + -output 
runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt \ -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl21.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt \ -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl21.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m 
recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl21.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl21.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt 
+bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl21.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl21.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c 
-M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl21.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.md b/docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.cached.md similarity index 89% rename from docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.md rename to docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.cached.md index 5cfe75eca1..32a5a5a019 100644 --- a/docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.md +++ b/docs/regressions/regressions-dl22-doc-segmented.unicoil-0shot-v2.cached.md @@ -19,13 +19,13 @@ Note that the NIST relevance judgments provide far more relevant documents per t An important caveat is that these document judgments were inferred from the passages. That is, if a passage is relevant, the document containing it is considered relevant. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -34,7 +34,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -58,7 +58,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot-v2 ``` @@ -95,41 +95,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output 
runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map 
tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.md b/docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.md similarity index 87% rename from docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.md rename to docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.md index 0a84420136..71e73204af 100644 --- a/docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.md +++ b/docs/regressions/regressions-dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.md @@ -19,13 +19,13 @@ Note that the NIST relevance judgments provide far more relevant documents per t An important caveat is that these document judgments were inferred from the passages. That is, if a passage is relevant, the document containing it is considered relevant. 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-noexp-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -34,7 +34,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -58,7 +58,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and h With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 ``` @@ -95,41 +95,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output 
runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map 
tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m 
recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl22-passage.splade-pp-ed.md b/docs/regressions/regressions-dl22-passage.splade-pp-ed.cached.md similarity index 88% rename from docs/regressions/regressions-dl22-passage.splade-pp-ed.md rename to docs/regressions/regressions-dl22-passage.splade-pp-ed.cached.md index 9c6556e48f..66ab715451 100644 --- a/docs/regressions/regressions-dl22-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-dl22-passage.splade-pp-ed.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.splade-pp-ed.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.splade-pp-ed.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -27,7 +27,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -45,7 +45,7 @@ To confirm, `msmarco_v2_passage_splade_pp_ed.tar` is 66 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_ed ``` @@ -82,44 +82,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl22.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl22.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl22.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl22.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl22.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl22.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl22.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl22.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl22.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using 
`trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl22.splade-pp-ed.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl22.splade-pp-ed.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl22.splade-pp-ed.txt 
-bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl22.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl22.splade-pp-ed.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m 
ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl22.splade-pp-ed.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl22.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl22.splade-pp-ed.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl22-passage.splade-pp-sd.md b/docs/regressions/regressions-dl22-passage.splade-pp-sd.cached.md similarity index 88% rename from docs/regressions/regressions-dl22-passage.splade-pp-sd.md rename to docs/regressions/regressions-dl22-passage.splade-pp-sd.cached.md index 5b1df99e07..4074ca415e 100644 --- a/docs/regressions/regressions-dl22-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-dl22-passage.splade-pp-sd.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 
passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.splade-pp-sd.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. 
@@ -27,7 +27,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -45,7 +45,7 @@ To confirm, `msmarco_v2_passage_splade_pp_sd.tar` is 76 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_sd ``` @@ -82,44 +82,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl22.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl22.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl22.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl22.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl22.splade-pp-sd.txt \ + -output 
runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl22.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl22.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl22.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl22.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl22.splade-pp-sd.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl22.splade-pp-sd.txt -bin/trec_eval 
-c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl22.splade-pp-sd.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl22.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 
tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl22.splade-pp-sd.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl22.splade-pp-sd.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl22.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 
tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl22.splade-pp-sd.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl22-passage.unicoil-0shot.md b/docs/regressions/regressions-dl22-passage.unicoil-0shot.cached.md similarity index 89% rename from docs/regressions/regressions-dl22-passage.unicoil-0shot.md rename to docs/regressions/regressions-dl22-passage.unicoil-0shot.cached.md index 5227b61380..7e949bd9c6 100644 --- a/docs/regressions/regressions-dl22-passage.unicoil-0shot.md +++ b/docs/regressions/regressions-dl22-passage.unicoil-0shot.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.unicoil-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.unicoil-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.unicoil-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.unicoil-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.unicoil-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.unicoil-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -52,7 +52,7 @@ To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-0shot ``` @@ -89,44 +89,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt \ -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt \ -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash 
-bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl22.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl22.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt 
-bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl22.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt 
runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl22.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl22.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.md b/docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.cached.md similarity index 87% rename from docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.md rename to docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.cached.md index 256488f78b..d5ca41c6a6 100644 --- 
a/docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.md +++ b/docs/regressions/regressions-dl22-passage.unicoil-noexp-0shot.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.unicoil-noexp-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl22-passage.unicoil-noexp-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-noexp-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl22-passage.unicoil-noexp-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -52,7 +52,7 @@ To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-noexp-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl22-passage.unicoil-noexp-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-noexp-0shot ``` @@ -89,44 +89,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt \ + -output 
runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt \ -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl22.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt \ -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl22.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m 
recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl22.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt 
+bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl22.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl22.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c 
-M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl22-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl22.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.md b/docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.cached.md similarity index 89% rename from docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.md rename to docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.cached.md index b55ea8f974..87d969bfe2 100644 --- a/docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.md +++ b/docs/regressions/regressions-dl23-doc-segmented.unicoil-0shot-v2.cached.md @@ -19,13 +19,13 @@ Note that the NIST relevance judgments provide far more relevant documents per t An important caveat is that these document judgments were inferred from the passages. That is, if a passage is relevant, the document containing it is considered relevant. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -34,7 +34,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -58,7 +58,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot-v2 ``` @@ -95,41 +95,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output 
runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map 
tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.md b/docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.md similarity index 87% rename from docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.md rename to docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.md index 2802f8c720..8d9843231d 100644 --- a/docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.md +++ b/docs/regressions/regressions-dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.md @@ -19,13 +19,13 @@ Note that the NIST relevance judgments provide far more relevant documents per t An important caveat is that these document judgments were inferred from the passages. That is, if a passage is relevant, the document containing it is considered relevant. 
-The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-noexp-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -34,7 +34,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -58,7 +58,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and h With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 ``` @@ -95,41 +95,41 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output 
runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt \ -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map 
tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt 
runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m 
recip_rank -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl23-passage.splade-pp-ed.md b/docs/regressions/regressions-dl23-passage.splade-pp-ed.cached.md similarity index 88% rename from docs/regressions/regressions-dl23-passage.splade-pp-ed.md rename to docs/regressions/regressions-dl23-passage.splade-pp-ed.cached.md index 7cc8c8e166..94e3d11f2c 100644 --- a/docs/regressions/regressions-dl23-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-dl23-passage.splade-pp-ed.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.splade-pp-ed.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.splade-pp-ed.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-ed.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -27,7 +27,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -45,7 +45,7 @@ To confirm, `msmarco_v2_passage_splade_pp_ed.tar` is 66 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_ed ``` @@ -82,44 +82,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl23.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl23.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl23.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl23.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl23.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl23.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.dl23.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl23.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl23.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using 
`trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.dl23.splade-pp-ed.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rm3.topics.dl23.splade-pp-ed.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl23.splade-pp-ed.txt 
-bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl23.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q+rocchio.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.dl23.splade-pp-ed.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m 
ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rm3.topics.dl23.splade-pp-ed.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl23.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached+rocchio.topics.dl23.splade-pp-ed.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl23-passage.splade-pp-sd.md b/docs/regressions/regressions-dl23-passage.splade-pp-sd.cached.md similarity index 88% rename from docs/regressions/regressions-dl23-passage.splade-pp-sd.md rename to docs/regressions/regressions-dl23-passage.splade-pp-sd.cached.md index f1fd5f6602..7c9980faa2 100644 --- a/docs/regressions/regressions-dl23-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-dl23-passage.splade-pp-sd.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 
passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.splade-pp-sd.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. 
@@ -27,7 +27,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -45,7 +45,7 @@ To confirm, `msmarco_v2_passage_splade_pp_sd.tar` is 76 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_sd ``` @@ -82,44 +82,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl23.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl23.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl23.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl23.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl23.splade-pp-sd.txt \ + -output 
runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl23.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.dl23.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl23.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl23.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.dl23.splade-pp-sd.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl23.splade-pp-sd.txt -bin/trec_eval 
-c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rm3.topics.dl23.splade-pp-sd.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl23.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q+rocchio.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 
tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.dl23.splade-pp-sd.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rm3.topics.dl23.splade-pp-sd.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl23.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 -l 2 
tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached+rocchio.topics.dl23.splade-pp-sd.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl23-passage.unicoil-0shot.md b/docs/regressions/regressions-dl23-passage.unicoil-0shot.cached.md similarity index 88% rename from docs/regressions/regressions-dl23-passage.unicoil-0shot.md rename to docs/regressions/regressions-dl23-passage.unicoil-0shot.cached.md index d66b0c68aa..a4f83c4ef9 100644 --- a/docs/regressions/regressions-dl23-passage.unicoil-0shot.md +++ b/docs/regressions/regressions-dl23-passage.unicoil-0shot.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.unicoil-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.unicoil-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.unicoil-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.unicoil-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.unicoil-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.unicoil-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -52,7 +52,7 @@ To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-0shot ``` @@ -89,44 +89,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt \ -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-0shot/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt \ -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ```bash 
-bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q.topics.dl23.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rm3.topics.dl23.unicoil.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt 
-bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached_q+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached.topics.dl23.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt 
runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rm3.topics.dl23.unicoil.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot-cached+rocchio.topics.dl23.unicoil.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.md b/docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.cached.md similarity index 85% rename from docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.md rename to docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.cached.md index 6a17630768..c72eff5b77 100644 --- 
a/docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.md +++ b/docs/regressions/regressions-dl23-passage.unicoil-noexp-0shot.cached.md @@ -13,13 +13,13 @@ For additional instructions on working with the MS MARCO V2 passage corpus, refe Note that the NIST relevance judgments provide far more relevant passages per topic, unlike the "sparse" judgments provided by Microsoft (these are sometimes called "dense" judgments to emphasize this contrast). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.unicoil-noexp-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/dl23-passage.unicoil-noexp-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-noexp-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -28,7 +28,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression dl23-passage.unicoil-noexp-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -52,7 +52,7 @@ To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-noexp-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression dl23-passage.unicoil-noexp-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-noexp-0shot ``` @@ -89,44 +89,44 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt \ -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt \ + -output 
runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt \ -impact -pretokenized -rm3 -collection JsonVectorCollection & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.unicoil-noexp-0shot/ \ -topics tools/topics-and-qrels/topics.dl23.unicoil-noexp.0shot.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt \ + -output runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt \ -impact -pretokenized -rocchio -collection JsonVectorCollection & ``` Evaluation can be performed using `trec_eval`: ``` -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q.topics.dl23.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m 
recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rm3.topics.dl23.unicoil-noexp.0shot.txt - -bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt -bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached_q+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt 
+bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached.topics.dl23.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rm3.topics.dl23.unicoil-noexp.0shot.txt + +bin/trec_eval -c -M 100 -m map -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c 
-M 100 -m recip_rank -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.100 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt +bin/trec_eval -c -m recall.1000 -l 2 tools/topics-and-qrels/qrels.dl23-passage.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot-cached+rocchio.topics.dl23.unicoil-noexp.0shot.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.md b/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.md rename to docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.cached.md index adc8cc39e1..32ffc77db7 100644 --- a/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.md +++ b/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil-noexp.cached.md @@ -11,13 +11,13 @@ The experiments on this page are not actually reported in the paper. However, the model is the same, applied to the MS MARCO _segmented_ document corpus (without any expansions). Retrieval uses MaxP technique, where we select the score of the highest-scoring passage from a document as the score for that document to produce a document ranking. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-doc-segmented.unicoil-noexp.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-doc-segmented.unicoil-noexp.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -44,7 +44,7 @@ To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp.cached \ --corpus-path collections/msmarco-doc-segmented-unicoil-noexp ``` @@ -121,7 +121,7 @@ Because of tie-breaking effects, we get slightly different results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2021-06-28 (commit [`1550683`](https://github.com/castorini/anserini/commit/1550683e41cefe89b7e67c0a5f0e147bc70dfcda)) + Results reproduced by [@JMMackenzie](https://github.com/JMMackenzie) on 2021-07-02 (commit [`e4c5127`](https://github.com/castorini/anserini/commit/e4c51278d375ebad9aa2bf9bde66cab32260d6b4)) diff --git a/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.md b/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.md rename to docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.cached.md index 1168c9c0d5..dbb6b1009d 100644 --- a/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.md +++ b/docs/regressions/regressions-msmarco-v1-doc-segmented.unicoil.cached.md @@ -11,13 +11,13 @@ The experiments on this page are not actually reported in the paper. However, the model is the same, applied to the MS MARCO _segmented_ document corpus (with doc2query-T5 expansions). Retrieval uses MaxP technique, where we select the score of the highest-scoring passage from a document as the score for that document to produce a document ranking. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-doc-segmented.unicoil.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-doc-segmented.unicoil.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-doc-segmented.unicoil +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-doc-segmented.unicoil.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -44,7 +44,7 @@ To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil.cached \ --corpus-path collections/msmarco-doc-segmented-unicoil ``` @@ -152,7 +152,7 @@ Because of tie-breaking effects, we get slightly different results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2021-06-28 (commit [`1550683`](https://github.com/castorini/anserini/commit/1550683e41cefe89b7e67c0a5f0e147bc70dfcda)) + Results reproduced by [@JMMackenzie](https://github.com/JMMackenzie) on 2021-07-02 (commit [`e4c5127`](https://github.com/castorini/anserini/commit/e4c51278d375ebad9aa2bf9bde66cab32260d6b4)) diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md similarity index 88% rename from docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.md rename to docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md index f7c2368f71..33347257f2 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -106,8 +106,8 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.977 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. 
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.md b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md similarity index 87% rename from docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.md rename to docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md index 7c45f60802..71a92d9ca4 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.md +++ b/docs/regressions/regressions-msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-bge-base-en-v1.5.tar` is 59 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached \ --corpus-path collections/msmarco-passage-bge-base-en-v1.5 ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.bge-base-en-v1.5/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ + -output runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached_q.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-bge-base-en-v1.5.bge-hnsw-cached.topics.msmarco-passage.dev-subset.bge-base-en-v1.5.jsonl.txt ``` ## Effectiveness @@ -104,8 +104,8 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.977 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.template) and run `bin/build.sh` to rebuild the documentation. 
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md similarity index 87% rename from docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.md rename to docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md index ac6155be46..84a22d83dc 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.md @@ -6,13 +6,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. @@ -20,7 +20,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -38,7 +38,7 @@ To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt ``` ## Effectiveness @@ -104,8 +104,8 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.974 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.yaml). 
+Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.md b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md similarity index 86% rename from docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.md rename to docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md index 9b9b82d717..05696275ae 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.md @@ -6,13 +6,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.yaml). 
-Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with Cohere embed-english-v3.0. 
@@ -20,7 +20,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -38,7 +38,7 @@ To confirm, `msmarco-passage-cohere-embed-english-v3.0.tar` is 38 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached \ --corpus-path collections/msmarco-passage-cohere-embed-english-v3.0 ``` @@ -74,17 +74,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cohere-embed-english-v3.0/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ + -output runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using 
`trec_eval`: ```bash -bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached_q.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m ndcg_cut.10 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cohere-embed-english-v3.0.cohere-embed-english-v3.0-cached.topics.msmarco-passage.dev-subset.cohere-embed-english-v3.0.jsonl.txt 
``` ## Effectiveness @@ -102,8 +102,8 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.974 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md similarity index 88% rename from docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.md rename to docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md index 0f5d3cf693..e4ad1fbd6b 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-cos-dpr-distil ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt ``` ## Effectiveness @@ -106,10 +106,10 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.974 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml). 
## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@yilinjz](https://github.com/yilinjz) on 2023-09-01 (commit [`4ae518b`](https://github.com/castorini/anserini/commit/4ae518bb284ebcba0b273a473bc8774735cb7d19)) \ No newline at end of file diff --git a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.md b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md similarity index 87% rename from docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.md rename to docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md index 5a075f55f2..50e74c205d 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.md +++ b/docs/regressions/regressions-msmarco-v1-passage.cos-dpr-distil.hnsw.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with cosDPR-distil. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-cos-dpr-distil.tar` is 57 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.cached \ --corpus-path collections/msmarco-passage-cos-dpr-distil ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ + -output runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached_q.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-cos-dpr-distil.cos-dpr-distil-hnsw-cached.topics.msmarco-passage.dev-subset.cos-dpr-distil.jsonl.txt ``` ## Effectiveness @@ -104,10 +104,10 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.974 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.template) and run `bin/build.sh` to rebuild the documentation. 
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@yilinjz](https://github.com/yilinjz) on 2023-09-01 (commit [`4ae518b`](https://github.com/castorini/anserini/commit/4ae518bb284ebcba0b273a473bc8774735cb7d19)) \ No newline at end of file diff --git a/docs/regressions/regressions-msmarco-v1-passage.deepimpact.md b/docs/regressions/regressions-msmarco-v1-passage.deepimpact.cached.md similarity index 90% rename from docs/regressions/regressions-msmarco-v1-passage.deepimpact.md rename to docs/regressions/regressions-msmarco-v1-passage.deepimpact.cached.md index 196a499b9e..78f3efbc44 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.deepimpact.md +++ b/docs/regressions/regressions-msmarco-v1-passage.deepimpact.cached.md @@ -7,13 +7,13 @@ The DeepImpact model is described in the following paper: > Antonio Mallia, Omar Khattab, Nicola Tonellotto, and Torsten Suel. [Learning Passage Impacts for Inverted Indexes.](https://dl.acm.org/doi/10.1145/3404835.3463030) _SIGIR 2021_. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.deepimpact.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.deepimpact.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with DeepImpact, i.e., we have applied neural inference and stored the output sparse vectors. @@ -21,7 +21,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.deepimpact +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.deepimpact.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -39,7 +39,7 @@ To confirm, `msmarco-passage-deepimpact.tar` is 3.6 GB and has MD5 checksum `738 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact.cached \ --corpus-path collections/msmarco-passage-deepimpact ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.deepimpact/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.deepimpact.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-deepimpact.deepimpact-cached_q.topics.msmarco-passage.dev-subset.deepimpact.txt \ + -output runs/run.msmarco-passage-deepimpact.deepimpact-cached.topics.msmarco-passage.dev-subset.deepimpact.txt \ -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached_q.topics.msmarco-passage.dev-subset.deepimpact.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached_q.topics.msmarco-passage.dev-subset.deepimpact.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached_q.topics.msmarco-passage.dev-subset.deepimpact.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached_q.topics.msmarco-passage.dev-subset.deepimpact.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-deepimpact.deepimpact-cached.topics.msmarco-passage.dev-subset.deepimpact.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached.topics.msmarco-passage.dev-subset.deepimpact.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached.topics.msmarco-passage.dev-subset.deepimpact.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-deepimpact.deepimpact-cached.topics.msmarco-passage.dev-subset.deepimpact.txt ``` ## Effectiveness @@ -129,7 +129,7 @@ The final evaluation metric is very close to the one reported in the paper (0.32 ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@MXueguang](https://github.com/MXueguang) on 2021-06-17 (commit [`ff618db`](https://github.com/castorini/anserini/commit/ff618dbf87feee0ad75dc42c72a361c05984097d)) + Results reproduced by [@JMMackenzie](https://github.com/jmmackenzie) on 2021-06-22 (commit [`4904341`](https://github.com/castorini/anserini/commit/490434172a035b6eade8c17771aed83cc7f5d996)) diff --git a/docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.md b/docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.cached.md similarity index 90% rename from docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.md rename to docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.cached.md index 962fccb0fe..b9b4250833 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.md +++ b/docs/regressions/regressions-msmarco-v1-passage.distill-splade-max.cached.md @@ -7,13 +7,13 @@ The DistilSPLADE-max model is described in the following paper: > Thibault Formal, Carlos Lassance, Benjamin Piwowarski, Stéphane Clinchant. [SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval.](https://arxiv.org/abs/2109.10086) _arXiv:2109.10086_. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.distill-splade-max.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.distill-splade-max.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with DistilSPLADE-max, i.e., performed model inference on every document and stored the output sparse vectors. @@ -22,7 +22,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.distill-splade-max +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.distill-splade-max.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-distill-splade-max.tar` is 9.9 GB and has MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max.cached \ --corpus-path collections/msmarco-passage-distill-splade-max ``` @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.distill-splade-max/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.distill-splade-max.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached_q.topics.msmarco-passage.dev-subset.distill-splade-max.txt \ + -output runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached.topics.msmarco-passage.dev-subset.distill-splade-max.txt \ -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached_q.topics.msmarco-passage.dev-subset.distill-splade-max.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached_q.topics.msmarco-passage.dev-subset.distill-splade-max.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached_q.topics.msmarco-passage.dev-subset.distill-splade-max.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached_q.topics.msmarco-passage.dev-subset.distill-splade-max.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached.topics.msmarco-passage.dev-subset.distill-splade-max.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached.topics.msmarco-passage.dev-subset.distill-splade-max.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached.topics.msmarco-passage.dev-subset.distill-splade-max.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-distill-splade-max.distill-splade-max-cached.topics.msmarco-passage.dev-subset.distill-splade-max.txt ``` ## Effectiveness @@ -130,7 +130,7 @@ This corresponds to the effectiveness reported in the paper. ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@jmmackenzie](https://github.com/jmmackenzie) on 2021-10-15 (commit [`52b76f6`](https://github.com/castorini/anserini/commit/52b76f63b163036e8fad1a6e1b10b431b4ddd06c)) + Results reproduced by [@justram](https://github.com/justram) on 2022-03-02 (commit [`41b64d9`](https://github.com/castorini/anserini/commit/41b65d9fcb82d787faf4ca937f81faca82ead8c2)) diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md similarity index 88% rename from docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.md rename to docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md index d3c1c7bcd6..decb5d1010 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw-int8.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8 +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8 \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8.cached \ --corpus-path collections/msmarco-passage-openai-ada2 ``` @@ -78,17 +78,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw-int8.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt 
+bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt ``` ## Effectiveness @@ -106,9 +106,9 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.983 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.template) and run `bin/build.sh` to rebuild the documentation. 
+To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template) and run `bin/build.sh` to rebuild the documentation. diff --git a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.md b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md similarity index 86% rename from docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.md rename to docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md index dbab7830d6..5ae70e245e 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.md +++ b/docs/regressions/regressions-msmarco-v1-passage.openai-ada2.hnsw.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with the OpenAI-ada2 embedding model. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-openai-ada2.tar` is 109 GB and has MD5 checksum `a4 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw.cached \ --corpus-path collections/msmarco-passage-openai-ada2 ``` @@ -76,17 +76,17 @@ bin/run.sh io.anserini.search.SearchHnswDenseVectors \ -index indexes/lucene-hnsw.msmarco-v1-passage.openai-ada2/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.openai-ada2.jsonl.gz \ -topicReader JsonIntVector \ - -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ + -output runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt \ -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached_q.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m 
map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-openai-ada2.openai-ada2-cached.topics.msmarco-passage.dev-subset.openai-ada2.jsonl.txt ``` ## Effectiveness @@ -104,9 +104,9 @@ With the above commands, you should be able to reproduce the following results: | [MS MARCO Passage: Dev](https://github.com/microsoft/MSMARCO-Passage-Ranking) | 0.985 | Note that due to the non-deterministic nature of HNSW indexing, results may differ slightly between each experimental run. -Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.yaml). +Nevertheless, scores are generally within 0.005 of the reference values recorded in [our YAML configuration file](../../src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml). ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template) and run `bin/build.sh` to rebuild the documentation. 
diff --git a/docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.md b/docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md similarity index 89% rename from docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.md rename to docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md index 1dd4aab9b8..1a01e405ad 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-msmarco-v1-passage.splade-pp-ed.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-splade-pp-ed.tar` is 4.2 GB and has MD5 checksum `e With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco-passage-splade-pp-ed ``` @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-passage.dev-subset.splade-pp-ed.txt \ + -output runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-passage.dev-subset.splade-pp-ed.txt \ -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-passage.dev-subset.splade-pp-ed.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-passage.dev-subset.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-passage.dev-subset.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-passage.dev-subset.splade-pp-ed.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-passage.dev-subset.splade-pp-ed.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-passage.dev-subset.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-passage.dev-subset.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-passage.dev-subset.splade-pp-ed.txt ``` ## Effectiveness @@ -106,7 +106,7 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@justram](https://github.com/justram) on 2023-03-08 (commit [`03f95a8`](https://github.com/castorini/anserini/commit/03f95a8e1ae09ab09efe046bfcbd3a4cdda691b4)) + Results reproduced by [@ArthurChen189](https://github.com/ArthurChen189) on 2023-06-01 (commit [`a403a2a`](https://github.com/castorini/anserini/commit/a403a2a44af9322c7a2dbdb5240180a62398ab06)) \ No newline at end of file diff --git a/docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.md b/docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md similarity index 88% rename from docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.md rename to docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md index d79e50e698..7e58b01676 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-msmarco-v1-passage.splade-pp-sd.cached.md @@ -8,13 +8,13 @@ This page describes regression experiments, integrated into Anserini's regressio In these experiments, we are using pre-encoded queries (i.e., cached results of query encoding). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. @@ -22,7 +22,7 @@ We make available a version of the MS MARCO Passage Corpus that has already been From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-splade-pp-sd.tar` is 4.8 GB and has MD5 checksum `c With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco-passage-splade-pp-sd ``` @@ -77,17 +77,17 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v1-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-passage.dev-subset.splade-pp-sd.txt \ + -output runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-passage.dev-subset.splade-pp-sd.txt \ -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-passage.dev-subset.splade-pp-sd.txt -bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-passage.dev-subset.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-passage.dev-subset.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-passage.dev-subset.splade-pp-sd.txt +bin/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt 
runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-passage.dev-subset.splade-pp-sd.txt +bin/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-passage.dev-subset.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-passage.dev-subset.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-passage.dev-subset.splade-pp-sd.txt ``` ## Effectiveness @@ -106,7 +106,7 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@justram](https://github.com/justram) on 2023-03-08 (commit [`03f95a8`](https://github.com/castorini/anserini/commit/03f95a8e1ae09ab09efe046bfcbd3a4cdda691b4)) + Results reproduced by [@ArthurChen189](https://github.com/ArthurChen189) on 2023-06-01 (commit [`a403a2a`](https://github.com/castorini/anserini/commit/a403a2a44af9322c7a2dbdb5240180a62398ab06)) diff --git a/docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.md b/docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.cached.md similarity index 94% rename from docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.md rename to docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.cached.md index 9b5b71ca5a..b3acf1baec 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.md +++ b/docs/regressions/regressions-msmarco-v1-passage.unicoil-noexp.cached.md @@ -10,13 +10,13 @@ The uniCOIL model is described in the following paper: The experiments on this page are not actually reported in the paper. Here, a variant model without expansion is used. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.unicoil-noexp.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.unicoil-noexp.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -25,7 +25,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.unicoil-noexp +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.unicoil-noexp.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum ` With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp.cached \ --corpus-path collections/msmarco-passage-unicoil-noexp ``` @@ -109,7 +109,7 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2021-06-28 (commit [`1550683`](https://github.com/castorini/anserini/commit/1550683e41cefe89b7e67c0a5f0e147bc70dfcda)) + Results reproduced by [@JMMackenzie](https://github.com/JMMackenzie) on 2021-07-02 (commit [`e4c5127`](https://github.com/castorini/anserini/commit/e4c51278d375ebad9aa2bf9bde66cab32260d6b4)) diff --git a/docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.md b/docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.md rename to docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md index 5ba915ae09..30f82fd619 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.md +++ b/docs/regressions/regressions-msmarco-v1-passage.unicoil-tilde-expansion.cached.md @@ -7,13 +7,13 @@ The uniCOIL+TILDE model is described in the following paper: > Shengyao Zhuang and Guido Zuccon. [Fast Passage Re-ranking with Contextualized Exact Term Matching and Efficient Passage Expansion.](https://arxiv.org/pdf/2108.08513) _arXiv:2108.08513_. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.unicoil-tilde-expansion.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.unicoil-tilde-expansion.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL + TILDE expansions, i.e., performed model inference on every document and stored the output sparse vectors. @@ -22,7 +22,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-unicoil-tilde-expansion.tar` is 3.9 GB and has MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion.cached \ --corpus-path collections/msmarco-passage-unicoil-tilde-expansion ``` @@ -130,7 +130,7 @@ This corresponds to the effectiveness reported in the paper. ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@MXueguang](https://github.com/MXueguang) on 2021-09-14 (commit [`a05fc52`](https://github.com/castorini/anserini/commit/a05fc5215a6d9de77bd5f4b8f874f608442024a3)) + Results reproduced by [@jmmackenzie](https://github.com/jmmackenzie) on 2021-10-15 (commit [`52b76f6`](https://github.com/castorini/anserini/commit/52b76f63b163036e8fad1a6e1b10b431b4ddd06c)) diff --git a/docs/regressions/regressions-msmarco-v1-passage.unicoil.md b/docs/regressions/regressions-msmarco-v1-passage.unicoil.cached.md similarity index 94% rename from docs/regressions/regressions-msmarco-v1-passage.unicoil.md rename to docs/regressions/regressions-msmarco-v1-passage.unicoil.cached.md index c012a7f17b..1463240f39 100644 --- a/docs/regressions/regressions-msmarco-v1-passage.unicoil.md +++ b/docs/regressions/regressions-msmarco-v1-passage.unicoil.cached.md @@ -7,13 +7,13 @@ The uniCOIL model is described in the following paper: > Jimmy Lin and Xueguang Ma. [A Few Brief Notes on DeepImpact, COIL, and a Conceptual Framework for Information Retrieval Techniques.](https://arxiv.org/abs/2106.14807) _arXiv:2106.14807_. -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.unicoil.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v1-passage.unicoil.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil.cached ``` We make available a version of the MS MARCO Passage Corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -22,7 +22,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.unicoil +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v1-passage.unicoil.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -40,7 +40,7 @@ To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef7 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil.cached \ --corpus-path collections/msmarco-passage-unicoil ``` @@ -130,7 +130,7 @@ This corresponds to the effectiveness reported in the paper and also the run nam ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2021-06-28 (commit [`1550683`](https://github.com/castorini/anserini/commit/1550683e41cefe89b7e67c0a5f0e147bc70dfcda)) + Results reproduced by [@JMMackenzie](https://github.com/JMMackenzie) on 2021-07-02 (commit [`e4c5127`](https://github.com/castorini/anserini/commit/e4c51278d375ebad9aa2bf9bde66cab32260d6b4)) diff --git a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.md b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.md rename to docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.md index b7bb61d146..f54766b1a1 100644 --- a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.md +++ b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.md @@ -17,13 +17,13 @@ You probably don't want to use them. For additional instructions on working with the MS MARCO V2 document corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot-v2.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -32,7 +32,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -56,7 +56,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot-v2 ``` @@ -133,6 +133,6 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot.md b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot.md rename to docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot.cached.md index 45d7d09a67..41337e4e36 100644 --- a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot.md +++ b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-0shot.cached.md @@ -16,13 +16,13 @@ The version that uses title/segment encoding can be found [here](regressions-msm For additional instructions on working with the MS MARCO V2 document corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -31,7 +31,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -55,7 +55,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_0shot.tar` is 62 GB and has an MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot ``` @@ -132,6 +132,6 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.cached.template) and run `bin/build.sh` to rebuild the documentation. + Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.md b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.md rename to docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.md index 591c52e53d..f808dee878 100644 --- a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.md +++ b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.md @@ -17,13 +17,13 @@ You probably don't want to use them. 
For additional instructions on working with the MS MARCO V2 document corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -32,7 +32,7 @@ Thus, no neural inference is involved. 
From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2 +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. @@ -56,7 +56,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and h With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2 \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 ``` @@ -133,6 +133,6 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.md b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.md rename to docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.md index c512fd6d3b..cde9b44c11 100644 --- a/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.md +++ b/docs/regressions/regressions-msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.md @@ -16,13 +16,13 @@ The version that uses title/segment encoding can be found [here](regressions-msm For additional instructions on working with the MS MARCO V2 document corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached ``` We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -31,7 +31,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -55,7 +55,7 @@ To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar` is 54 GB and has With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached \ --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot ``` @@ -132,6 +132,6 @@ With the above commands, you should be able to reproduce the following results: ## Reproduction Log[*](../../docs/reproducibility.md) -To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.template) and run `bin/build.sh` to rebuild the documentation. +To add to this reproduction log, modify [this template](../../src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.template) and run `bin/build.sh` to rebuild the documentation. 
+ Results reproduced by [@lintool](https://github.com/lintool) on 2022-06-06 (commit [`236b386`](https://github.com/castorini/anserini/commit/236b386ddc11d292b4b736162b59488a02236d6c)) diff --git a/docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.md b/docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.cached.md similarity index 88% rename from docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.md rename to docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.cached.md index 34e794c059..04f90051fb 100644 --- a/docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.md +++ b/docs/regressions/regressions-msmarco-v2-passage.splade-pp-ed.cached.md @@ -11,13 +11,13 @@ The model is described in the following paper: For additional instructions on working with the MS MARCO V2 passage corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-ed.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-ed.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-EnsembleDistil. @@ -25,7 +25,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.splade-pp-ed +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.splade-pp-ed.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco_v2_passage_splade_pp_ed.tar` is 66 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_ed ``` @@ -79,25 +79,25 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.msmarco-v2-passage.dev.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-ed/ \ -topics tools/topics-and-qrels/topics.msmarco-v2-passage.dev2.splade-pp-ed.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt \ -parallelism 16 -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev.splade-pp-ed.txt -bin/trec_eval -c -M 
100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev.splade-pp-ed.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt -bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev.splade-pp-ed.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt 
runs/run.msmarco-v2-passage-splade-pp-ed.splade-pp-ed-cached.topics.msmarco-v2-passage.dev2.splade-pp-ed.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.md b/docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.cached.md similarity index 88% rename from docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.md rename to docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.cached.md index 43ff5d3767..76f003ab69 100644 --- a/docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.md +++ b/docs/regressions/regressions-msmarco-v2-passage.splade-pp-sd.cached.md @@ -11,13 +11,13 @@ The model is described in the following paper: For additional instructions on working with the MS MARCO V2 passage corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-sd.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-sd.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd.cached ``` We make available a version of the corpus that has already been encoded with SPLADE++ CoCondenser-SelfDistil. @@ -25,7 +25,7 @@ We make available a version of the corpus that has already been encoded with SPL From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.splade-pp-sd +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.splade-pp-sd.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -43,7 +43,7 @@ To confirm, `msmarco_v2_passage_splade_pp_sd.tar` is 76 GB and has MD5 checksum With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd.cached \ --corpus-path collections/msmarco_v2_passage_splade_pp_sd ``` @@ -79,25 +79,25 @@ bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.msmarco-v2-passage.dev.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized & bin/run.sh io.anserini.search.SearchCollection \ -index indexes/lucene-inverted.msmarco-v2-passage.splade-pp-sd/ \ -topics tools/topics-and-qrels/topics.msmarco-v2-passage.dev2.splade-pp-sd.tsv.gz \ -topicReader TsvInt \ - -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt \ + -output runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt \ -parallelism 16 -impact -pretokenized & ``` Evaluation can be performed using `trec_eval`: ```bash -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev.splade-pp-sd.txt -bin/trec_eval -c -M 
100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev.splade-pp-sd.txt -bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt -bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt -bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached_q.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev.splade-pp-sd.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2-passage.dev2.txt 
runs/run.msmarco-v2-passage-splade-pp-sd.splade-pp-sd-cached.topics.msmarco-v2-passage.dev2.splade-pp-sd.txt ``` ## Effectiveness diff --git a/docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.md b/docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.cached.md similarity index 95% rename from docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.md rename to docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.cached.md index 85b3c0097a..c116fc7286 100644 --- a/docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.md +++ b/docs/regressions/regressions-msmarco-v2-passage.unicoil-0shot.cached.md @@ -11,13 +11,13 @@ The uniCOIL model is described in the following paper: For additional instructions on working with the MS MARCO V2 passage corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.unicoil-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.unicoil-0shot.cached.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.unicoil-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.unicoil-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -50,7 +50,7 @@ To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 check With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-0shot ``` diff --git a/docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.md b/docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.cached.md similarity index 96% rename from docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.md rename to docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.cached.md index d2a53c7c92..24a3791320 100644 --- a/docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.md +++ b/docs/regressions/regressions-msmarco-v2-passage.unicoil-noexp-0shot.cached.md @@ -11,13 +11,13 @@ The uniCOIL model is described in the following paper: For additional instructions on working with the MS MARCO V2 passage corpus, refer to [this page](../../docs/experiments-msmarco-v2.md). -The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.unicoil-noexp-0shot.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-noexp-0shot.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2-passage.unicoil-noexp-0shot.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-noexp-0shot.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot.cached ``` We make available a version of the corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors. @@ -26,7 +26,7 @@ Thus, no neural inference is involved. From any machine, the following command will download the corpus and perform the complete regression, end to end: ```bash -python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot +python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot.cached ``` The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results. 
@@ -50,7 +50,7 @@ To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 With the corpus downloaded, the following command will perform the remaining steps below: ```bash -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot \ +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot.cached \ --corpus-path collections/msmarco-v2-passage-unicoil-noexp-0shot ``` diff --git a/src/main/python/regressions-batch03.txt b/src/main/python/regressions-batch03.txt index 6211e70e5c..ca9a82eb6a 100644 --- a/src/main/python/regressions-batch03.txt +++ b/src/main/python/regressions-batch03.txt @@ -2,17 +2,17 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2.1-doc-segmented > logs/log.msmarco-v2.1-doc-segmented.txt 2>&1 # MS MARCO V1 passage -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw > logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8 > logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8 > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.txt 2>&1 -python 
src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw-int8 > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw > logs/log.msmarco-v1-passage.openai-ada2.hnsw.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8 > logs/log.msmarco-v1-passage.openai-ada2.hnsw-int8.txt 2>&1 - -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed > logs/log.msmarco-v1-passage.splade-pp-ed.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd > logs/log.msmarco-v1-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.cached > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression 
msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw.cached > logs/log.msmarco-v1-passage.openai-ada2.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.openai-ada2.hnsw-int8.cached > logs/log.msmarco-v1-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 + +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-ed.cached > logs/log.msmarco-v1-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.splade-pp-sd.cached > logs/log.msmarco-v1-passage.splade-pp-sd.cached.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.fw > logs/log.msmarco-v1-passage.cos-dpr-distil.fw.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.cos-dpr-distil.lexlsh > logs/log.msmarco-v1-passage.cos-dpr-distil.lexlsh.txt 2>&1 @@ -23,11 +23,11 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.wp-ca > logs/log.msmarco-v1-passage.wp-ca.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.doc2query > logs/log.msmarco-v1-passage.doc2query.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.docTTTTTquery > logs/log.msmarco-v1-passage.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact > logs/log.msmarco-v1-passage.deepimpact.txt 2>&1 -python src/main/python/run_regression.py 
--index --verify --search --regression msmarco-v1-passage.unicoil > logs/log.msmarco-v1-passage.unicoil.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp > logs/log.msmarco-v1-passage.unicoil-noexp.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion > logs/log.msmarco-v1-passage.unicoil-tilde-expansion.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max > logs/log.msmarco-v1-passage.distill-splade-max.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.deepimpact.cached > logs/log.msmarco-v1-passage.deepimpact.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil.cached > logs/log.msmarco-v1-passage.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-noexp.cached > logs/log.msmarco-v1-passage.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.unicoil-tilde-expansion.cached > logs/log.msmarco-v1-passage.unicoil-tilde-expansion.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-passage.distill-splade-max.cached > logs/log.msmarco-v1-passage.distill-splade-max.cached.txt 2>&1 # MS MARCO V2.1 python src/main/python/run_regression.py --verify --search --regression dl21-doc-msmarco-v2.1 > logs/log.dl21-doc-msmarco-v2.1.txt 2>&1 @@ -50,8 +50,8 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.wp-tok > logs/log.msmarco-v1-doc-segmented.wp-tok.txt 2>&1 python 
src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.wp-ca > logs/log.msmarco-v1-doc-segmented.wp-ca.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.docTTTTTquery > logs/log.msmarco-v1-doc-segmented.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil > logs/log.msmarco-v1-doc-segmented.unicoil.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp > logs/log.msmarco-v1-doc-segmented.unicoil-noexp.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil.cached > logs/log.msmarco-v1-doc-segmented.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v1-doc-segmented.unicoil-noexp.cached > logs/log.msmarco-v1-doc-segmented.unicoil-noexp.cached.txt 2>&1 # MS MARCO V2 passage python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage > logs/log.msmarco-v2-passage.txt 2>&1 @@ -60,11 +60,11 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-augmented > logs/log.msmarco-v2-passage-augmented.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-augmented.d2q-t5 > logs/log.msmarco-v2-passage-augmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot > logs/log.msmarco-v2-passage.unicoil-noexp-0shot.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot > logs/log.msmarco-v2-passage.unicoil-0shot.txt 2>&1 +python 
src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-noexp-0shot.cached > logs/log.msmarco-v2-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.unicoil-0shot.cached > logs/log.msmarco-v2-passage.unicoil-0shot.cached.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed > logs/log.msmarco-v2-passage.splade-pp-ed.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd > logs/log.msmarco-v2-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-ed.cached > logs/log.msmarco-v2-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage.splade-pp-sd.cached > logs/log.msmarco-v2-passage.splade-pp-sd.cached.txt 2>&1 # MS MARCO V2 doc python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc > logs/log.msmarco-v2-doc.txt 2>&1 @@ -73,10 +73,10 @@ python src/main/python/run_regression.py --index --verify --search --regression python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented > logs/log.msmarco-v2-doc-segmented.txt 2>&1 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.d2q-t5 > logs/log.msmarco-v2-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot > logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2 > logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.txt 
2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot > logs/log.msmarco-v2-doc-segmented.unicoil-0shot.txt 2>&1 -python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2 > logs/log.msmarco-v2-doc-segmented.unicoil-0shot-v2.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached > logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot.cached > logs/log.msmarco-v2-doc-segmented.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented.unicoil-0shot-v2.cached > logs/log.msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 # MS MARCO V1 passage search-only python src/main/python/run_regression.py --search --regression msmarco-v1-passage.cos-dpr-distil.hnsw.onnx > logs/log.msmarco-v1-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 @@ -100,21 +100,21 @@ python src/main/python/run_regression.py --search --regression dl19-passage.docT python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-ed.onnx > logs/log.dl19-passage.splade-pp-ed.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-sd.onnx > logs/log.dl19-passage.splade-pp-sd.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw > logs/log.dl19-passage.bge-base-en-v1.5.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression 
dl19-passage.bge-base-en-v1.5.hnsw-int8 > logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw > logs/log.dl19-passage.cos-dpr-distil.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw-int8 > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw.cached > logs/log.dl19-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw.cached > logs/log.dl19-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.fw > logs/log.dl19-passage.cos-dpr-distil.fw.txt 2>&1 python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.lexlsh > logs/log.dl19-passage.cos-dpr-distil.lexlsh.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.hnsw > logs/log.dl19-passage.openai-ada2.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.hnsw-int8 > logs/log.dl19-passage.openai-ada2.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8 > 
logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw-int8.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.hnsw.cached > logs/log.dl19-passage.openai-ada2.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.openai-ada2.hnsw-int8.cached > logs/log.dl19-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.unicoil > logs/log.dl19-passage.unicoil.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.unicoil-noexp > logs/log.dl19-passage.unicoil-noexp.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-ed > logs/log.dl19-passage.splade-pp-ed.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-sd > logs/log.dl19-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.unicoil.cached > logs/log.dl19-passage.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.unicoil-noexp.cached > logs/log.dl19-passage.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-ed.cached > logs/log.dl19-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-passage.splade-pp-sd.cached > logs/log.dl19-passage.splade-pp-sd.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression 
dl19-passage.cos-dpr-distil.hnsw.onnx > logs/log.dl19-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl19-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.dl19-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 @@ -132,8 +132,8 @@ python src/main/python/run_regression.py --search --regression dl19-doc-segmente python src/main/python/run_regression.py --search --regression dl19-doc-segmented.wp-ca > logs/log.dl19-doc-segmented.wp-ca.txt 2>&1 python src/main/python/run_regression.py --search --regression dl19-doc-segmented.wp-tok > logs/log.dl19-doc-segmented.wp-tok.txt 2>&1 python src/main/python/run_regression.py --search --regression dl19-doc-segmented.docTTTTTquery > logs/log.dl19-doc-segmented.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.unicoil > logs/log.dl19-doc-segmented.unicoil.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl19-doc-segmented.unicoil-noexp > logs/log.dl19-doc-segmented.unicoil-noexp.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-doc-segmented.unicoil.cached > logs/log.dl19-doc-segmented.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl19-doc-segmented.unicoil-noexp.cached > logs/log.dl19-doc-segmented.unicoil-noexp.cached.txt 2>&1 # DL20 python src/main/python/run_regression.py --search --regression dl20-passage > logs/log.dl20-passage.txt 2>&1 @@ -145,21 +145,21 @@ python src/main/python/run_regression.py --search --regression dl20-passage.docT python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-ed.onnx > logs/log.dl20-passage.splade-pp-ed.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-sd.onnx > logs/log.dl20-passage.splade-pp-sd.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw > 
logs/log.dl20-passage.bge-base-en-v1.5.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8 > logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw > logs/log.dl20-passage.cos-dpr-distil.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw-int8 > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw.cached > logs/log.dl20-passage.bge-base-en-v1.5.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.bge-base-en-v1.5.hnsw-int8.cached > logs/log.dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw.cached > logs/log.dl20-passage.cos-dpr-distil.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.cached > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.fw > logs/log.dl20-passage.cos-dpr-distil.fw.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.lexlsh > logs/log.dl20-passage.cos-dpr-distil.lexlsh.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.hnsw > logs/log.dl20-passage.openai-ada2.hnsw.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.hnsw-int8 > logs/log.dl20-passage.openai-ada2.hnsw-int8.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw.txt 2>&1 -python 
src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8 > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw-int8.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.hnsw.cached > logs/log.dl20-passage.openai-ada2.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.openai-ada2.hnsw-int8.cached > logs/log.dl20-passage.openai-ada2.hnsw-int8.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached > logs/log.dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.unicoil > logs/log.dl20-passage.unicoil.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.unicoil-noexp > logs/log.dl20-passage.unicoil-noexp.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-ed > logs/log.dl20-passage.splade-pp-ed.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-sd > logs/log.dl20-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.unicoil.cached > logs/log.dl20-passage.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.unicoil-noexp.cached > logs/log.dl20-passage.unicoil-noexp.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-ed.cached > logs/log.dl20-passage.splade-pp-ed.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-passage.splade-pp-sd.cached > 
logs/log.dl20-passage.splade-pp-sd.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw.onnx > logs/log.dl20-passage.cos-dpr-distil.hnsw.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-passage.cos-dpr-distil.hnsw-int8.onnx > logs/log.dl20-passage.cos-dpr-distil.hnsw-int8.onnx.txt 2>&1 @@ -177,8 +177,8 @@ python src/main/python/run_regression.py --search --regression dl20-doc-segmente python src/main/python/run_regression.py --search --regression dl20-doc-segmented.wp-ca > logs/log.dl20-doc-segmented.wp-ca.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-doc-segmented.wp-tok > logs/log.dl20-doc-segmented.wp-tok.txt 2>&1 python src/main/python/run_regression.py --search --regression dl20-doc-segmented.docTTTTTquery > logs/log.dl20-doc-segmented.docTTTTTquery.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.unicoil > logs/log.dl20-doc-segmented.unicoil.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl20-doc-segmented.unicoil-noexp > logs/log.dl20-doc-segmented.unicoil-noexp.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-doc-segmented.unicoil.cached > logs/log.dl20-doc-segmented.unicoil.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl20-doc-segmented.unicoil-noexp.cached > logs/log.dl20-doc-segmented.unicoil-noexp.cached.txt 2>&1 # DL21 python src/main/python/run_regression.py --search --regression dl21-passage > logs/log.dl21-passage.txt 2>&1 @@ -187,11 +187,11 @@ python src/main/python/run_regression.py --search --regression dl21-passage.d2q- python src/main/python/run_regression.py --search --regression dl21-passage-augmented > logs/log.dl21-passage-augmented.txt 2>&1 python src/main/python/run_regression.py --search --regression dl21-passage-augmented.d2q-t5 > logs/log.dl21-passage-augmented-d2q-t5.txt 2>&1 -python 
src/main/python/run_regression.py --search --regression dl21-passage.unicoil-noexp-0shot > logs/log.dl21-passage.unicoil-noexp-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.unicoil-0shot > logs/log.dl21-passage.unicoil-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-ed > logs/log.dl21-passage.splade-pp-ed.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-passage.unicoil-noexp-0shot.cached > logs/log.dl21-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-passage.unicoil-0shot.cached > logs/log.dl21-passage.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-ed.cached > logs/log.dl21-passage.splade-pp-ed.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-ed.onnx > logs/log.dl21-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-sd > logs/log.dl21-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-sd.cached > logs/log.dl21-passage.splade-pp-sd.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl21-passage.splade-pp-sd.onnx > logs/log.dl21-passage.splade-pp-sd.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl21-doc > logs/log.dl21-doc.txt 2>&1 @@ -199,10 +199,10 @@ python src/main/python/run_regression.py --search --regression dl21-doc.d2q-t5 > python src/main/python/run_regression.py --search --regression dl21-doc-segmented > logs/log.dl21-doc-segmented.txt 2>&1 python src/main/python/run_regression.py --search --regression dl21-doc-segmented.d2q-t5 > logs/log.dl21-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression 
dl21-doc-segmented.unicoil-noexp-0shot > logs/log.dl21-doc-segmented.unicoil-noexp-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2 > logs/log.dl21-doc-segmented.unicoil-noexp-0shot-v2.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-0shot > logs/log.dl21-doc-segmented.unicoil-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-0shot-v2 > logs/log.dl21-doc-segmented.unicoil-0shot-v2.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-noexp-0shot.cached > logs/log.dl21-doc-segmented.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-0shot.cached > logs/log.dl21-doc-segmented.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl21-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl21-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 # DL22 python src/main/python/run_regression.py --search --regression dl22-passage > logs/log.dl22-passage.txt 2>&1 @@ -211,11 +211,11 @@ python src/main/python/run_regression.py --search --regression dl22-passage.d2q- python src/main/python/run_regression.py --search --regression dl22-passage-augmented > logs/log.dl22-passage-augmented.txt 2>&1 python src/main/python/run_regression.py --search --regression dl22-passage-augmented.d2q-t5 > logs/log.dl22-passage-augmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.unicoil-noexp-0shot > logs/log.dl22-passage.unicoil-noexp-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.unicoil-0shot > 
logs/log.dl22-passage.unicoil-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-ed > logs/log.dl22-passage.splade-pp-ed.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl22-passage.unicoil-noexp-0shot.cached > logs/log.dl22-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl22-passage.unicoil-0shot.cached > logs/log.dl22-passage.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-ed.cached > logs/log.dl22-passage.splade-pp-ed.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-ed.onnx > logs/log.dl22-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-sd > logs/log.dl22-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-sd.cached > logs/log.dl22-passage.splade-pp-sd.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl22-passage.splade-pp-sd.onnx > logs/log.dl22-passage.splade-pp-sd.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl22-doc > logs/log.dl22-doc.txt 2>&1 @@ -223,8 +223,8 @@ python src/main/python/run_regression.py --search --regression dl22-doc.d2q-t5 > python src/main/python/run_regression.py --search --regression dl22-doc-segmented > logs/log.dl22-doc-segmented.txt 2>&1 python src/main/python/run_regression.py --search --regression dl22-doc-segmented.d2q-t5 > logs/log.dl22-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2 > logs/log.dl22-doc-segmented.unicoil-noexp-0shot-v2.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl22-doc-segmented.unicoil-0shot-v2 > logs/log.dl22-doc-segmented.unicoil-0shot-v2.txt 
2>&1 +python src/main/python/run_regression.py --search --regression dl22-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl22-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl22-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 # DL23 python src/main/python/run_regression.py --search --regression dl23-passage > logs/log.dl23-passage.txt 2>&1 @@ -233,11 +233,11 @@ python src/main/python/run_regression.py --search --regression dl23-passage.d2q- python src/main/python/run_regression.py --search --regression dl23-passage-augmented > logs/log.dl23-passage-augmented.txt 2>&1 python src/main/python/run_regression.py --search --regression dl23-passage-augmented.d2q-t5 > logs/log.dl23-passage-augmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.unicoil-noexp-0shot > logs/log.dl23-passage.unicoil-noexp-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.unicoil-0shot > logs/log.dl23-passage.unicoil-0shot.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-ed > logs/log.dl23-passage.splade-pp-ed.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl23-passage.unicoil-noexp-0shot.cached > logs/log.dl23-passage.unicoil-noexp-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl23-passage.unicoil-0shot.cached > logs/log.dl23-passage.unicoil-0shot.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-ed.cached > logs/log.dl23-passage.splade-pp-ed.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-ed.onnx > logs/log.dl23-passage.splade-pp-ed.onnx.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-sd > 
logs/log.dl23-passage.splade-pp-sd.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-sd.cached > logs/log.dl23-passage.splade-pp-sd.cached.txt 2>&1 python src/main/python/run_regression.py --search --regression dl23-passage.splade-pp-sd.onnx > logs/log.dl23-passage.splade-pp-sd.onnx.txt 2>&1 python src/main/python/run_regression.py --search --regression dl23-doc > logs/log.dl23-doc.txt 2>&1 @@ -245,5 +245,5 @@ python src/main/python/run_regression.py --search --regression dl23-doc.d2q-t5 > python src/main/python/run_regression.py --search --regression dl23-doc-segmented > logs/log.dl23-doc-segmented.txt 2>&1 python src/main/python/run_regression.py --search --regression dl23-doc-segmented.d2q-t5 > logs/log.dl23-doc-segmented.d2q-t5.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2 > logs/log.dl23-doc-segmented.unicoil-noexp-0shot-v2.txt 2>&1 -python src/main/python/run_regression.py --search --regression dl23-doc-segmented.unicoil-0shot-v2 > logs/log.dl23-doc-segmented.unicoil-0shot-v2.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl23-doc-segmented.unicoil-noexp-0shot-v2.cached > logs/log.dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.txt 2>&1 +python src/main/python/run_regression.py --search --regression dl23-doc-segmented.unicoil-0shot-v2.cached > logs/log.dl23-doc-segmented.unicoil-0shot-v2.cached.txt 2>&1 diff --git a/src/main/python/run_regression.py b/src/main/python/run_regression.py index 07bcf43c26..58507fcf9e 100644 --- a/src/main/python/run_regression.py +++ b/src/main/python/run_regression.py @@ -143,9 +143,11 @@ def construct_indexing_command(yaml_data, args): def construct_runfile_path(index, id, model_name): - # If the index is 'indexes/lucene-index.msmarco-passage-ca/', we pull out 'msmarco-passage-ca'. - # Be careful, for 'indexes/lucene-index.mrtydi-v1.1-arabic/', we want to pull out 'mrtydi-v1.1-arabic'. 
- index_part = index.split('/')[1].split('.', 1)[1] + # If the index is 'indexes/lucene-inverted.msmarco-passage-ca/', we pull out 'inverted.msmarco-passage-ca'. + # 'indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/' -> 'hnsw-int8.msmarco-v1-passage.cos-dpr-distil' + # 'indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/' -> 'hnsw.msmarco-v1-passage.cos-dpr-distil' + # Be careful, for 'indexes/lucene-inverted.mrtydi-v1.1-arabic/', we want to pull out 'inverted.mrtydi-v1.1-arabic'. + index_part = index.split('/')[1].split('-', 1)[1] return os.path.join('runs/', 'run.{0}.{1}.{2}'.format(index_part, id, model_name)) diff --git a/src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.template b/src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.template rename to src/main/resources/docgen/templates/dl19-doc-segmented.unicoil-noexp.cached.template diff --git a/src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.template b/src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.template rename to src/main/resources/docgen/templates/dl19-doc-segmented.unicoil.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.template rename to src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.template b/src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template similarity index 100% rename
from src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.template rename to src/main/resources/docgen/templates/dl19-passage.bge-base-en-v1.5.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.template rename to src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.template b/src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.template rename to src/main/resources/docgen/templates/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.template rename to src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.template b/src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.template rename to src/main/resources/docgen/templates/dl19-passage.cos-dpr-distil.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.template 
b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.template rename to src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.template b/src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.template rename to src/main/resources/docgen/templates/dl19-passage.openai-ada2.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.template b/src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/dl19-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.template b/src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/dl19-passage.splade-pp-sd.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.template b/src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.template rename to src/main/resources/docgen/templates/dl19-passage.unicoil-noexp.cached.template diff --git a/src/main/resources/docgen/templates/dl19-passage.unicoil.template b/src/main/resources/docgen/templates/dl19-passage.unicoil.cached.template similarity index 100% rename from 
src/main/resources/docgen/templates/dl19-passage.unicoil.template rename to src/main/resources/docgen/templates/dl19-passage.unicoil.cached.template diff --git a/src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.template b/src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.template rename to src/main/resources/docgen/templates/dl20-doc-segmented.unicoil-noexp.cached.template diff --git a/src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.template b/src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.template rename to src/main/resources/docgen/templates/dl20-doc-segmented.unicoil.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.template rename to src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.template b/src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.template rename to src/main/resources/docgen/templates/dl20-passage.bge-base-en-v1.5.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template similarity index 100% rename from 
src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.template rename to src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.template b/src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.template rename to src/main/resources/docgen/templates/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.template rename to src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.template b/src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.template rename to src/main/resources/docgen/templates/dl20-passage.cos-dpr-distil.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.template rename to src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.template b/src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template similarity 
index 100% rename from src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.template rename to src/main/resources/docgen/templates/dl20-passage.openai-ada2.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.template b/src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/dl20-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.template b/src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/dl20-passage.splade-pp-sd.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.template b/src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.template rename to src/main/resources/docgen/templates/dl20-passage.unicoil-noexp.cached.template diff --git a/src/main/resources/docgen/templates/dl20-passage.unicoil.template b/src/main/resources/docgen/templates/dl20-passage.unicoil.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl20-passage.unicoil.template rename to src/main/resources/docgen/templates/dl20-passage.unicoil.cached.template diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot-v2.template b/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot-v2.template rename to src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot-v2.cached.template 
diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot.template b/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot.template rename to src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot-v2.template b/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot-v2.template rename to src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot.template rename to src/main/resources/docgen/templates/dl21-doc-segmented.unicoil-noexp-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl21-passage.splade-pp-ed.template b/src/main/resources/docgen/templates/dl21-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/dl21-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/dl21-passage.splade-pp-sd.template b/src/main/resources/docgen/templates/dl21-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/dl21-passage.splade-pp-sd.cached.template diff --git 
a/src/main/resources/docgen/templates/dl21-passage.unicoil-0shot.template b/src/main/resources/docgen/templates/dl21-passage.unicoil-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-passage.unicoil-0shot.template rename to src/main/resources/docgen/templates/dl21-passage.unicoil-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl21-passage.unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/dl21-passage.unicoil-noexp-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl21-passage.unicoil-noexp-0shot.template rename to src/main/resources/docgen/templates/dl21-passage.unicoil-noexp-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-0shot-v2.template b/src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-0shot-v2.template rename to src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-noexp-0shot-v2.template b/src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-noexp-0shot-v2.template rename to src/main/resources/docgen/templates/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/dl22-passage.splade-pp-ed.template b/src/main/resources/docgen/templates/dl22-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl22-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/dl22-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/dl22-passage.splade-pp-sd.template 
b/src/main/resources/docgen/templates/dl22-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl22-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/dl22-passage.splade-pp-sd.cached.template diff --git a/src/main/resources/docgen/templates/dl22-passage.unicoil-0shot.template b/src/main/resources/docgen/templates/dl22-passage.unicoil-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl22-passage.unicoil-0shot.template rename to src/main/resources/docgen/templates/dl22-passage.unicoil-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl22-passage.unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/dl22-passage.unicoil-noexp-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl22-passage.unicoil-noexp-0shot.template rename to src/main/resources/docgen/templates/dl22-passage.unicoil-noexp-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-0shot-v2.template b/src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-0shot-v2.template rename to src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-noexp-0shot-v2.template b/src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-noexp-0shot-v2.template rename to src/main/resources/docgen/templates/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/dl23-passage.splade-pp-ed.template 
b/src/main/resources/docgen/templates/dl23-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl23-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/dl23-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/dl23-passage.splade-pp-sd.template b/src/main/resources/docgen/templates/dl23-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl23-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/dl23-passage.splade-pp-sd.cached.template diff --git a/src/main/resources/docgen/templates/dl23-passage.unicoil-0shot.template b/src/main/resources/docgen/templates/dl23-passage.unicoil-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl23-passage.unicoil-0shot.template rename to src/main/resources/docgen/templates/dl23-passage.unicoil-0shot.cached.template diff --git a/src/main/resources/docgen/templates/dl23-passage.unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/dl23-passage.unicoil-noexp-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/dl23-passage.unicoil-noexp-0shot.template rename to src/main/resources/docgen/templates/dl23-passage.unicoil-noexp-0shot.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.template b/src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.template rename to src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil-noexp.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.template b/src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.cached.template similarity index 100% rename from 
src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.template rename to src/main/resources/docgen/templates/msmarco-v1-doc-segmented.unicoil.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.template b/src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.template diff --git 
a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.template b/src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.template b/src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.deepimpact.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.template b/src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.distill-splade-max.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.template rename to 
src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.template b/src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.openai-ada2.hnsw.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.template b/src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.template b/src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.splade-pp-sd.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.template b/src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-noexp.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.template b/src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.template rename to 
src/main/resources/docgen/templates/msmarco-v1-passage.unicoil-tilde-expansion.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.template b/src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.template rename to src/main/resources/docgen/templates/msmarco-v1-passage.unicoil.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.template rename to src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.template rename to src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-0shot.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.template rename to src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.template similarity index 100% rename from 
src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.template rename to src/main/resources/docgen/templates/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-ed.template b/src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-ed.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-ed.template rename to src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-ed.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-sd.template b/src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-sd.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-sd.template rename to src/main/resources/docgen/templates/msmarco-v2-passage.splade-pp-sd.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-0shot.template rename to src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-0shot.cached.template diff --git a/src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-noexp-0shot.cached.template similarity index 100% rename from src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-noexp-0shot.template rename to src/main/resources/docgen/templates/msmarco-v2-passage.unicoil-noexp-0shot.cached.template diff --git a/src/main/resources/regression/dl19-doc-segmented.unicoil-noexp.yaml b/src/main/resources/regression/dl19-doc-segmented.unicoil-noexp.cached.yaml similarity index 100% rename from 
src/main/resources/regression/dl19-doc-segmented.unicoil-noexp.yaml rename to src/main/resources/regression/dl19-doc-segmented.unicoil-noexp.cached.yaml diff --git a/src/main/resources/regression/dl19-doc-segmented.unicoil.yaml b/src/main/resources/regression/dl19-doc-segmented.unicoil.cached.yaml similarity index 100% rename from src/main/resources/regression/dl19-doc-segmented.unicoil.yaml rename to src/main/resources/regression/dl19-doc-segmented.unicoil.cached.yaml diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml similarity index 98% rename from src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.yaml rename to src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml index 6815a9221b..cff74a2891 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: bge-hnsw-cached_q + - name: bge-hnsw-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml index eb3c6831a5..c78c86465e 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw-int8.onnx.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: bge-hnsw + - name: bge-hnsw-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 diff --git 
a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml similarity index 98% rename from src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.yaml rename to src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml index 2f04cd639f..eb9fc27de3 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: bge-hnsw-cached_q + - name: bge-hnsw-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml index 41a5b1005b..9b87263dbe 100644 --- a/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.bge-base-en-v1.5.hnsw.onnx.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: bge-hnsw + - name: bge-hnsw-onnx display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder BgeBaseEn15 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml similarity index 97% rename from src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.yaml rename to src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml index e93ce112d2..76a1e43360 100644 --- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.yaml +++ 
b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cohere-embed-english-v3.0-cached_q + - name: cohere-embed-english-v3.0-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.yaml b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml similarity index 97% rename from src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.yaml rename to src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml index 280300c019..2c11af4800 100644 --- a/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.yaml +++ b/src/main/resources/regression/dl19-passage.cohere-embed-english-v3.0.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cohere-embed-english-v3.0-cached_q + - name: cohere-embed-english-v3.0-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml similarity index 97% rename from src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.yaml rename to src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml index 47948ae150..4ea10ff86e 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cos-dpr-distil-hnsw-cached_q + - name: cos-dpr-distil-hnsw-cached display: 
cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml index fd5c9cbb22..535baa79be 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw-int8.onnx.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cos-dpr-distil-hnsw + - name: cos-dpr-distil-hnsw-onnx display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml similarity index 97% rename from src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.yaml rename to src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml index ec137ff3ab..91e5704ba4 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: cos-dpr-distil-hnsw-cached_q + - name: cos-dpr-distil-hnsw-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml index bfbb347da8..400a13dd9a 100644 --- a/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml +++ b/src/main/resources/regression/dl19-passage.cos-dpr-distil.hnsw.onnx.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: 
- - name: cos-dpr-distil-hnsw + - name: cos-dpr-distil-hnsw-onnx display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField title -threads 16 -hits 1000 -efSearch 1000 -encoder CosDprDistil diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml similarity index 98% rename from src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.yaml rename to src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml index 84f442005b..b7bb1a9be5 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: openai-ada2-cached_q + - name: openai-ada2-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.yaml b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml similarity index 98% rename from src/main/resources/regression/dl19-passage.openai-ada2.hnsw.yaml rename to src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml index eb86bcde5d..6b16e279aa 100644 --- a/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.yaml +++ b/src/main/resources/regression/dl19-passage.openai-ada2.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: openai-ada2-cached_q + - name: openai-ada2-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl19-passage.splade-pp-ed.yaml b/src/main/resources/regression/dl19-passage.splade-pp-ed.cached.yaml similarity index 95% rename from 
src/main/resources/regression/dl19-passage.splade-pp-ed.yaml rename to src/main/resources/regression/dl19-passage.splade-pp-ed.cached.yaml index 92451bd848..e0c2eeb54e 100644 --- a/src/main/resources/regression/dl19-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/dl19-passage.splade-pp-ed.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.6390 R@1000: - 0.8726 - - name: splade-pp-ed-cached_q+rm3 + - name: splade-pp-ed-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.6425 R@1000: - 0.8684 - - name: splade-pp-ed-cached_q+rocchio + - name: splade-pp-ed-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl19-passage.splade-pp-sd.yaml b/src/main/resources/regression/dl19-passage.splade-pp-sd.cached.yaml similarity index 95% rename from src/main/resources/regression/dl19-passage.splade-pp-sd.yaml rename to src/main/resources/regression/dl19-passage.splade-pp-sd.cached.yaml index eb54068be2..e4742a1b76 100644 --- a/src/main/resources/regression/dl19-passage.splade-pp-sd.yaml +++ b/src/main/resources/regression/dl19-passage.splade-pp-sd.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.6353 R@1000: - 0.8758 - - name: splade-pp-sd-cached_q+rm3 + - name: splade-pp-sd-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.6456 R@1000: - 0.8793 - - name: splade-pp-sd-cached_q+rocchio + - name: splade-pp-sd-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git 
a/src/main/resources/regression/dl19-passage.unicoil-noexp.yaml b/src/main/resources/regression/dl19-passage.unicoil-noexp.cached.yaml similarity index 94% rename from src/main/resources/regression/dl19-passage.unicoil-noexp.yaml rename to src/main/resources/regression/dl19-passage.unicoil-noexp.cached.yaml index aa6dcb04d5..12ed08ddf0 100644 --- a/src/main/resources/regression/dl19-passage.unicoil-noexp.yaml +++ b/src/main/resources/regression/dl19-passage.unicoil-noexp.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: unicoil-noexp-cached_q + - name: unicoil-noexp-cached display: uniCOIL (no expansions) params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.5629 R@1000: - 0.7752 - - name: unicoil-noexp-cached_q+rm3 + - name: unicoil-noexp-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.5915 R@1000: - 0.8019 - - name: unicoil-noexp-cached_q+rocchio + - name: unicoil-noexp-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl19-passage.unicoil.yaml b/src/main/resources/regression/dl19-passage.unicoil.cached.yaml similarity index 95% rename from src/main/resources/regression/dl19-passage.unicoil.yaml rename to src/main/resources/regression/dl19-passage.unicoil.cached.yaml index e8263fa718..a683043347 100644 --- a/src/main/resources/regression/dl19-passage.unicoil.yaml +++ b/src/main/resources/regression/dl19-passage.unicoil.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl19-passage.txt models: - - name: unicoil-cached_q + - name: unicoil-cached display: uniCOIL (with doc2query-T5 expansions) params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.6054 R@1000: - 0.8292 - - name: unicoil-cached_q+rm3 + - name: unicoil-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.6207 R@1000: - 0.8598 - - name: unicoil-cached_q+rocchio + - name: 
unicoil-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.yaml b/src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.cached.yaml similarity index 95% rename from src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.yaml rename to src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.cached.yaml index b6fccbb916..f34d17b18a 100644 --- a/src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.yaml +++ b/src/main/resources/regression/dl20-doc-segmented.unicoil-noexp.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl20-doc.txt models: - - name: unicoil-noexp-cached_q + - name: unicoil-noexp-cached display: uniCOIL w/ doc2query-T5 expansion params: -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: @@ -65,7 +65,7 @@ models: - 0.5872 R@1000: - 0.7623 - - name: unicoil-noexp-cached_q+rm3 + - name: unicoil-noexp-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: @@ -77,7 +77,7 @@ models: - 0.6381 R@1000: - 0.8117 - - name: unicoil-noexp-cached_q+rocchio + - name: unicoil-noexp-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: diff --git a/src/main/resources/regression/dl20-doc-segmented.unicoil.yaml b/src/main/resources/regression/dl20-doc-segmented.unicoil.cached.yaml similarity index 96% rename from src/main/resources/regression/dl20-doc-segmented.unicoil.yaml rename to src/main/resources/regression/dl20-doc-segmented.unicoil.cached.yaml index 4b8d92dbe3..e0a32af196 100644 --- a/src/main/resources/regression/dl20-doc-segmented.unicoil.yaml +++ b/src/main/resources/regression/dl20-doc-segmented.unicoil.cached.yaml @@ -53,7 +53,7 @@ 
topics: qrel: qrels.dl20-doc.txt models: - - name: unicoil-cached_q + - name: unicoil-cached display: uniCOIL w/ doc2query-T5 expansion params: -impact -pretokenized -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: @@ -65,7 +65,7 @@ models: - 0.6210 R@1000: - 0.7869 - - name: unicoil-cached_q+rm3 + - name: unicoil-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: @@ -77,7 +77,7 @@ models: - 0.6499 R@1000: - 0.8229 - - name: unicoil-cached_q+rocchio + - name: unicoil-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 results: diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml similarity index 98% rename from src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.yaml rename to src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml index 4f535889ce..e7556640cb 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: bge-hnsw-cached_q + - name: bge-hnsw-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.yaml b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml similarity index 98% rename from src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.yaml rename to src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml index 
32cb5e3276..a4f206ea3e 100644 --- a/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.yaml +++ b/src/main/resources/regression/dl20-passage.bge-base-en-v1.5.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: bge-hnsw-cached_q + - name: bge-hnsw-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml similarity index 97% rename from src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.yaml rename to src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml index 5205fbd4e7..d1e2122a67 100644 --- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.yaml +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cohere-embed-english-v3.0-cached_q + - name: cohere-embed-english-v3.0-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.yaml b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml similarity index 97% rename from src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.yaml rename to src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml index d65daa337c..98f8a1b6f7 100644 --- a/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.yaml +++ b/src/main/resources/regression/dl20-passage.cohere-embed-english-v3.0.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: 
qrels.dl20-passage.txt models: - - name: cohere-embed-english-v3.0-cached_q + - name: cohere-embed-english-v3.0-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml similarity index 97% rename from src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.yaml rename to src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml index eff9fd70f8..3752d632c3 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cos-dpr-distil-hnsw-cached_q + - name: cos-dpr-distil-hnsw-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.yaml b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml similarity index 97% rename from src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.yaml rename to src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml index 3861a45c56..93707fb267 100644 --- a/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.yaml +++ b/src/main/resources/regression/dl20-passage.cos-dpr-distil.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: cos-dpr-distil-hnsw-cached_q + - name: cos-dpr-distil-hnsw-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.yaml 
b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml similarity index 98% rename from src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.yaml rename to src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml index 5a0c06a270..9525e0d0e5 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: openai-ada2-cached_q + - name: openai-ada2-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.yaml b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml similarity index 98% rename from src/main/resources/regression/dl20-passage.openai-ada2.hnsw.yaml rename to src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml index dbe93e9107..8aa54378a9 100644 --- a/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.yaml +++ b/src/main/resources/regression/dl20-passage.openai-ada2.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: openai-ada2-cached_q + - name: openai-ada2-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/dl20-passage.splade-pp-ed.yaml b/src/main/resources/regression/dl20-passage.splade-pp-ed.cached.yaml similarity index 94% rename from src/main/resources/regression/dl20-passage.splade-pp-ed.yaml rename to src/main/resources/regression/dl20-passage.splade-pp-ed.cached.yaml index afc698addf..17a0fff7d5 100644 --- a/src/main/resources/regression/dl20-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/dl20-passage.splade-pp-ed.cached.yaml @@ -53,7 +53,7 @@ 
topics: qrel: qrels.dl20-passage.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.7653 R@1000: - 0.8995 - - name: splade-pp-ed-cached_q+rm3 + - name: splade-pp-ed-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.7553 R@1000: - 0.9046 - - name: splade-pp-ed-cached_q+rocchio + - name: splade-pp-ed-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl20-passage.splade-pp-sd.yaml b/src/main/resources/regression/dl20-passage.splade-pp-sd.cached.yaml similarity index 94% rename from src/main/resources/regression/dl20-passage.splade-pp-sd.yaml rename to src/main/resources/regression/dl20-passage.splade-pp-sd.cached.yaml index 7484544aa9..947a9dc0be 100644 --- a/src/main/resources/regression/dl20-passage.splade-pp-sd.yaml +++ b/src/main/resources/regression/dl20-passage.splade-pp-sd.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.7512 R@1000: - 0.9023 - - name: splade-pp-sd-cached_q+rm3 + - name: splade-pp-sd-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.7631 R@1000: - 0.9174 - - name: splade-pp-sd-cached_q+rocchio + - name: splade-pp-sd-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl20-passage.unicoil-noexp.yaml b/src/main/resources/regression/dl20-passage.unicoil-noexp.cached.yaml similarity index 94% rename from src/main/resources/regression/dl20-passage.unicoil-noexp.yaml rename to src/main/resources/regression/dl20-passage.unicoil-noexp.cached.yaml index e85d1d4bd0..76311e899f 100644 
--- a/src/main/resources/regression/dl20-passage.unicoil-noexp.yaml +++ b/src/main/resources/regression/dl20-passage.unicoil-noexp.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: unicoil-noexp-cached_q + - name: unicoil-noexp-cached display: uniCOIL (no expansions) params: -impact -pretokenized results: @@ -67,7 +67,7 @@ models: - 0.6658 R@1000: - 0.7861 - - name: unicoil-noexp-cached_q+rm3 + - name: unicoil-noexp-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -79,7 +79,7 @@ models: - 0.6629 R@1000: - 0.8091 - - name: unicoil-noexp-cached_q+rocchio + - name: unicoil-noexp-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl20-passage.unicoil.yaml b/src/main/resources/regression/dl20-passage.unicoil.cached.yaml similarity index 95% rename from src/main/resources/regression/dl20-passage.unicoil.yaml rename to src/main/resources/regression/dl20-passage.unicoil.cached.yaml index 2ce526e893..5e40754f13 100644 --- a/src/main/resources/regression/dl20-passage.unicoil.yaml +++ b/src/main/resources/regression/dl20-passage.unicoil.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.dl20-passage.txt models: - - name: unicoil-cached_q + - name: unicoil-cached display: uniCOIL (with doc2query-T5 expansions) params: -impact -pretokenized results: @@ -65,7 +65,7 @@ models: - 0.7006 R@1000: - 0.8430 - - name: unicoil-cached_q+rm3 + - name: unicoil-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 results: @@ -77,7 +77,7 @@ models: - 0.6822 R@1000: - 0.8417 - - name: unicoil-cached_q+rocchio + - name: unicoil-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio results: diff --git a/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.yaml b/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.cached.yaml similarity index 96% rename from 
src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.yaml rename to src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.cached.yaml index 292bf6d1eb..c4d583e6b7 100644 --- a/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.yaml +++ b/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot-v2.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-doc.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached display: uniCOIL (with doc2query-T5) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.3700 R@1000: - 0.7069 - - name: unicoil-0shot+rm3 + - name: unicoil-0shot-cached+rm3 display: +RM3 params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.4233 R@1000: - 0.7611 - - name: unicoil-0shot+rocchio + - name: unicoil-0shot-cached+rocchio display: +Rocchio params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.yaml b/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.cached.yaml similarity index 98% rename from src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.yaml rename to src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.cached.yaml index c57d30d5cc..c52ef07497 100644 --- a/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.yaml +++ b/src/main/resources/regression/dl21-doc-segmented.unicoil-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-doc.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached display: uniCOIL (with doc2query-T5) zero-shot params: -hits 10000 
-selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: diff --git a/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.yaml b/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml similarity index 95% rename from src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.yaml rename to src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml index e52167890f..e374d75205 100644 --- a/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.yaml +++ b/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-doc.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.3563 R@1000: - 0.6787 - - name: unicoil-noexp-0shot+rm3 + - name: unicoil-noexp-0shot-cached+rm3 display: +RM3 params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.4048 R@1000: - 0.7506 - - name: unicoil-noexp-0shot+rocchio + - name: unicoil-noexp-0shot-cached+rocchio display: +Rocchio params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.cached.yaml similarity index 98% rename from src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.yaml rename to src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.cached.yaml 
index 5cfb41d08b..93ab21f1a3 100644 --- a/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/dl21-doc-segmented.unicoil-noexp-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-doc.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: diff --git a/src/main/resources/regression/dl21-passage.splade-pp-ed.yaml b/src/main/resources/regression/dl21-passage.splade-pp-ed.cached.yaml similarity index 95% rename from src/main/resources/regression/dl21-passage.splade-pp-ed.yaml rename to src/main/resources/regression/dl21-passage.splade-pp-ed.cached.yaml index 721e490f45..f1c6ea9018 100644 --- a/src/main/resources/regression/dl21-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/dl21-passage.splade-pp-ed.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-passage.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -parallelism 16 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.5619 R@1000: - 0.8586 - - name: splade-pp-ed-cached_q+rm3 + - name: splade-pp-ed-cached+rm3 display: +RM3 params: -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.5693 R@1000: - 0.8705 - - name: splade-pp-ed-cached_q+rocchio + - name: splade-pp-ed-cached+rocchio display: +Rocchio params: -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl21-passage.splade-pp-sd.yaml b/src/main/resources/regression/dl21-passage.splade-pp-sd.cached.yaml similarity index 95% rename from src/main/resources/regression/dl21-passage.splade-pp-sd.yaml rename to 
src/main/resources/regression/dl21-passage.splade-pp-sd.cached.yaml index 5eedec2eb1..06b29e5279 100644 --- a/src/main/resources/regression/dl21-passage.splade-pp-sd.yaml +++ b/src/main/resources/regression/dl21-passage.splade-pp-sd.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-passage.txt models: - - name: splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -parallelism 16 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.5477 R@1000: - 0.8525 - - name: splade-pp-sd-cached_q+rm3 + - name: splade-pp-sd-cached+rm3 display: +RM3 params: -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.5588 R@1000: - 0.8655 - - name: splade-pp-sd-cached_q+rocchio + - name: splade-pp-sd-cached+rocchio display: +Rocchio params: -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl21-passage.unicoil-0shot.yaml b/src/main/resources/regression/dl21-passage.unicoil-0shot.cached.yaml similarity index 95% rename from src/main/resources/regression/dl21-passage.unicoil-0shot.yaml rename to src/main/resources/regression/dl21-passage.unicoil-0shot.cached.yaml index b73e22f60b..cab7abc06a 100644 --- a/src/main/resources/regression/dl21-passage.unicoil-0shot.yaml +++ b/src/main/resources/regression/dl21-passage.unicoil-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-passage.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached display: uniCOIL (with doc2query-T5) zero-shot params: -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.4731 R@1000: - 0.7551 - - name: unicoil-0shot-cached_q+rm3 + - name: unicoil-0shot-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.5141 R@1000: - 0.7889 - - name: unicoil-0shot-cached_q+rocchio + - name: unicoil-0shot-cached+rocchio 
display: +Rocchio params: -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.cached.yaml similarity index 95% rename from src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.yaml rename to src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.cached.yaml index da39e9133e..c0d75b31b7 100644 --- a/src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/dl21-passage.unicoil-noexp-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl21-passage.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.4246 R@1000: - 0.6897 - - name: unicoil-noexp-0shot-cached_q+rm3 + - name: unicoil-noexp-0shot-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.4811 R@1000: - 0.7309 - - name: unicoil-noexp-0shot-cached_q+rocchio + - name: unicoil-noexp-0shot-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.yaml b/src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.cached.yaml similarity index 96% rename from src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.yaml rename to src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.cached.yaml index 5602238aa1..dcd8faee91 100644 --- a/src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.yaml +++ b/src/main/resources/regression/dl22-doc-segmented.unicoil-0shot-v2.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl22-doc.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached 
display: uniCOIL (with doc2query-T5) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.2656 R@1000: - 0.5235 - - name: unicoil-0shot-cached_q+rm3 + - name: unicoil-0shot-cached+rm3 display: +RM3 params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.2810 R@1000: - 0.5586 - - name: unicoil-0shot-cached_q+rocchio + - name: unicoil-0shot-cached+rocchio display: +Rocchio params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.yaml b/src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml similarity index 95% rename from src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.yaml rename to src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml index 1cf5fe5131..2c6fa4fc80 100644 --- a/src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.yaml +++ b/src/main/resources/regression/dl22-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl22-doc.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.2335 R@1000: - 0.4779 - - name: unicoil-noexp-0shot-cached_q+rm3 + - name: unicoil-noexp-0shot-cached+rm3 display: +RM3 params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection 
results: @@ -89,7 +89,7 @@ models: - 0.2534 R@1000: - 0.5161 - - name: unicoil-noexp-0shot-cached_q+rocchio + - name: unicoil-noexp-0shot-cached+rocchio display: +Rocchio params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl22-passage.splade-pp-ed.yaml b/src/main/resources/regression/dl22-passage.splade-pp-ed.cached.yaml similarity index 95% rename from src/main/resources/regression/dl22-passage.splade-pp-ed.yaml rename to src/main/resources/regression/dl22-passage.splade-pp-ed.cached.yaml index f6b827dfb6..cdff36d235 100644 --- a/src/main/resources/regression/dl22-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/dl22-passage.splade-pp-ed.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl22-passage.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -parallelism 16 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.3742 R@1000: - 0.6629 - - name: splade-pp-ed-cached_q+rm3 + - name: splade-pp-ed-cached+rm3 display: +RM3 params: -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3561 R@1000: - 0.6367 - - name: splade-pp-ed-cached_q+rocchio + - name: splade-pp-ed-cached+rocchio display: +Rocchio params: -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl22-passage.splade-pp-sd.yaml b/src/main/resources/regression/dl22-passage.splade-pp-sd.cached.yaml similarity index 95% rename from src/main/resources/regression/dl22-passage.splade-pp-sd.yaml rename to src/main/resources/regression/dl22-passage.splade-pp-sd.cached.yaml index 095b68482f..372da1cd34 100644 --- a/src/main/resources/regression/dl22-passage.splade-pp-sd.yaml +++ 
b/src/main/resources/regression/dl22-passage.splade-pp-sd.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl22-passage.txt models: - - name: splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -parallelism 16 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.3681 R@1000: - 0.6551 - - name: splade-pp-sd-cached_q+rm3 + - name: splade-pp-sd-cached+rm3 display: +RM3 params: -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3693 R@1000: - 0.6350 - - name: splade-pp-sd-cached_q+rocchio + - name: splade-pp-sd-cached+rocchio display: +Rocchio params: -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl22-passage.unicoil-0shot.yaml b/src/main/resources/regression/dl22-passage.unicoil-0shot.cached.yaml similarity index 95% rename from src/main/resources/regression/dl22-passage.unicoil-0shot.yaml rename to src/main/resources/regression/dl22-passage.unicoil-0shot.cached.yaml index 9b80a4f186..75c449b2ba 100644 --- a/src/main/resources/regression/dl22-passage.unicoil-0shot.yaml +++ b/src/main/resources/regression/dl22-passage.unicoil-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl22-passage.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached display: uniCOIL (with doc2query-T5) zero-shot params: -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.2716 R@1000: - 0.5253 - - name: unicoil-0shot-cached_q+rm3 + - name: unicoil-0shot-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.2751 R@1000: - 0.5372 - - name: unicoil-0shot-cached_q+rocchio + - name: unicoil-0shot-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git 
a/src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.cached.yaml similarity index 95% rename from src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.yaml rename to src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.cached.yaml index 1f08f9ba06..fc2409830c 100644 --- a/src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/dl22-passage.unicoil-noexp-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl22-passage.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.2151 R@1000: - 0.4423 - - name: unicoil-noexp-0shot-cached_q+rm3 + - name: unicoil-noexp-0shot-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.2391 R@1000: - 0.4684 - - name: unicoil-noexp-0shot-cached_q+rocchio + - name: unicoil-noexp-0shot-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.yaml b/src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.cached.yaml similarity index 96% rename from src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.yaml rename to src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.cached.yaml index 38f6c8997d..b6f0582324 100644 --- a/src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.yaml +++ b/src/main/resources/regression/dl23-doc-segmented.unicoil-0shot-v2.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl23-doc.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached display: uniCOIL (with doc2query-T5) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" 
-selectMaxPassage.hits 1000 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.3101 R@1000: - 0.5753 - - name: unicoil-0shot-cached_q+rm3 + - name: unicoil-0shot-cached+rm3 display: +RM3 params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3380 R@1000: - 0.6067 - - name: unicoil-0shot-cached_q+rocchio + - name: unicoil-0shot-cached+rocchio display: +Rocchio params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.yaml b/src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml similarity index 95% rename from src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.yaml rename to src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml index 44a69d9b11..8572b79c9c 100644 --- a/src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.yaml +++ b/src/main/resources/regression/dl23-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl23-doc.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.2949 R@1000: - 0.5462 - - name: unicoil-noexp-0shot-cached_q+rm3 + - name: unicoil-noexp-0shot-cached+rm3 display: +RM3 params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3274 R@1000: - 0.6011 - - name: unicoil-noexp-0shot-cached_q+rocchio + - name: 
unicoil-noexp-0shot-cached+rocchio display: +Rocchio params: -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl23-passage.splade-pp-ed.yaml b/src/main/resources/regression/dl23-passage.splade-pp-ed.cached.yaml similarity index 95% rename from src/main/resources/regression/dl23-passage.splade-pp-ed.yaml rename to src/main/resources/regression/dl23-passage.splade-pp-ed.cached.yaml index 658e64c671..13fb7acdf5 100644 --- a/src/main/resources/regression/dl23-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/dl23-passage.splade-pp-ed.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl23-passage.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -parallelism 16 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.4137 R@1000: - 0.6779 - - name: splade-pp-ed-cached_q+rm3 + - name: splade-pp-ed-cached+rm3 display: +RM3 params: -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3979 R@1000: - 0.6620 - - name: splade-pp-ed-cached_q+rocchio + - name: splade-pp-ed-cached+rocchio display: +Rocchio params: -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl23-passage.splade-pp-sd.yaml b/src/main/resources/regression/dl23-passage.splade-pp-sd.cached.yaml similarity index 95% rename from src/main/resources/regression/dl23-passage.splade-pp-sd.yaml rename to src/main/resources/regression/dl23-passage.splade-pp-sd.cached.yaml index 00d77f38ff..2d17930b66 100644 --- a/src/main/resources/regression/dl23-passage.splade-pp-sd.yaml +++ b/src/main/resources/regression/dl23-passage.splade-pp-sd.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl23-passage.txt models: - - name: 
splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -parallelism 16 -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.4137 R@1000: - 0.6731 - - name: splade-pp-sd-cached_q+rm3 + - name: splade-pp-sd-cached+rm3 display: +RM3 params: -parallelism 16 -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3892 R@1000: - 0.6486 - - name: splade-pp-sd-cached_q+rocchio + - name: splade-pp-sd-cached+rocchio display: +Rocchio params: -parallelism 16 -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl23-passage.unicoil-0shot.yaml b/src/main/resources/regression/dl23-passage.unicoil-0shot.cached.yaml similarity index 95% rename from src/main/resources/regression/dl23-passage.unicoil-0shot.yaml rename to src/main/resources/regression/dl23-passage.unicoil-0shot.cached.yaml index 066ee852fd..08fc4877e2 100644 --- a/src/main/resources/regression/dl23-passage.unicoil-0shot.yaml +++ b/src/main/resources/regression/dl23-passage.unicoil-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl23-passage.txt models: - - name: unicoil-0shot-cached_q + - name: unicoil-0shot-cached display: uniCOIL (with doc2query-T5) zero-shot params: -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.3293 R@1000: - 0.5541 - - name: unicoil-0shot-cached_q+rm3 + - name: unicoil-0shot-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.3126 R@1000: - 0.5541 - - name: unicoil-0shot-cached_q+rocchio + - name: unicoil-0shot-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.cached.yaml similarity index 95% rename from 
src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.yaml rename to src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.cached.yaml index 9e0a7ec020..7ba0401f09 100644 --- a/src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.yaml +++ b/src/main/resources/regression/dl23-passage.unicoil-noexp-0shot.cached.yaml @@ -61,7 +61,7 @@ topics: qrel: qrels.dl23-passage.txt models: - - name: unicoil-noexp-0shot-cached_q + - name: unicoil-noexp-0shot-cached display: uniCOIL (noexp) zero-shot params: -impact -pretokenized results: @@ -75,7 +75,7 @@ models: - 0.2761 R@1000: - 0.5070 - - name: unicoil-noexp-0shot-cached_q+rm3 + - name: unicoil-noexp-0shot-cached+rm3 display: +RM3 params: -impact -pretokenized -rm3 -collection JsonVectorCollection results: @@ -89,7 +89,7 @@ models: - 0.2811 R@1000: - 0.5076 - - name: unicoil-noexp-0shot-cached_q+rocchio + - name: unicoil-noexp-0shot-cached+rocchio display: +Rocchio params: -impact -pretokenized -rocchio -collection JsonVectorCollection results: diff --git a/src/main/resources/regression/msmarco-v1-doc-segmented.unicoil-noexp.yaml b/src/main/resources/regression/msmarco-v1-doc-segmented.unicoil-noexp.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v1-doc-segmented.unicoil-noexp.yaml rename to src/main/resources/regression/msmarco-v1-doc-segmented.unicoil-noexp.cached.yaml diff --git a/src/main/resources/regression/msmarco-v1-doc-segmented.unicoil.yaml b/src/main/resources/regression/msmarco-v1-doc-segmented.unicoil.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v1-doc-segmented.unicoil.yaml rename to src/main/resources/regression/msmarco-v1-doc-segmented.unicoil.cached.yaml diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml similarity index 98% rename from 
src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.yaml rename to src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml index ae8fc31ebe..75b2c5af3f 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: bge-hnsw-cached_q + - name: bge-hnsw-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.yaml b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml similarity index 98% rename from src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.yaml rename to src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml index d021c1a9ba..8f988721aa 100644 --- a/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.bge-base-en-v1.5.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: bge-hnsw-cached_q + - name: bge-hnsw-cached display: BGE-base-en-v1.5 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml similarity index 97% rename from src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.yaml rename to src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml index 9d07f40159..6c7d9641e7 100644 --- 
a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cohere-embed-english-v3.0-cached_q + - name: cohere-embed-english-v3.0-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.yaml b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml similarity index 97% rename from src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.yaml rename to src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml index e475bad88b..80a5b04ca3 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cohere-embed-english-v3.0.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cohere-embed-english-v3.0-cached_q + - name: cohere-embed-english-v3.0-cached display: cohere-embed-english-v3.0 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml similarity index 97% rename from src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.yaml rename to src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml index 4e087dc9a0..8aae7ac52e 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.yaml +++ 
b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cos-dpr-distil-hnsw-cached_q + - name: cos-dpr-distil-hnsw-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.yaml b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml similarity index 97% rename from src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.yaml rename to src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml index 2ec7427dd8..0e11ea03f7 100644 --- a/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.cos-dpr-distil.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: cos-dpr-distil-hnsw-cached_q + - name: cos-dpr-distil-hnsw-cached display: cosDPR-distil type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.deepimpact.yaml b/src/main/resources/regression/msmarco-v1-passage.deepimpact.cached.yaml similarity index 98% rename from src/main/resources/regression/msmarco-v1-passage.deepimpact.yaml rename to src/main/resources/regression/msmarco-v1-passage.deepimpact.cached.yaml index a2e4889da5..5859e3dc56 100644 --- a/src/main/resources/regression/msmarco-v1-passage.deepimpact.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.deepimpact.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: deepimpact-cached_q + - name: deepimpact-cached display: DeepImpact params: -impact -pretokenized results: diff --git 
a/src/main/resources/regression/msmarco-v1-passage.distill-splade-max.yaml b/src/main/resources/regression/msmarco-v1-passage.distill-splade-max.cached.yaml similarity index 97% rename from src/main/resources/regression/msmarco-v1-passage.distill-splade-max.yaml rename to src/main/resources/regression/msmarco-v1-passage.distill-splade-max.cached.yaml index 7ab0805466..8a51ed03b7 100644 --- a/src/main/resources/regression/msmarco-v1-passage.distill-splade-max.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.distill-splade-max.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: distill-splade-max-cached_q + - name: distill-splade-max-cached display: DistilSPLADE-max params: -impact -pretokenized results: diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml similarity index 98% rename from src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.yaml rename to src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml index d86e040de1..8b4f084f1d 100644 --- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw-int8.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: openai-ada2-cached_q + - name: openai-ada2-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.yaml b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml similarity index 98% rename from src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.yaml rename to src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml index 
7b44bb6e85..f3a193fd57 100644 --- a/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.openai-ada2.hnsw.cached.yaml @@ -50,7 +50,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: openai-ada2-cached_q + - name: openai-ada2-cached display: OpenAI-ada2 type: hnsw params: -generator VectorQueryGenerator -topicField vector -threads 16 -hits 1000 -efSearch 1000 diff --git a/src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.yaml b/src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.cached.yaml similarity index 95% rename from src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.yaml rename to src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.cached.yaml index dfc4029952..98ec0c0b89 100644 --- a/src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.splade-pp-ed.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -impact -pretokenized results: @@ -68,7 +68,7 @@ models: # PRF regressions are no longer maintained for sparse judgments to reduce running times. # (commenting out instead of removing; in case these numbers are needed, just uncomment and rerun.) 
# -# - name: splade-pp-ed-cached_q+rm3 +# - name: splade-pp-ed-cached+rm3 # display: +RM3 # params: -impact -pretokenized -rm3 # results: @@ -80,7 +80,7 @@ models: # - 0.8728 # R@1000: # - 0.9744 -# - name: splade-pp-ed-cached_q+rocchio +# - name: splade-pp-ed-cached+rocchio # display: +Rocchio # params: -impact -pretokenized -rocchio # results: diff --git a/src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.yaml b/src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.cached.yaml similarity index 95% rename from src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.yaml rename to src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.cached.yaml index c6a822493c..86348b78a8 100644 --- a/src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.yaml +++ b/src/main/resources/regression/msmarco-v1-passage.splade-pp-sd.cached.yaml @@ -53,7 +53,7 @@ topics: qrel: qrels.msmarco-passage.dev-subset.txt models: - - name: splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -impact -pretokenized results: @@ -68,7 +68,7 @@ models: # PRF regressions are no longer maintained for sparse judgments to reduce running times. # (commenting out instead of removing; in case these numbers are needed, just uncomment and rerun.) 
# -# - name: splade-pp-sd-cached_q+rm3 +# - name: splade-pp-sd-cached+rm3 # display: +RM3 # params: -impact -pretokenized -rm3 # results: @@ -80,7 +80,7 @@ models: # - 0.8681 # R@1000: # - 0.9739 -# - name: splade-pp-sd-cached_q+rocchio +# - name: splade-pp-sd-cached+rocchio # display: +Rocchio # params: -impact -pretokenized -rocchio # results: diff --git a/src/main/resources/regression/msmarco-v1-passage.unicoil-noexp.yaml b/src/main/resources/regression/msmarco-v1-passage.unicoil-noexp.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v1-passage.unicoil-noexp.yaml rename to src/main/resources/regression/msmarco-v1-passage.unicoil-noexp.cached.yaml diff --git a/src/main/resources/regression/msmarco-v1-passage.unicoil-tilde-expansion.yaml b/src/main/resources/regression/msmarco-v1-passage.unicoil-tilde-expansion.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v1-passage.unicoil-tilde-expansion.yaml rename to src/main/resources/regression/msmarco-v1-passage.unicoil-tilde-expansion.cached.yaml diff --git a/src/main/resources/regression/msmarco-v1-passage.unicoil.yaml b/src/main/resources/regression/msmarco-v1-passage.unicoil.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v1-passage.unicoil.yaml rename to src/main/resources/regression/msmarco-v1-passage.unicoil.cached.yaml diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot-v2.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot-v2.yaml rename to src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot-v2.cached.yaml diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot.cached.yaml similarity index 100% rename from 
src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot.yaml rename to src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-0shot.cached.yaml diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.yaml rename to src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot-v2.cached.yaml diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot.yaml rename to src/main/resources/regression/msmarco-v2-doc-segmented.unicoil-noexp-0shot.cached.yaml diff --git a/src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.yaml b/src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.cached.yaml similarity index 98% rename from src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.yaml rename to src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.cached.yaml index 64fd1f1e84..d158c7fc35 100644 --- a/src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.yaml +++ b/src/main/resources/regression/msmarco-v2-passage.splade-pp-ed.cached.yaml @@ -58,7 +58,7 @@ topics: qrel: qrels.msmarco-v2-passage.dev2.txt models: - - name: splade-pp-ed-cached_q + - name: splade-pp-ed-cached display: SPLADE++ CoCondenser-EnsembleDistil params: -parallelism 16 -impact -pretokenized results: diff --git a/src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.yaml b/src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.cached.yaml similarity index 98% rename from src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.yaml rename to 
src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.cached.yaml index c8efc779a1..42d7c062e1 100644 --- a/src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.yaml +++ b/src/main/resources/regression/msmarco-v2-passage.splade-pp-sd.cached.yaml @@ -58,7 +58,7 @@ topics: qrel: qrels.msmarco-v2-passage.dev2.txt models: - - name: splade-pp-sd-cached_q + - name: splade-pp-sd-cached display: SPLADE++ CoCondenser-SelfDistil params: -parallelism 16 -impact -pretokenized results: diff --git a/src/main/resources/regression/msmarco-v2-passage.unicoil-0shot.yaml b/src/main/resources/regression/msmarco-v2-passage.unicoil-0shot.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v2-passage.unicoil-0shot.yaml rename to src/main/resources/regression/msmarco-v2-passage.unicoil-0shot.cached.yaml diff --git a/src/main/resources/regression/msmarco-v2-passage.unicoil-noexp-0shot.yaml b/src/main/resources/regression/msmarco-v2-passage.unicoil-noexp-0shot.cached.yaml similarity index 100% rename from src/main/resources/regression/msmarco-v2-passage.unicoil-noexp-0shot.yaml rename to src/main/resources/regression/msmarco-v2-passage.unicoil-noexp-0shot.cached.yaml