From 2f1b66586073f1fc4e8913d1119fbbf478745013 Mon Sep 17 00:00:00 2001 From: Emily Wang Date: Wed, 14 Aug 2019 01:12:57 -0400 Subject: [PATCH] Updated score tie breaking in bm25prf (#777) --- .../java/io/anserini/rerank/lib/BM25PrfReranker.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/io/anserini/rerank/lib/BM25PrfReranker.java b/src/main/java/io/anserini/rerank/lib/BM25PrfReranker.java index 5e991449b6..021399f5fc 100644 --- a/src/main/java/io/anserini/rerank/lib/BM25PrfReranker.java +++ b/src/main/java/io/anserini/rerank/lib/BM25PrfReranker.java @@ -48,6 +48,8 @@ import java.util.Set; import static io.anserini.index.generator.LuceneDocumentGenerator.FIELD_BODY; +import static io.anserini.search.SearchCollection.BREAK_SCORE_TIES_BY_DOCID; +import static io.anserini.search.SearchCollection.BREAK_SCORE_TIES_BY_TWEETID; class BM25PrfSimilarity extends BM25Similarity { @@ -110,7 +112,14 @@ public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) { TopDocs rs; try { - rs = searcher.search(newQuery, context.getSearchArgs().hits); + // Figure out how to break the scoring ties. + if (context.getSearchArgs().arbitraryScoreTieBreak) { + rs = searcher.search(newQuery, context.getSearchArgs().hits); + } else if (context.getSearchArgs().searchtweets) { + rs = searcher.search(newQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true); + } else { + rs = searcher.search(newQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true); + } } catch (IOException e) { e.printStackTrace(); return docs;