From 3f1952b53c42ed86a5ac516724fc742edbd0fa32 Mon Sep 17 00:00:00 2001 From: Chaitanya Gohel <104654647+gashutos@users.noreply.github.com> Date: Mon, 4 Sep 2023 21:55:34 +0530 Subject: [PATCH] Honor topvalue while determining isMissingvalueCompetitive in case bottom is not set (#12520) --- lucene/CHANGES.txt | 2 ++ .../search/comparators/DoubleComparator.java | 10 ++++-- .../search/comparators/FloatComparator.java | 10 ++++-- .../search/comparators/IntComparator.java | 12 ++++--- .../search/comparators/LongComparator.java | 12 ++++--- .../search/comparators/NumericComparator.java | 24 ++++++++++++- .../lucene/search/TestSortOptimization.java | 34 +++++++++++++++++++ 7 files changed, 87 insertions(+), 17 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 50bd6dc87f59..8865ab7b7b5b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -103,6 +103,8 @@ Bug Fixes impact on the user, unless you explicitly invoke the convert function of UTF32ToUTF8, and in the extremely rare scenario of searching a non-UTF-8 inverted field with Unicode search terms (Tang Donghai). +* LUCENE-12521: Sort After returning in-correct result when missing values are competitive. (Chaitanya Gohel) + Other --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java index 68fc72c7472c..c1f8ee1aa3a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java @@ -96,9 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Double.compare(missingValue, bottom); - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Double.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Double.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java index 8bb9a22417d8..9eaff6631af1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java @@ -96,9 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Float.compare(missingValue, bottom); - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Float.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Float.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java index d3576e9ee076..46724611338a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java @@ -96,11 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Integer.compare(missingValue, bottom); - // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, - // in asc sort missingValue is competitive when it's smaller or equal to bottom - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Integer.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Integer.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java index a96a163b44e2..46b774af0b60 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java @@ -96,11 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Long.compare(missingValue, bottom); - // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, - // in asc sort missingValue is competitive when it's smaller or equal to bottom - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Long.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Long.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index ea75530fb2b4..cb699192bbda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -309,6 +309,26 @@ private void updateSkipInterval(boolean success) { } } + private boolean isMissingValueCompetitive() { + // if queue is full, always compare with bottom, + // if not, check if we can compare with topValue + if (queueFull) { + int result = compareMissingValueWithBottomValue(); + // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, + // in asc sort missingValue is competitive when it's smaller or equal to bottom + return reverse ? (result >= 0) : (result <= 0); + } else if (topValueSet) { + int result = compareMissingValueWithTopValue(); + // in reverse (desc) sort missingValue is competitive when it's smaller or equal to + // topValue, + // in asc sort missingValue is competitive when it's greater or equal to topValue + return reverse ? (result <= 0) : (result >= 0); + } else { + // by default competitive + return true; + } + } + @Override public DocIdSetIterator competitiveIterator() { if (enableSkipping == false) return null; @@ -337,7 +357,9 @@ public int advance(int target) throws IOException { }; } - protected abstract boolean isMissingValueCompetitive(); + protected abstract int compareMissingValueWithTopValue(); + + protected abstract int compareMissingValueWithBottomValue(); protected abstract void encodeBottom(byte[] packedValue); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index d30146f39a3c..af0b1d0ed3e7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -239,6 +239,40 @@ public void testSortOptimizationWithMissingValues() throws IOException { assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs); } + { // test that optimization is not run when missing value setting of SortField is competitive + // with after on asc order + final long afterValue = Long.MAX_VALUE; + final int afterDocID = 10 + random().nextInt(1000); + FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); + final SortField sortField = new SortField("my_field", SortField.Type.LONG); + sortField.setMissingValue(Long.MAX_VALUE); // set a competitive missing value + final Sort sort = new Sort(sortField); + CollectorManager manager = + TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); + TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); + assertEquals(topDocs.scoreDocs.length, numHits); + assertEquals( + topDocs.totalHits.value, + numDocs); // assert that all documents were collected => optimization was not run + } + + { // test that optimization is not run when missing value setting of SortField is competitive + // with after on desc order + final long afterValue = Long.MAX_VALUE; + final int afterDocID = 10 + random().nextInt(1000); + FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); + final SortField sortField = new SortField("my_field", SortField.Type.LONG, true); + sortField.setMissingValue(Long.MAX_VALUE); // set a competitive missing value + final Sort sort = new Sort(sortField); + CollectorManager manager = + TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); + TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); + assertEquals(topDocs.scoreDocs.length, numHits); + assertEquals( + topDocs.totalHits.value, + numDocs); // assert that all documents were collected => optimization was not run + } + reader.close(); dir.close(); }