From 34523ec542c21364fffefe726e6226d7f8ffa923 Mon Sep 17 00:00:00 2001 From: gashutos <gashutos@amazon.com> Date: Mon, 28 Aug 2023 18:18:13 +0530 Subject: [PATCH 1/4] Honor topvalue while determining isMissingvalueCompetitive in case bottom is not set Signed-off-by: gashutos <gashutos@amazon.com> --- lucene/CHANGES.txt | 2 ++ .../search/comparators/DoubleComparator.java | 10 +++++--- .../search/comparators/FloatComparator.java | 10 +++++--- .../search/comparators/IntComparator.java | 12 ++++++---- .../search/comparators/LongComparator.java | 12 ++++++---- .../search/comparators/NumericComparator.java | 24 ++++++++++++++++++- .../lucene/search/TestSortOptimization.java | 17 +++++++++++++ 7 files changed, 70 insertions(+), 17 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3a88efece170..0fc0c9b0c762 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -99,6 +99,8 @@ Bug Fixes * GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end +* LUCENE-12521: Sort After returning in-correct result when missing values are competitive + Other --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java index 68fc72c7472c..c1f8ee1aa3a8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/DoubleComparator.java @@ -96,9 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Double.compare(missingValue, bottom); - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Double.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Double.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java index 8bb9a22417d8..9eaff6631af1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/FloatComparator.java @@ -96,9 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Float.compare(missingValue, bottom); - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Float.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Float.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java index d3576e9ee076..46724611338a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/IntComparator.java @@ -96,11 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Integer.compare(missingValue, bottom); - // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, - // in asc sort missingValue is competitive when it's smaller or equal to bottom - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Integer.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Integer.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java index a96a163b44e2..46b774af0b60 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/LongComparator.java @@ -96,11 +96,13 @@ public void copy(int slot, int doc) throws IOException { } @Override - protected boolean isMissingValueCompetitive() { - int result = Long.compare(missingValue, bottom); - // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, - // in asc sort missingValue is competitive when it's smaller or equal to bottom - return reverse ? (result >= 0) : (result <= 0); + protected int compareMissingValueWithBottomValue() { + return Long.compare(missingValue, bottom); + } + + @Override + protected int compareMissingValueWithTopValue() { + return Long.compare(missingValue, topValue); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index ea75530fb2b4..cb699192bbda 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -309,6 +309,26 @@ private void updateSkipInterval(boolean success) { } } + private boolean isMissingValueCompetitive() { + // if queue is full, always compare with bottom, + // if not, check if we can compare with topValue + if (queueFull) { + int result = compareMissingValueWithBottomValue(); + // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom, + // in asc sort missingValue is competitive when it's smaller or equal to bottom + return reverse ? (result >= 0) : (result <= 0); + } else if (topValueSet) { + int result = compareMissingValueWithTopValue(); + // in reverse (desc) sort missingValue is competitive when it's smaller or equal to + // topValue, + // in asc sort missingValue is competitive when it's greater or equal to topValue + return reverse ? (result <= 0) : (result >= 0); + } else { + // by default competitive + return true; + } + } + @Override public DocIdSetIterator competitiveIterator() { if (enableSkipping == false) return null; @@ -337,7 +357,9 @@ public int advance(int target) throws IOException { }; } - protected abstract boolean isMissingValueCompetitive(); + protected abstract int compareMissingValueWithTopValue(); + + protected abstract int compareMissingValueWithBottomValue(); protected abstract void encodeBottom(byte[] packedValue); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index d30146f39a3c..c6ec61120e7a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -239,6 +239,23 @@ public void testSortOptimizationWithMissingValues() throws IOException { assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs); } + { // test that optimization is not run when missing value setting of SortField is competitive + // with after + final long afterValue = Long.MAX_VALUE; + final int afterDocID = 10 + random().nextInt(1000); + FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); + final SortField sortField = new SortField("my_field", SortField.Type.LONG); + sortField.setMissingValue(Long.MAX_VALUE); // set a NON competitive missing value + final Sort sort = new Sort(sortField); + CollectorManager<TopFieldCollector, TopFieldDocs> manager = + TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); + TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); + assertEquals(topDocs.scoreDocs.length, numHits); + assertEquals( + topDocs.totalHits.value, + numDocs); // assert that all documents were collected => optimization was not run + } + reader.close(); dir.close(); } From fc21f11935674c8bd7f6f19eefd53a5a10578a3e Mon Sep 17 00:00:00 2001 From: gashutos <gashutos@amazon.com> Date: Wed, 30 Aug 2023 17:36:45 +0530 Subject: [PATCH 2/4] Adding missing value competitiveness check for desc order sort with after as missing value Signed-off-by: gashutos <gashutos@amazon.com> --- .../lucene/search/TestSortOptimization.java | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index c6ec61120e7a..682fe008ae47 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -240,7 +240,7 @@ public void testSortOptimizationWithMissingValues() throws IOException { } { // test that optimization is not run when missing value setting of SortField is competitive - // with after + // with after on asc order final long afterValue = Long.MAX_VALUE; final int afterDocID = 10 + random().nextInt(1000); FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); @@ -256,6 +256,23 @@ public void testSortOptimizationWithMissingValues() throws IOException { numDocs); // assert that all documents were collected => optimization was not run } + { // test that optimization is not run when missing value setting of SortField is competitive + // with after on desc order + final long afterValue = Long.MAX_VALUE; + final int afterDocID = 10 + random().nextInt(1000); + FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); + final SortField sortField = new SortField("my_field", SortField.Type.LONG, true); + sortField.setMissingValue(Long.MAX_VALUE); // set a NON competitive missing value + final Sort sort = new Sort(sortField); + CollectorManager<TopFieldCollector, TopFieldDocs> manager = + TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); + TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager); + assertEquals(topDocs.scoreDocs.length, numHits); + assertEquals( + topDocs.totalHits.value, + numDocs); // assert that all documents were collected => optimization was not run + } + reader.close(); dir.close(); } From 29e119c9285b55e1b491bae2d5d611f9d4ed1d26 Mon Sep 17 00:00:00 2001 From: gashutos <gashutos@amazon.com> Date: Wed, 30 Aug 2023 17:42:02 +0530 Subject: [PATCH 3/4] Correcting comments Signed-off-by: gashutos <gashutos@amazon.com> --- .../test/org/apache/lucene/search/TestSortOptimization.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index 682fe008ae47..af0b1d0ed3e7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -245,7 +245,7 @@ public void testSortOptimizationWithMissingValues() throws IOException { final int afterDocID = 10 + random().nextInt(1000); FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); final SortField sortField = new SortField("my_field", SortField.Type.LONG); - sortField.setMissingValue(Long.MAX_VALUE); // set a NON competitive missing value + sortField.setMissingValue(Long.MAX_VALUE); // set a competitive missing value final Sort sort = new Sort(sortField); CollectorManager<TopFieldCollector, TopFieldDocs> manager = TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); @@ -262,7 +262,7 @@ public void testSortOptimizationWithMissingValues() throws IOException { final int afterDocID = 10 + random().nextInt(1000); FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Long[] {afterValue}); final SortField sortField = new SortField("my_field", SortField.Type.LONG, true); - sortField.setMissingValue(Long.MAX_VALUE); // set a NON competitive missing value + sortField.setMissingValue(Long.MAX_VALUE); // set a competitive missing value final Sort sort = new Sort(sortField); CollectorManager<TopFieldCollector, TopFieldDocs> manager = TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold); From 88ddc5cba861fbceb11f7c410a2775d0202c22ce Mon Sep 17 00:00:00 2001 From: Chaitanya Gohel <104654647+gashutos@users.noreply.github.com> Date: Mon, 4 Sep 2023 20:40:50 +0530 Subject: [PATCH 4/4] Update CHANGES.txt --- lucene/CHANGES.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0fc0c9b0c762..b8d2c33174de 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -99,8 +99,6 @@ Bug Fixes * GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end -* LUCENE-12521: Sort After returning in-correct result when missing values are competitive - Other --------------------- @@ -204,6 +202,8 @@ Bug Fixes impact on the user, unless you explicitly invoke the convert function of UTF32ToUTF8, and in the extremely rare scenario of searching a non-UTF-8 inverted field with Unicode search terms (Tang Donghai). +* LUCENE-12521: Sort After returning in-correct result when missing values are competitive. (Chaitanya Gohel) + Other ---------------------