diff --git a/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java b/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java index bb3e34b4e410b..99a7df976371c 100644 --- a/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java +++ b/server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java @@ -13,12 +13,14 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * + * Modifications copyright (C) 2020 Elasticsearch B.V. */ - package org.apache.lucene.search; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.XCombinedFieldQuery.FieldAndWeight; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.SmallFloat; @@ -28,134 +30,148 @@ import java.util.List; import java.util.Objects; -import static org.apache.lucene.search.XCombinedFieldQuery.FieldAndWeight; - /** - * Copy of {@link LeafSimScorer} that sums document's norms from multiple fields. + * Copy of {@link MultiNormsLeafSimScorer} that contains a fix for LUCENE-9999. + * TODO: remove once LUCENE-9999 is fixed and integrated * - * TODO: this is temporarily copied from Lucene, remove once we update to Lucene 8.9. + *

This scorer requires that either all fields or no fields have norms enabled. It will throw an + * error if some fields have norms enabled, while others have norms disabled. */ final class XMultiNormsLeafSimScorer { - /** - * Cache of decoded norms. - */ - private static final float[] LENGTH_TABLE = new float[256]; - - static { - for (int i = 0; i < 256; i++) { - LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i); - } - } + /** Cache of decoded norms. */ + private static final float[] LENGTH_TABLE = new float[256]; - private final SimScorer scorer; - private final NumericDocValues norms; - - /** - * Sole constructor: Score documents of {@code reader} with {@code scorer}. - * - */ - XMultiNormsLeafSimScorer(SimScorer scorer, - LeafReader reader, - Collection normFields, - boolean needsScores) throws IOException { - this.scorer = Objects.requireNonNull(scorer); - if (needsScores) { - final List normsList = new ArrayList<>(); - final List weightList = new ArrayList<>(); - for (FieldAndWeight field : normFields) { - NumericDocValues norms = reader.getNormValues(field.field); - if (norms != null) { - normsList.add(norms); - weightList.add(field.weight); - } - } - if (normsList.isEmpty()) { - norms = null; - } else if (normsList.size() == 1) { - norms = normsList.get(0); - } else { - final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]); - final float[] weightArr = new float[normsList.size()]; - for (int i = 0; i < weightList.size(); i++) { - weightArr[i] = weightList.get(i); - } - norms = new XMultiNormsLeafSimScorer.MultiFieldNormValues(normsArr, weightArr); - } - } else { - norms = null; - } + static { + for (int i = 0; i < 256; i++) { + LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i); } - - private long getNormValue(int doc) throws IOException { + } + + private final SimScorer scorer; + private final NumericDocValues norms; + + /** Sole constructor: Score documents of {@code reader} with {@code scorer}. */ + XMultiNormsLeafSimScorer( + SimScorer scorer, + LeafReader reader, + Collection normFields, + boolean needsScores) + throws IOException { + this.scorer = Objects.requireNonNull(scorer); + if (needsScores) { + final List normsList = new ArrayList<>(); + final List weightList = new ArrayList<>(); + for (FieldAndWeight field : normFields) { + NumericDocValues norms = reader.getNormValues(field.field); if (norms != null) { - boolean found = norms.advanceExact(doc); - assert found; - return norms.longValue(); - } else { - return 1L; // default norm + normsList.add(norms); + weightList.add(field.weight); + } + } + + if (normsList.isEmpty() == false && normsList.size() != normFields.size()) { + throw new IllegalArgumentException( + getClass().getSimpleName() + + " requires norms to be consistent across fields: some fields cannot" + + " have norms enabled, while others have norms disabled"); + } + + if (normsList.isEmpty()) { + norms = null; + } else if (normsList.size() == 1) { + norms = normsList.get(0); + } else { + final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]); + final float[] weightArr = new float[normsList.size()]; + for (int i = 0; i < weightList.size(); i++) { + weightArr[i] = weightList.get(i); } + norms = new MultiFieldNormValues(normsArr, weightArr); + } + } else { + norms = null; } - - /** Score the provided document assuming the given term document frequency. - * This method must be called on non-decreasing sequences of doc ids. - * @see SimScorer#score(float, long) */ - public float score(int doc, float freq) throws IOException { - return scorer.score(freq, getNormValue(doc)); + } + + private long getNormValue(int doc) throws IOException { + if (norms != null) { + boolean found = norms.advanceExact(doc); + assert found; + return norms.longValue(); + } else { + return 1L; // default norm } - - /** Explain the score for the provided document assuming the given term document frequency. - * This method must be called on non-decreasing sequences of doc ids. - * @see SimScorer#explain(Explanation, long) */ - public Explanation explain(int doc, Explanation freqExpl) throws IOException { - return scorer.explain(freqExpl, getNormValue(doc)); + } + + /** + * Score the provided document assuming the given term document frequency. This method must be + * called on non-decreasing sequences of doc ids. + * + * @see SimScorer#score(float, long) + */ + public float score(int doc, float freq) throws IOException { + return scorer.score(freq, getNormValue(doc)); + } + + /** + * Explain the score for the provided document assuming the given term document frequency. This + * method must be called on non-decreasing sequences of doc ids. + * + * @see SimScorer#explain(Explanation, long) + */ + public Explanation explain(int doc, Explanation freqExpl) throws IOException { + return scorer.explain(freqExpl, getNormValue(doc)); + } + + private static class MultiFieldNormValues extends NumericDocValues { + private final NumericDocValues[] normsArr; + private final float[] weightArr; + private long current; + private int docID = -1; + + MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) { + this.normsArr = normsArr; + this.weightArr = weightArr; } - private static class MultiFieldNormValues extends NumericDocValues { - private final NumericDocValues[] normsArr; - private final float[] weightArr; - private long current; - private int docID = -1; - - MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) { - this.normsArr = normsArr; - this.weightArr = weightArr; - } - - @Override - public long longValue() { - return current; - } + @Override + public long longValue() { + return current; + } - @Override - public boolean advanceExact(int target) throws IOException { - float normValue = 0; - for (int i = 0; i < normsArr.length; i++) { - boolean found = normsArr[i].advanceExact(target); - assert found; - normValue += weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())]; - } - current = SmallFloat.intToByte4(Math.round(normValue)); - return true; + @Override + public boolean advanceExact(int target) throws IOException { + float normValue = 0; + boolean found = false; + for (int i = 0; i < normsArr.length; i++) { + if (normsArr[i].advanceExact(target)) { + normValue += + weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())]; + found = true; } + } + current = SmallFloat.intToByte4(Math.round(normValue)); + return found; + } - @Override - public int docID() { - return docID; - } + @Override + public int docID() { + return docID; + } - @Override - public int nextDoc() { - throw new UnsupportedOperationException(); - } + @Override + public int nextDoc() { + throw new UnsupportedOperationException(); + } - @Override - public int advance(int target) { - throw new UnsupportedOperationException(); - } + @Override + public int advance(int target) { + throw new UnsupportedOperationException(); + } - @Override - public long cost() { - throw new UnsupportedOperationException(); - } + @Override + public long cost() { + throw new UnsupportedOperationException(); } + } } diff --git a/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java b/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java index 4f79b51b21022..478d9ccf3161e 100644 --- a/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java +++ b/server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java @@ -13,21 +13,20 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * + * Modifications copyright (C) 2020 Elasticsearch B.V. */ package org.apache.lucene.search; import com.carrotsearch.randomizedtesting.generators.RandomPicks; -import java.io.IOException; -import java.util.Arrays; + import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.BM25Similarity; @@ -37,60 +36,21 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.Arrays; + /** - * TODO: this is temporarily copied from Lucene, remove once we update to Lucene 8.9. + * Test for @link {@link XCombinedFieldQuery} + * TODO remove once LUCENE 9999 is fixed and integrated and we remove our copy of the query + * */ public class XCombinedFieldQueryTests extends LuceneTestCase { - public void testInvalid() { - XCombinedFieldQuery.Builder builder = new XCombinedFieldQuery.Builder(); - IllegalArgumentException exc = - expectThrows(IllegalArgumentException.class, () -> builder.addField("foo", 0.5f)); - assertEquals(exc.getMessage(), "weight must be greater or equal to 1"); - } - - public void testRewrite() throws IOException { - XCombinedFieldQuery.Builder builder = new XCombinedFieldQuery.Builder(); - IndexReader reader = new MultiReader(); - IndexSearcher searcher = new IndexSearcher(reader); - Query actual = searcher.rewrite(builder.build()); - assertEquals(actual, new MatchNoDocsQuery()); - builder.addField("field", 1f); - actual = searcher.rewrite(builder.build()); - assertEquals(actual, new MatchNoDocsQuery()); - builder.addTerm(new BytesRef("foo")); - actual = searcher.rewrite(builder.build()); - assertEquals(actual, new TermQuery(new Term("field", "foo"))); - builder.addTerm(new BytesRef("bar")); - actual = searcher.rewrite(builder.build()); - assertEquals( - actual, - new SynonymQuery.Builder("field") - .addTerm(new Term("field", "foo")) - .addTerm(new Term("field", "bar")) - .build()); - builder.addField("another_field", 1f); - Query query = builder.build(); - actual = searcher.rewrite(query); - assertEquals(actual, query); - } - public void testToString() { - assertEquals("CombinedFieldQuery(()())", new XCombinedFieldQuery.Builder().build().toString()); - XCombinedFieldQuery.Builder builder = new XCombinedFieldQuery.Builder(); - builder.addField("foo", 1f); - assertEquals("CombinedFieldQuery((foo)())", builder.build().toString()); - builder.addTerm(new BytesRef("bar")); - assertEquals("CombinedFieldQuery((foo)(bar))", builder.build().toString()); - builder.addField("title", 3f); - assertEquals("CombinedFieldQuery((foo title^3.0)(bar))", builder.build().toString()); - builder.addTerm(new BytesRef("baz")); - assertEquals("CombinedFieldQuery((foo title^3.0)(bar baz))", builder.build().toString()); - } - - public void testSameScore() throws IOException { + public void testNormsDisabled() throws IOException { Directory dir = newDirectory(); Similarity similarity = randomCompatibleSimilarity(); @@ -99,86 +59,77 @@ public void testSameScore() throws IOException { RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); - doc.add(new StringField("f", "a", Store.NO)); + doc.add(new StringField("a", "value", Store.NO)); + doc.add(new StringField("b", "value", Store.NO)); + doc.add(new TextField("c", "value", Store.NO)); w.addDocument(doc); - doc = new Document(); - doc.add(new StringField("g", "a", Store.NO)); - for (int i = 0; i < 10; ++i) { - w.addDocument(doc); - } - IndexReader reader = w.getReader(); IndexSearcher searcher = newSearcher(reader); - searcher.setSimilarity(similarity); - XCombinedFieldQuery query = - new XCombinedFieldQuery.Builder() - .addField("f", 1f) - .addField("g", 1f) - .addTerm(new BytesRef("a")) - .build(); - TopScoreDocCollector collector = - TopScoreDocCollector.create( - Math.min(reader.numDocs(), Integer.MAX_VALUE), null, Integer.MAX_VALUE); + + Similarity searchSimilarity = randomCompatibleSimilarity(); + searcher.setSimilarity(searchSimilarity); + TopScoreDocCollector collector = TopScoreDocCollector.create(10, null, 10); + + XCombinedFieldQuery query = new XCombinedFieldQuery.Builder().addField("a", 1.0f) + .addField("b", 1.0f) + .addTerm(new BytesRef("value")) + .build(); searcher.search(query, collector); TopDocs topDocs = collector.topDocs(); - assertEquals(new TotalHits(11, TotalHits.Relation.EQUAL_TO), topDocs.totalHits); - // All docs must have the same score - for (int i = 0; i < topDocs.scoreDocs.length; ++i) { - assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f); - } + assertEquals(new TotalHits(1, TotalHits.Relation.EQUAL_TO), topDocs.totalHits); + + XCombinedFieldQuery invalidQuery = new XCombinedFieldQuery.Builder().addField("b", 1.0f) + .addField("c", 1.0f) + .addTerm(new BytesRef("value")) + .build(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> searcher.search(invalidQuery, collector)); + assertTrue(e.getMessage().contains("requires norms to be consistent across fields")); reader.close(); w.close(); dir.close(); } - public void testCopyField() throws IOException { - Directory dir = newDirectory(); + public void testCopyFieldWithMissingFields() throws IOException { + Directory dir = new MMapDirectory(createTempDir()); Similarity similarity = randomCompatibleSimilarity(); IndexWriterConfig iwc = new IndexWriterConfig(); iwc.setSimilarity(similarity); RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - int numMatch = atLeast(10); int boost1 = Math.max(1, random().nextInt(5)); int boost2 = Math.max(1, random().nextInt(5)); + int numMatch = atLeast(10); for (int i = 0; i < numMatch; i++) { Document doc = new Document(); - if (random().nextBoolean()) { - doc.add(new TextField("a", "baz", Store.NO)); - doc.add(new TextField("b", "baz", Store.NO)); - for (int k = 0; k < boost1 + boost2; k++) { - doc.add(new TextField("ab", "baz", Store.NO)); - } - w.addDocument(doc); - doc.clear(); - } int freqA = random().nextInt(5) + 1; for (int j = 0; j < freqA; j++) { doc.add(new TextField("a", "foo", Store.NO)); } - int freqB = random().nextInt(5) + 1; + + // Choose frequencies such that sometimes we don't add field B + int freqB = random().nextInt(3); for (int j = 0; j < freqB; j++) { doc.add(new TextField("b", "foo", Store.NO)); } + int freqAB = freqA * boost1 + freqB * boost2; for (int j = 0; j < freqAB; j++) { doc.add(new TextField("ab", "foo", Store.NO)); } + w.addDocument(doc); } + IndexReader reader = w.getReader(); IndexSearcher searcher = newSearcher(reader); - searcher.setSimilarity(similarity); - XCombinedFieldQuery query = - new XCombinedFieldQuery.Builder() - .addField("a", (float) boost1) - .addField("b", (float) boost2) - .addTerm(new BytesRef("foo")) - .build(); + XCombinedFieldQuery query = new XCombinedFieldQuery.Builder().addField("a", boost1) + .addField("b", boost2) + .addTerm(new BytesRef("foo")) + .build(); checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo"))); @@ -187,51 +138,15 @@ public void testCopyField() throws IOException { dir.close(); } - public void testCopyFieldWithMultipleTerms() throws IOException { - Directory dir = newDirectory(); - Similarity similarity = randomCompatibleSimilarity(); - - IndexWriterConfig iwc = new IndexWriterConfig(); - iwc.setSimilarity(similarity); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - - int numMatch = atLeast(10); - int boost1 = Math.max(1, random().nextInt(5)); - int boost2 = Math.max(1, random().nextInt(5)); - for (int i = 0; i < numMatch; i++) { - Document doc = new Document(); - - int freqA = random().nextInt(5) + 1; - for (int j = 0; j < freqA; j++) { - doc.add(new TextField("a", "foo", Store.NO)); - } - int freqB = random().nextInt(5) + 1; - for (int j = 0; j < freqB; j++) { - doc.add(new TextField("b", "bar", Store.NO)); - } - int freqAB = freqA * boost1 + freqB * boost2; - for (int j = 0; j < freqAB; j++) { - doc.add(new TextField("ab", "foo", Store.NO)); - } - w.addDocument(doc); - } - IndexReader reader = w.getReader(); - IndexSearcher searcher = newSearcher(reader); - - searcher.setSimilarity(similarity); - XCombinedFieldQuery query = - new XCombinedFieldQuery.Builder() - .addField("a", (float) boost1) - .addField("b", (float) boost2) - .addTerm(new BytesRef("foo")) - .addTerm(new BytesRef("bar")) - .build(); - - checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo"))); - - reader.close(); - w.close(); - dir.close(); + private void checkExpectedHits(IndexSearcher searcher, int numHits, Query firstQuery, Query secondQuery) throws IOException { + TopScoreDocCollector firstCollector = TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); + searcher.search(firstQuery, firstCollector); + TopDocs firstTopDocs = firstCollector.topDocs(); + assertEquals(numHits, firstTopDocs.totalHits.value); + TopScoreDocCollector secondCollector = TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); + searcher.search(secondQuery, secondCollector); + TopDocs secondTopDocs = secondCollector.topDocs(); + CheckHits.checkEqual(firstQuery, secondTopDocs.scoreDocs, firstTopDocs.scoreDocs); } private static Similarity randomCompatibleSimilarity() { @@ -242,102 +157,8 @@ private static Similarity randomCompatibleSimilarity() { new BooleanSimilarity(), new ClassicSimilarity(), new LMDirichletSimilarity(), - new LMJelinekMercerSimilarity(0.1f))); - } - - private void checkExpectedHits( - IndexSearcher searcher, int numHits, Query firstQuery, Query secondQuery) throws IOException { - TopScoreDocCollector firstCollector = - TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); - searcher.search(firstQuery, firstCollector); - TopDocs firstTopDocs = firstCollector.topDocs(); - assertEquals(numHits, firstTopDocs.totalHits.value); - - TopScoreDocCollector secondCollector = - TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE); - searcher.search(secondQuery, secondCollector); - TopDocs secondTopDocs = secondCollector.topDocs(); - CheckHits.checkEqual(firstQuery, secondTopDocs.scoreDocs, firstTopDocs.scoreDocs); - } - - public void testDocWithNegativeNorms() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = new IndexWriterConfig(); - iwc.setSimilarity(new NegativeNormSimilarity()); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - - String queryString = "foo"; - - Document doc = new Document(); - // both fields must contain tokens that match the query string "foo" - doc.add(new TextField("f", "foo", Store.NO)); - doc.add(new TextField("g", "foo baz", Store.NO)); - w.addDocument(doc); - - IndexReader reader = w.getReader(); - IndexSearcher searcher = newSearcher(reader); - searcher.setSimilarity(new BM25Similarity()); - XCombinedFieldQuery query = - new XCombinedFieldQuery.Builder() - .addField("f") - .addField("g") - .addTerm(new BytesRef(queryString)) - .build(); - TopDocs topDocs = searcher.search(query, 10); - CheckHits.checkDocIds("queried docs do not match", new int[] {0}, topDocs.scoreDocs); - - reader.close(); - w.close(); - dir.close(); - } - - public void testMultipleDocsNegativeNorms() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = new IndexWriterConfig(); - iwc.setSimilarity(new NegativeNormSimilarity()); - RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - - String queryString = "foo"; - - Document doc0 = new Document(); - doc0.add(new TextField("f", "foo", Store.NO)); - doc0.add(new TextField("g", "foo baz", Store.NO)); - w.addDocument(doc0); - - Document doc1 = new Document(); - // add another match on the query string to the second doc - doc1.add(new TextField("f", "foo is foo", Store.NO)); - doc1.add(new TextField("g", "foo baz", Store.NO)); - w.addDocument(doc1); - - IndexReader reader = w.getReader(); - IndexSearcher searcher = newSearcher(reader); - searcher.setSimilarity(new BM25Similarity()); - XCombinedFieldQuery query = - new XCombinedFieldQuery.Builder() - .addField("f") - .addField("g") - .addTerm(new BytesRef(queryString)) - .build(); - TopDocs topDocs = searcher.search(query, 10); - // Return doc1 ahead of doc0 since its tf is higher - CheckHits.checkDocIds("queried docs do not match", new int[] {1, 0}, topDocs.scoreDocs); - - reader.close(); - w.close(); - dir.close(); - } - - private static final class NegativeNormSimilarity extends Similarity { - @Override - public long computeNorm(FieldInvertState state) { - return -128; - } - - @Override - public SimScorer scorer( - float boost, CollectionStatistics collectionStats, TermStatistics... termStats) { - return new BM25Similarity().scorer(boost, collectionStats, termStats); - } + new LMJelinekMercerSimilarity(0.1f) + ) + ); } }