Skip to content

Commit

Permalink
Fix handling of ghost fields in string sorts. (#11792)
Browse files Browse the repository at this point in the history
Introduction of dynamic pruning for string sorts (#11669) introduced a bug with
string sorts and ghost fields, triggering a `NullPointerException` because the
code assumes that `LeafReader#terms` is not null if the field is indexed
according to field infos.

This commit fixes the issue and adds tests for ghost fields across all sort
types.

Hopefully we can simplify and remove the null check in the future when we
improve handling of ghost fields (#11393).
  • Loading branch information
jpountz committed Sep 20, 2022
1 parent a07b519 commit 4eb4c8e
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ private class TermOrdValLeafComparator implements LeafFieldComparator {
enableSkipping = false;
} else {
Terms terms = context.reader().terms(field);
dense = terms.getDocCount() == context.reader().maxDoc();
dense = terms != null && terms.getDocCount() == context.reader().maxDoc();
if (dense || topValue != null) {
enableSkipping = true;
} else if (reverse == sortMissingLast) {
Expand Down
193 changes: 193 additions & 0 deletions lucene/core/src/test/org/apache/lucene/search/TestSort.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,21 @@
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
Expand Down Expand Up @@ -847,4 +857,187 @@ public void testRewrite() throws IOException {
assertSame(sort, sort.rewrite(searcher));
}
}

// Ghost tests make sure that sorting can cope with segments that are missing values while their
// FieldInfo reports that the field exists.

public void testStringGhost() throws IOException {
doTestStringGhost(true);
doTestStringGhost(false);
}

private void doTestStringGhost(boolean indexed) throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
Document doc = new Document();
doc.add(new SortedDocValuesField("value", newBytesRef("foo")));
if (indexed) {
doc.add(newStringField("value", "foo", Field.Store.YES));
}
doc.add(new StringField("id", "0", Store.NO));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.flush();
writer.addDocument(new Document());
writer.flush();
writer.deleteDocuments(new Term("id", "0"));
writer.forceMerge(1);
IndexReader ir = DirectoryReader.open(writer);
writer.close();

IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.STRING));

TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits.value);
assertNull(((FieldDoc) td.scoreDocs[0]).fields[0]);
assertNull(((FieldDoc) td.scoreDocs[1]).fields[0]);

ir.close();
dir.close();
}

public void testIntGhost() throws IOException {
doTestIntGhost(true);
doTestIntGhost(false);
}

private void doTestIntGhost(boolean indexed) throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
Document doc = new Document();
doc.add(new NumericDocValuesField("value", 3));
if (indexed) {
doc.add(new IntPoint("value", 3));
}
doc.add(new StringField("id", "0", Store.NO));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.flush();
writer.addDocument(new Document());
writer.flush();
writer.deleteDocuments(new Term("id", "0"));
writer.forceMerge(1);
IndexReader ir = DirectoryReader.open(writer);
writer.close();

IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.INT));

TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits.value);
assertEquals(0, ((FieldDoc) td.scoreDocs[0]).fields[0]);
assertEquals(0, ((FieldDoc) td.scoreDocs[1]).fields[0]);

ir.close();
dir.close();
}

public void testLongGhost() throws IOException {
doTestLongGhost(true);
doTestLongGhost(false);
}

private void doTestLongGhost(boolean indexed) throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
Document doc = new Document();
doc.add(new NumericDocValuesField("value", 3L));
if (indexed) {
doc.add(new LongPoint("value", 3L));
}
doc.add(new StringField("id", "0", Store.NO));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.flush();
writer.addDocument(new Document());
writer.flush();
writer.deleteDocuments(new Term("id", "0"));
writer.forceMerge(1);
IndexReader ir = DirectoryReader.open(writer);
writer.close();

IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.LONG));

TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits.value);
assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
assertEquals(0L, ((FieldDoc) td.scoreDocs[1]).fields[0]);

ir.close();
dir.close();
}

public void testDoubleGhost() throws IOException {
doTestDoubleGhost(true);
doTestDoubleGhost(false);
}

private void doTestDoubleGhost(boolean indexed) throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
Document doc = new Document();
doc.add(new DoubleDocValuesField("value", 1.25));
if (indexed) {
doc.add(new DoublePoint("value", 1.25));
}
doc.add(new StringField("id", "0", Store.NO));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.flush();
writer.addDocument(new Document());
writer.flush();
writer.deleteDocuments(new Term("id", "0"));
writer.forceMerge(1);
IndexReader ir = DirectoryReader.open(writer);
writer.close();

IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));

TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits.value);
assertEquals(0.0, ((FieldDoc) td.scoreDocs[0]).fields[0]);
assertEquals(0.0, ((FieldDoc) td.scoreDocs[1]).fields[0]);

ir.close();
dir.close();
}

public void testFloatGhost() throws IOException {
doTestFloatGhost(true);
doTestFloatGhost(false);
}

private void doTestFloatGhost(boolean indexed) throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
Document doc = new Document();
doc.add(new FloatDocValuesField("value", 1.25f));
if (indexed) {
doc.add(new FloatPoint("value", 1.25f));
}
doc.add(new StringField("id", "0", Store.NO));
writer.addDocument(doc);
writer.addDocument(new Document());
writer.flush();
writer.addDocument(new Document());
writer.flush();
writer.deleteDocuments(new Term("id", "0"));
writer.forceMerge(1);
IndexReader ir = DirectoryReader.open(writer);
writer.close();

IndexSearcher searcher = newSearcher(ir);
Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));

TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
assertEquals(2, td.totalHits.value);
assertEquals(0.0f, ((FieldDoc) td.scoreDocs[0]).fields[0]);
assertEquals(0.0f, ((FieldDoc) td.scoreDocs[1]).fields[0]);

ir.close();
dir.close();
}
}

0 comments on commit 4eb4c8e

Please sign in to comment.