Replace the deprecated IndexReader APIs with new storedFields() & termVectors() #7792

Merged · 8 commits · Jul 26, 2023
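
This PR retires two long-deprecated per-document accessors on Lucene's IndexReader — document(int) and getTermVectors(int), which relied on thread-local caches inside the reader — in favor of the explicit StoredFields and TermVectors accessor objects introduced in the Lucene 9.x line. The caller now obtains an accessor once and reuses it across documents, which makes both the reuse and the thread confinement explicit. A minimal before/after sketch of the pattern applied throughout the diff; the class and method names are illustrative, not taken from the OpenSearch code:

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.StoredFields;
    import org.apache.lucene.index.TermVectors;

    class MigrationSketch {
        // Before: deprecated in Lucene 9.x
        static void oldStyle(IndexReader reader, int docId) throws IOException {
            Document doc = reader.document(docId);         // deprecated
            Fields vectors = reader.getTermVectors(docId); // deprecated
        }

        // After: obtain the accessors once, then reuse them for many doc IDs
        static void newStyle(IndexReader reader, int docId) throws IOException {
            StoredFields storedFields = reader.storedFields();
            TermVectors termVectors = reader.termVectors();
            Document doc = storedFields.document(docId);
            Fields vectors = termVectors.get(docId);
        }
    }
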
CHANGELOG.md (1 change: 1 addition, 0 deletions)
@@ -46,6 +46,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Change http code on create index API with bad input raising NotXContentException from 500 to 400 ([#4773](https://github.com/opensearch-project/OpenSearch/pull/4773))
- Improve summary error message for invalid setting updates ([#4792](https://github.com/opensearch-project/OpenSearch/pull/4792))
- Remote Segment Store Repository setting moved from `index.remote_store.repository` to `index.remote_store.segment.repository` and `cluster.remote_store.repository` to `cluster.remote_store.segment.repository` respectively for Index and Cluster level settings ([#8719](https://github.com/opensearch-project/OpenSearch/pull/8719))
+ - Replace the deprecated IndexReader APIs with new storedFields() & termVectors() ([#7792](https://github.com/opensearch-project/OpenSearch/pull/7792))

### Deprecated

@@ -56,7 +56,9 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.PostingsEnum;
+ import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
+ import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
@@ -808,8 +810,10 @@ public String describeParams() {
*/
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
Map<String, Int> termFreqMap = new HashMap<>();
+ final TermVectors termVectors = ir.termVectors();
+ final StoredFields storedFields = ir.storedFields();
for (String fieldName : fieldNames) {
- final Fields vectors = ir.getTermVectors(docNum);
+ final Fields vectors = termVectors.get(docNum);
final Terms vector;
if (vectors != null) {
vector = vectors.terms(fieldName);
@@ -819,7 +823,7 @@ private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {

// field does not store term vector info
if (vector == null) {
- Document d = ir.document(docNum);
+ Document d = storedFields.document(docNum);
IndexableField fields[] = d.getFields(fieldName);
for (IndexableField field : fields) {
final String stringValue = field.stringValue();
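
The hunk above also hoists the two accessors out of the per-field loop, and keeps the existing fallback: use a field's term vector when one was indexed, otherwise recover terms from the stored field value. A condensed sketch of that fallback shape, assuming Lucene 9.x (the re-analysis of the stored value that MoreLikeThis performs is omitted):

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.StoredFields;
    import org.apache.lucene.index.TermVectors;
    import org.apache.lucene.index.Terms;

    final class TermSource {
        // Term vector for (docNum, field), or null so the caller can fall
        // back to analyzing the stored value instead.
        static Terms vectorOrNull(TermVectors termVectors, int docNum, String field) throws IOException {
            Fields vectors = termVectors.get(docNum); // null when the doc has no vectors at all
            return vectors == null ? null : vectors.terms(field);
        }

        // Stored values for the fallback path; empty when the field is not stored.
        static String[] storedValues(StoredFields storedFields, int docNum, String field) throws IOException {
            Document d = storedFields.document(docNum);
            return d.getValues(field);
        }
    }
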
@@ -45,6 +45,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SerialMergeScheduler;
+ import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@@ -507,12 +508,11 @@ private static void consumeFromType(IndexSearcher indexSearcher, String type, Ch
final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
final IntPredicate isLiveDoc = liveDocs == null ? i -> true : liveDocs::get;
final DocIdSetIterator docIdSetIterator = scorer.iterator();
+ final StoredFields storedFields = leafReaderContext.reader().storedFields();
while (docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (isLiveDoc.test(docIdSetIterator.docID())) {
logger.trace("processing doc {}", docIdSetIterator.docID());
- bytesRefConsumer.accept(
- leafReaderContext.reader().document(docIdSetIterator.docID()).getBinaryValue(DATA_FIELD_NAME)
- );
+ bytesRefConsumer.accept(storedFields.document(docIdSetIterator.docID()).getBinaryValue(DATA_FIELD_NAME));
}
}
}
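
As in the previous file, the accessor is created once per leaf (segment) and reused for every document the iterator produces, rather than being re-fetched inside the loop. A minimal sketch of this per-leaf pattern; the field name and consumer are illustrative:

    import java.io.IOException;
    import java.util.function.Consumer;

    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.StoredFields;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BytesRef;

    final class LeafScan {
        static void consume(LeafReaderContext leaf, DocIdSetIterator disi,
                            String dataField, Consumer<BytesRef> sink) throws IOException {
            StoredFields storedFields = leaf.reader().storedFields(); // once per segment
            int doc;
            while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                sink.accept(storedFields.document(doc).getBinaryValue(dataField));
            }
        }
    }
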
@@ -49,6 +49,7 @@
import org.apache.lucene.index.ShuffleForcedMergePolicy;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.StandardDirectoryReader;
+ import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -2889,14 +2890,15 @@ private void restoreVersionMapAndCheckpointTracker(DirectoryReader directoryRead
final CombinedDocValues dv = new CombinedDocValues(leaf.reader());
final IdOnlyFieldVisitor idFieldVisitor = new IdOnlyFieldVisitor();
final DocIdSetIterator iterator = scorer.iterator();
+ final StoredFields storedFields = leaf.reader().storedFields();
int docId;
while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
final long primaryTerm = dv.docPrimaryTerm(docId);
final long seqNo = dv.docSeqNo(docId);
localCheckpointTracker.markSeqNoAsProcessed(seqNo);
localCheckpointTracker.markSeqNoAsPersisted(seqNo);
idFieldVisitor.reset();
- leaf.reader().document(docId, idFieldVisitor);
+ storedFields.document(docId, idFieldVisitor);
if (idFieldVisitor.getId() == null) {
assert dv.isTombstone(docId);
continue;
@@ -289,7 +289,7 @@ private Translog.Operation readDocAsOp(int docIndex) throws IOException {
? SourceFieldMapper.RECOVERY_SOURCE_NAME
: SourceFieldMapper.NAME;
final FieldsVisitor fields = new FieldsVisitor(true, sourceField);
- leaf.reader().document(segmentDocID, fields);
+ leaf.reader().storedFields().document(segmentDocID, fields);

final Translog.Operation op;
final boolean isTombstone = parallelArray.isTombStone[docIndex];
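
The visitor overload moves the same way: StoredFields.document(int, StoredFieldVisitor) replaces the deprecated reader.document(int, StoredFieldVisitor) used with OpenSearch's FieldsVisitor here. A minimal sketch with a plain Lucene visitor that captures a single binary field; the field name and helper are hypothetical:

    import java.io.IOException;

    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.StoredFieldVisitor;

    final class SingleBinaryFieldVisitor extends StoredFieldVisitor {
        private final String field;
        byte[] value;

        SingleBinaryFieldVisitor(String field) {
            this.field = field;
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            if (fieldInfo.name.equals(field)) {
                return Status.YES;
            }
            return value == null ? Status.NO : Status.STOP; // stop once captured
        }

        @Override
        public void binaryField(FieldInfo fieldInfo, byte[] bytes) {
            value = bytes;
        }

        static byte[] read(LeafReader reader, int docId, String field) throws IOException {
            SingleBinaryFieldVisitor visitor = new SingleBinaryFieldVisitor(field);
            reader.storedFields().document(docId, visitor);
            return visitor.value;
        }
    }
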
@@ -221,28 +221,33 @@ public int maxDoc() {

@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
- if (docID != 0) {
- throw new IllegalArgumentException("no such doc ID " + docID);
- }
- if (visitor.needsField(FAKE_SOURCE_FIELD) == StoredFieldVisitor.Status.YES) {
- assert operation.source().toBytesRef().offset == 0;
- assert operation.source().toBytesRef().length == operation.source().toBytesRef().bytes.length;
- visitor.binaryField(FAKE_SOURCE_FIELD, operation.source().toBytesRef().bytes);
- }
- if (operation.routing() != null && visitor.needsField(FAKE_ROUTING_FIELD) == StoredFieldVisitor.Status.YES) {
- visitor.stringField(FAKE_ROUTING_FIELD, operation.routing());
- }
- if (visitor.needsField(FAKE_ID_FIELD) == StoredFieldVisitor.Status.YES) {
- BytesRef bytesRef = Uid.encodeId(operation.id());
- final byte[] id = new byte[bytesRef.length];
- System.arraycopy(bytesRef.bytes, bytesRef.offset, id, 0, bytesRef.length);
- visitor.binaryField(FAKE_ID_FIELD, id);
- }
+ storedFields().document(docID, visitor);
}

@Override
public StoredFields storedFields() throws IOException {
- throw new UnsupportedOperationException();
+ return new StoredFields() {
+ @Override
+ public void document(int docID, StoredFieldVisitor visitor) throws IOException {
+ if (docID != 0) {
+ throw new IllegalArgumentException("no such doc ID " + docID);
+ }
+ if (visitor.needsField(FAKE_SOURCE_FIELD) == StoredFieldVisitor.Status.YES) {
+ assert operation.source().toBytesRef().offset == 0;
+ assert operation.source().toBytesRef().length == operation.source().toBytesRef().bytes.length;
+ visitor.binaryField(FAKE_SOURCE_FIELD, operation.source().toBytesRef().bytes);
+ }
+ if (operation.routing() != null && visitor.needsField(FAKE_ROUTING_FIELD) == StoredFieldVisitor.Status.YES) {
+ visitor.stringField(FAKE_ROUTING_FIELD, operation.routing());
+ }
+ if (visitor.needsField(FAKE_ID_FIELD) == StoredFieldVisitor.Status.YES) {
+ BytesRef bytesRef = Uid.encodeId(operation.id());
+ final byte[] id = new byte[bytesRef.length];
+ System.arraycopy(bytesRef.bytes, bytesRef.offset, id, 0, bytesRef.length);
+ visitor.binaryField(FAKE_ID_FIELD, id);
+ }
+ }
+ };
}

@Override
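
This is the one place where the PR does more than swap call sites. TranslogLeafReader synthesizes _source, _routing, and _id from a translog operation, and that logic previously lived only in the deprecated document(int, StoredFieldVisitor) override while storedFields() threw UnsupportedOperationException, so a caller migrating to the new API would have failed on this reader. The change moves the synthesis into storedFields() and lets the legacy override delegate, keeping a single code path. The shape, reduced to a sketch with the field synthesis elided:

    import java.io.IOException;

    import org.apache.lucene.index.StoredFieldVisitor;
    import org.apache.lucene.index.StoredFields;

    abstract class SyntheticStoredFieldsSketch {
        // Legacy entry point now delegates instead of owning the logic.
        public void document(int docID, StoredFieldVisitor visitor) throws IOException {
            storedFields().document(docID, visitor);
        }

        public StoredFields storedFields() throws IOException {
            return new StoredFields() {
                @Override
                public void document(int docID, StoredFieldVisitor visitor) throws IOException {
                    // synthesize the fake _source/_routing/_id fields here,
                    // as the real TranslogLeafReader does above
                }
            };
        }
    }
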
@@ -276,7 +276,7 @@ private GetResult innerGetLoadFromStoredFields(
);
if (fieldVisitor != null) {
try {
- docIdAndVersion.reader.document(docIdAndVersion.docId, fieldVisitor);
+ docIdAndVersion.reader.storedFields().document(docIdAndVersion.docId, fieldVisitor);
} catch (IOException e) {
throw new OpenSearchException("Failed to get id [" + id + "]", e);
}
@@ -286,7 +286,7 @@ public Status needsField(FieldInfo fieldInfo) throws IOException {
boolean matches(int doc) throws IOException {
routing = id = null;
leftToVisit = 2;
- leafReader.document(doc, this);
+ leafReader.storedFields().document(doc, this);
assert id != null : "docID must not be null - we might have hit a nested document";
int targetShardId = OperationRouting.generateShardId(indexMetadata, id, routing);
return targetShardId != shardId;
@@ -39,6 +39,7 @@
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Term;
+ import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.opensearch.OpenSearchException;
@@ -127,7 +128,8 @@ static TermVectorsResponse getTermVectors(IndexShard indexShard, TermVectorsRequ
/* or from an existing document */
else if (docIdAndVersion != null) {
// fields with stored term vectors
- termVectorsByField = docIdAndVersion.reader.getTermVectors(docIdAndVersion.docId);
+ TermVectors termVectors = docIdAndVersion.reader.termVectors();
+ termVectorsByField = termVectors.get(docIdAndVersion.docId);
Set<String> selectedFields = request.selectedFields();
// generate tvs for fields where analyzer is overridden
if (selectedFields == null && request.perFieldAnalyzer() != null) {
@@ -322,7 +324,8 @@ private static Fields generateTermVectors(
}
}
/* and read vectors from it */
- return index.createSearcher().getIndexReader().getTermVectors(0);
+ TermVectors termVectors = index.createSearcher().getIndexReader().termVectors();
+ return termVectors.get(0);
}

private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVectorsRequest request) throws IOException {
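
For fields without stored term vectors, the code above builds them on the fly in a MemoryIndex and then reads them back — now via termVectors().get(0), since the in-memory index holds exactly one document (doc ID 0). A self-contained sketch of that round trip, with an illustrative analyzer choice:

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.memory.MemoryIndex;

    final class InMemoryVectors {
        static Fields vectorsFor(String field, String text) throws IOException {
            MemoryIndex index = new MemoryIndex(true); // true: also store offsets
            index.addField(field, text, new StandardAnalyzer());
            return index.createSearcher().getIndexReader().termVectors().get(0);
        }
    }
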
@@ -160,7 +160,7 @@ public void execute(SearchContext context) {
SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) currentReaderContext.reader();
fieldReader = lf.getSequentialStoredFieldsReader()::document;
} else {
- fieldReader = currentReaderContext.reader()::document;
+ fieldReader = currentReaderContext.reader().storedFields()::document;
}
for (FetchSubPhaseProcessor processor : processors) {
processor.setNextReader(currentReaderContext);
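
A detail worth noting in the method-reference swap above: in reader.storedFields()::document, storedFields() is evaluated once, when the reference is constructed (per leaf), and only the bound document call runs per document. A sketch with a stand-in functional interface (OpenSearch uses its own checked-consumer type here):

    import java.io.IOException;

    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.StoredFieldVisitor;

    final class FieldReaderSketch {
        @FunctionalInterface
        interface DocFieldReader {
            void read(int docId, StoredFieldVisitor visitor) throws IOException;
        }

        static DocFieldReader forLeaf(LeafReader reader) throws IOException {
            // storedFields() runs here, once per leaf; the reference replays
            // only StoredFields.document(int, StoredFieldVisitor) per doc.
            return reader.storedFields()::document;
        }
    }
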
@@ -72,7 +72,7 @@ public static List<Object> loadFieldValues(
) throws IOException {
if (forceSource == false && fieldType.isStored()) {
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
- hitContext.reader().document(hitContext.docId(), fieldVisitor);
+ hitContext.reader().storedFields().document(hitContext.docId(), fieldVisitor);
List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
return textsToHighlight != null ? textsToHighlight : Collections.emptyList();
}
@@ -153,7 +153,7 @@ private FieldLookup loadFieldData(String name) {
List<Object> values = new ArrayList<>(2);
SingleFieldsVisitor visitor = new SingleFieldsVisitor(data.fieldType(), values);
try {
- reader.document(docId, visitor);
+ reader.storedFields().document(docId, visitor);
} catch (IOException e) {
throw new OpenSearchParseException("failed to load field [{}]", e, name);
}
@@ -140,7 +140,7 @@ public void setSegmentAndDocument(LeafReaderContext context, int docId) {
SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) context.reader();
fieldReader = lf.getSequentialStoredFieldsReader()::document;
} else {
- fieldReader = context.reader()::document;
+ fieldReader = context.reader().storedFields()::document;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -35,6 +35,7 @@
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.StandardDirectoryReader;
+ import org.apache.lucene.index.StoredFields;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
@@ -565,12 +566,13 @@ public void testWrapAllDocsLive() throws Exception {
}
try (DirectoryReader unwrapped = DirectoryReader.open(writer)) {
DirectoryReader reader = Lucene.wrapAllDocsLive(unwrapped);
+ StoredFields storedFields = reader.storedFields();
assertThat(reader.numDocs(), equalTo(liveDocs.size()));
IndexSearcher searcher = new IndexSearcher(reader);
Set<String> actualDocs = new HashSet<>();
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
- actualDocs.add(reader.document(scoreDoc.doc).get("id"));
+ actualDocs.add(storedFields.document(scoreDoc.doc).get("id"));
}
assertThat(actualDocs, equalTo(liveDocs));
}
@@ -609,13 +611,14 @@ public void testWrapLiveDocsNotExposeAbortedDocuments() throws Exception {
}
try (DirectoryReader unwrapped = DirectoryReader.open(writer)) {
DirectoryReader reader = Lucene.wrapAllDocsLive(unwrapped);
+ StoredFields storedFields = reader.storedFields();
assertThat(reader.maxDoc(), equalTo(numDocs + abortedDocs));
assertThat(reader.numDocs(), equalTo(liveDocs.size()));
IndexSearcher searcher = new IndexSearcher(reader);
List<String> actualDocs = new ArrayList<>();
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
- actualDocs.add(reader.document(scoreDoc.doc).get("id"));
+ actualDocs.add(storedFields.document(scoreDoc.doc).get("id"));
}
assertThat(actualDocs, equalTo(liveDocs));
}
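
The test change mirrors the production pattern: fetch one StoredFields from the (composite) DirectoryReader before the hit loop and resolve every hit's stored "id" through it. A self-contained version of that pattern, assuming Lucene 9.x:

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.StoredFields;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.ByteBuffersDirectory;
    import org.apache.lucene.store.Directory;

    final class StoredIdsExample {
        static Set<String> allIds() throws IOException {
            try (Directory dir = new ByteBuffersDirectory()) {
                try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                    for (int i = 0; i < 3; i++) {
                        Document doc = new Document();
                        doc.add(new StringField("id", "doc-" + i, Field.Store.YES));
                        writer.addDocument(doc);
                    }
                }
                try (DirectoryReader reader = DirectoryReader.open(dir)) {
                    StoredFields storedFields = reader.storedFields(); // once, outside the loop
                    IndexSearcher searcher = new IndexSearcher(reader);
                    TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
                    Set<String> ids = new HashSet<>();
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        ids.add(storedFields.document(scoreDoc.doc).get("id"));
                    }
                    return ids;
                }
            }
        }
    }
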
@@ -43,6 +43,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.NoMergePolicy;
+ import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.Query;
@@ -149,9 +150,10 @@ public void setUp() throws Exception {

// now go over each doc, build the relevant references and filter
reader = DirectoryReader.open(iw);
+ StoredFields storedFields = reader.storedFields();
List<BytesRef> filterTerms = new ArrayList<>();
for (int docId = 0; docId < reader.maxDoc(); docId++) {
- Document doc = reader.document(docId);
+ Document doc = storedFields.document(docId);
addFreqs(doc, referenceAll);
if (!deletedIds.contains(doc.getField("id").stringValue())) {
addFreqs(doc, referenceNotDeleted);
@@ -2541,6 +2541,7 @@ class OpAndVersion {
final Term uidTerm = newUid(doc);
engine.index(indexForDoc(doc));
final BiFunction<String, Engine.SearcherScope, Engine.Searcher> searcherFactory = engine::acquireSearcher;

for (int i = 0; i < thread.length; i++) {
thread[i] = new Thread(() -> {
startGun.countDown();
@@ -2549,10 +2550,12 @@ class OpAndVersion {
} catch (InterruptedException e) {
throw new AssertionError(e);
}

for (int op = 0; op < opsPerThread; op++) {
try (Engine.GetResult get = engine.get(new Engine.Get(true, false, doc.id(), uidTerm), searcherFactory)) {

FieldsVisitor visitor = new FieldsVisitor(true);
- get.docIdAndVersion().reader.document(get.docIdAndVersion().docId, visitor);
+ get.docIdAndVersion().reader.storedFields().document(get.docIdAndVersion().docId, visitor);
List<String> values = new ArrayList<>(Strings.commaDelimitedListToSet(visitor.source().utf8ToString()));
String removed = op % 3 == 0 && values.size() > 0 ? values.remove(0) : null;
String added = "v_" + idGenerator.incrementAndGet();
@@ -2608,7 +2611,7 @@ class OpAndVersion {

try (Engine.GetResult get = engine.get(new Engine.Get(true, false, doc.id(), uidTerm), searcherFactory)) {
FieldsVisitor visitor = new FieldsVisitor(true);
- get.docIdAndVersion().reader.document(get.docIdAndVersion().docId, visitor);
+ get.docIdAndVersion().reader.storedFields().document(get.docIdAndVersion().docId, visitor);
List<String> values = Arrays.asList(Strings.commaDelimitedListToStringArray(visitor.source().utf8ToString()));
assertThat(currentValues, equalTo(new HashSet<>(values)));
}