Skip to content

Commit

Permalink
Remove workaround to access Lucene's "expert" readLatestCommit API (o…
Browse files Browse the repository at this point in the history
…pensearch-project#7671)

* Remove workaround to access Lucene's "expert" readLatestCommit API

Lucene 9.6.0 includes changes to make the "expert" readLatestCommit API public, so the current code workaround using DirectoryReader is no longer required.

Signed-off-by: Kartik Ganesh <[email protected]>

* Spotless run

Signed-off-by: Kartik Ganesh <[email protected]>

---------

Signed-off-by: Kartik Ganesh <[email protected]>
Signed-off-by: Bharathwaj G <[email protected]>
  • Loading branch information
kartg authored and bharath-techie committed May 23, 2023
1 parent ec2c788 commit d1274ad
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 73 deletions.
18 changes: 4 additions & 14 deletions server/src/main/java/org/opensearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FieldDoc;
Expand Down Expand Up @@ -138,21 +137,12 @@ public static SegmentInfos readSegmentInfos(Directory directory) throws IOExcept

/**
* A variant of {@link #readSegmentInfos(Directory)} that supports reading indices written by
* older major versions of Lucene. The underlying implementation is a workaround since the
* "expert" readLatestCommit API is currently package-private in Lucene. First, all commits in
* the given {@link Directory} are listed - this result includes older Lucene commits. Then,
* the latest index commit is opened via {@link DirectoryReader} by including a minimum supported
* Lucene major version based on the minimum compatibility of the given {@link org.opensearch.Version}.
* older major versions of Lucene. This leverages Lucene's "expert" readLatestCommit API. The
* {@link org.opensearch.Version} parameter determines the minimum supported Lucene major version.
*/
public static SegmentInfos readSegmentInfosExtendedCompatibility(Directory directory, org.opensearch.Version minimumVersion)
throws IOException {
// This list is sorted from oldest to latest
List<IndexCommit> indexCommits = DirectoryReader.listCommits(directory);
IndexCommit latestCommit = indexCommits.get(indexCommits.size() - 1);
public static SegmentInfos readSegmentInfos(Directory directory, org.opensearch.Version minimumVersion) throws IOException {
final int minSupportedLuceneMajor = minimumVersion.minimumIndexCompatibilityVersion().luceneVersion.major;
try (StandardDirectoryReader reader = (StandardDirectoryReader) DirectoryReader.open(latestCommit, minSupportedLuceneMajor, null)) {
return reader.getSegmentInfos();
}
return SegmentInfos.readLatestCommit(directory, minSupportedLuceneMajor);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public ReadOnlyEngine(
// yet this makes sure nobody else does. including some testing tools that try to be messy
indexWriterLock = obtainLock ? directory.obtainLock(IndexWriter.WRITE_LOCK_NAME) : null;
if (isExtendedCompatibility()) {
this.lastCommittedSegmentInfos = Lucene.readSegmentInfosExtendedCompatibility(directory, this.minimumSupportedVersion);
this.lastCommittedSegmentInfos = Lucene.readSegmentInfos(directory, this.minimumSupportedVersion);
} else {
this.lastCommittedSegmentInfos = Lucene.readSegmentInfos(directory);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2275,9 +2275,7 @@ private boolean assertSequenceNumbersInCommit() throws IOException {

private Map<String, String> fetchUserData() throws IOException {
if (indexSettings.isRemoteSnapshot() && indexSettings.getExtendedCompatibilitySnapshotVersion() != null) {
// Inefficient method to support reading old Lucene indexes
return Lucene.readSegmentInfosExtendedCompatibility(store.directory(), indexSettings.getExtendedCompatibilitySnapshotVersion())
.getUserData();
return Lucene.readSegmentInfos(store.directory(), indexSettings.getExtendedCompatibilitySnapshotVersion()).getUserData();
} else {
return SegmentInfos.readLatestCommit(store.directory()).getUserData();
}
Expand Down
2 changes: 1 addition & 1 deletion server/src/main/java/org/opensearch/index/store/Store.java
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ private static SegmentInfos readSegmentsInfo(IndexCommit commit, Directory direc
private static SegmentInfos readSegmentInfosExtendedCompatibility(Directory directory, org.opensearch.Version minimumVersion)
throws IOException {
try {
return Lucene.readSegmentInfosExtendedCompatibility(directory, minimumVersion);
return Lucene.readSegmentInfos(directory, minimumVersion);
} catch (EOFException eof) {
// TODO this should be caught by lucene - EOF is almost certainly an index corruption
throw new CorruptIndexException("Read past EOF while reading segment infos", "<latest-commit>", eof);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ public void testReadSegmentInfosExtendedCompatibility() throws IOException {
try (MockDirectoryWrapper dir = newMockFSDirectory(tmp)) {
// The standard API will throw an exception
expectThrows(IndexFormatTooOldException.class, () -> Lucene.readSegmentInfos(dir));
SegmentInfos si = Lucene.readSegmentInfosExtendedCompatibility(dir, minVersion);
SegmentInfos si = Lucene.readSegmentInfos(dir, minVersion);
assertEquals(1, Lucene.getNumDocs(si));
IndexCommit indexCommit = Lucene.getIndexCommit(si, dir);
// uses the "expert" Lucene API
Expand All @@ -358,59 +358,6 @@ public void testReadSegmentInfosExtendedCompatibility() throws IOException {
}
}

/**
* Since the implementation in {@link Lucene#readSegmentInfosExtendedCompatibility(Directory, Version)}
* is a workaround, this test verifies that the response from this method is equivalent to
* {@link Lucene#readSegmentInfos(Directory)} if the version is N-1
*/
public void testReadSegmentInfosExtendedCompatibilityBaseCase() throws IOException {
MockDirectoryWrapper dir = newMockDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
IndexWriter writer = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
writer.addDocument(doc);
writer.commit();
SegmentInfos expectedSI = Lucene.readSegmentInfos(dir);
SegmentInfos actualSI = Lucene.readSegmentInfosExtendedCompatibility(dir, Version.CURRENT);
assertEquals(Lucene.getNumDocs(expectedSI), Lucene.getNumDocs(actualSI));
assertEquals(expectedSI.getGeneration(), actualSI.getGeneration());
assertEquals(expectedSI.getSegmentsFileName(), actualSI.getSegmentsFileName());
assertEquals(expectedSI.getVersion(), actualSI.getVersion());
assertEquals(expectedSI.getCommitLuceneVersion(), actualSI.getCommitLuceneVersion());
assertEquals(expectedSI.getMinSegmentLuceneVersion(), actualSI.getMinSegmentLuceneVersion());
assertEquals(expectedSI.getIndexCreatedVersionMajor(), actualSI.getIndexCreatedVersionMajor());
assertEquals(expectedSI.getUserData(), actualSI.getUserData());

int numDocsToIndex = randomIntBetween(10, 50);
List<Term> deleteTerms = new ArrayList<>();
for (int i = 0; i < numDocsToIndex; i++) {
doc = new Document();
doc.add(new TextField("id", "doc_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
deleteTerms.add(new Term("id", "doc_" + i));
writer.addDocument(doc);
}
int numDocsToDelete = randomIntBetween(0, numDocsToIndex);
Collections.shuffle(deleteTerms, random());
for (int i = 0; i < numDocsToDelete; i++) {
Term remove = deleteTerms.remove(0);
writer.deleteDocuments(remove);
}
writer.commit();
expectedSI = Lucene.readSegmentInfos(dir);
actualSI = Lucene.readSegmentInfosExtendedCompatibility(dir, Version.CURRENT);
assertEquals(Lucene.getNumDocs(expectedSI), Lucene.getNumDocs(actualSI));
assertEquals(expectedSI.getGeneration(), actualSI.getGeneration());
assertEquals(expectedSI.getSegmentsFileName(), actualSI.getSegmentsFileName());
assertEquals(expectedSI.getVersion(), actualSI.getVersion());
assertEquals(expectedSI.getCommitLuceneVersion(), actualSI.getCommitLuceneVersion());
assertEquals(expectedSI.getMinSegmentLuceneVersion(), actualSI.getMinSegmentLuceneVersion());
assertEquals(expectedSI.getIndexCreatedVersionMajor(), actualSI.getIndexCreatedVersionMajor());
assertEquals(expectedSI.getUserData(), actualSI.getUserData());
writer.close();
dir.close();
}

public void testCount() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Expand Down

0 comments on commit d1274ad

Please sign in to comment.