Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Index stale operations to Lucene to have complete history #29679

Merged
merged 7 commits into from
Apr 27, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFileNames;
Expand Down Expand Up @@ -833,6 +834,58 @@ public int length() {
};
}

/**
* Wraps a directory reader to include soft-deleted documents.
* This should be only used to query the history of documents rather than the documents.
*
* @param in the input directory reader
* @return the wrapped reader including soft-deleted documents.
*/
public static DirectoryReader includeSoftDeletes(DirectoryReader in) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should just call wrapAllDocsLive(DirectoryReader in) it's really unrelated to soft deletes

return new DirectoryReaderWithSoftDeletes(in);
}

// Filter reader that drops the live-docs bitset so that soft-deleted documents become visible.
private static final class DirectoryReaderWithSoftDeletes extends FilterDirectoryReader {
// Leaf-level counterpart: reports every document in the segment as live.
static final class SubReaderWithSoftDeletes extends FilterLeafReader {
SubReaderWithSoftDeletes(LeafReader in) {
super(in);
}
@Override
public Bits getLiveDocs() {
// Returning null means "no deletions": all documents, including soft-deleted ones, are visible.
return null;
}
@Override
public int numDocs() {
// With no live-docs bitset exposed, the visible doc count is the full segment size.
return maxDoc();
}
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
return null; // Modifying liveDocs
}
}
DirectoryReaderWithSoftDeletes(DirectoryReader in) throws IOException {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader leaf) {
return new SubReaderWithSoftDeletes(leaf);
}
});
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
// Called by Lucene when the underlying reader is reopened; re-apply the same wrapping.
return includeSoftDeletes(in);
}

@Override
public CacheHelper getReaderCacheHelper() {
return null; // Modifying liveDocs
}
}

/**
* Returns a numeric docvalues which can be used to soft-delete documents.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,15 @@ DocIdAndSeqNo lookupSeqNo(BytesRef id, LeafReaderContext context) throws IOExcep
return null;
}
}

/**
* Returns an internal posting list of the given uid
*/
PostingsEnum getPostingsOrNull(BytesRef id) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we use this method here as well?

if (termsEnum.seekExact(id)) {
docsEnum = termsEnum.postings(docsEnum, 0);
return docsEnum;
}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.CloseableThreadLocal;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
Expand Down Expand Up @@ -193,4 +195,32 @@ public static long loadVersion(IndexReader reader, Term term) throws IOException
final DocIdAndVersion docIdAndVersion = loadDocIdAndVersion(reader, term);
return docIdAndVersion == null ? NOT_FOUND : docIdAndVersion.version;
}

/**
* Checks for the existence of the history of a pair SeqNo/PrimaryTerm in Lucene. The checking pair is considered as existed
* if there is a pair such as the seqNo equals to the checking seqNo and the primary term is at least the checking term.
*/
public static boolean hasHistoryInLucene(IndexReader reader, Term idTerm, long seqNo, long primaryTerm) throws IOException {
final PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, idTerm.field());
final List<LeafReaderContext> leaves = reader.leaves();
// iterate backwards to optimize for the frequently updated documents which are likely to be in the last segments
for (int i = leaves.size() - 1; i >= 0; i--) {
final LeafReaderContext leaf = leaves.get(i);
final PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
final PostingsEnum postingsEnum = lookup.getPostingsOrNull(idTerm.bytes());
if (postingsEnum == null) {
continue;
}
final NumericDocValues seqNoDV = leaf.reader().getNumericDocValues(SeqNoFieldMapper.NAME);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can assert that both seqNoDV and primaryTermDV are non null. They are required at least for this code to work.

final NumericDocValues primaryTermDV = leaf.reader().getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME);
for (int docId = postingsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = postingsEnum.nextDoc()) {
if (seqNoDV != null && seqNoDV.advanceExact(docId) && primaryTermDV != null && primaryTermDV.advanceExact(docId)) {
if (seqNoDV.longValue() == seqNo && primaryTermDV.longValue() >= primaryTerm) {
return true;
}
}
}
}
return false;
}
}
Loading