Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use soft-update to maintain document history #29458

Merged
merged 7 commits into from
Apr 12, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ public int length() {
/**
* Returns a numeric doc-values field which can be used to soft-delete documents.
*/
public static NumericDocValuesField getSoftDeleteDVMarker() {
public static NumericDocValuesField newSoftDeleteField() {
return new NumericDocValuesField(SOFT_DELETE_FIELD, 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
generationThresholdSize = scopedSettings.get(INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING);
mergeSchedulerConfig = new MergeSchedulerConfig(this);
gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis();
softDeleteEnabled = scopedSettings.get(INDEX_SOFT_DELETES_SETTING);
softDeleteEnabled = version.onOrAfter(Version.V_7_0_0_alpha1) && scopedSettings.get(INDEX_SOFT_DELETES_SETTING);
warmerEnabled = scopedSettings.get(INDEX_WARMER_ENABLED_SETTING);
maxResultWindow = scopedSettings.get(MAX_RESULT_WINDOW_SETTING);
maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING);
Expand Down Expand Up @@ -852,6 +852,6 @@ public boolean isExplicitRefresh() {
* Returns <code>true</code> if soft-delete is enabled.
*/
public boolean isSoftDeleteEnabled() {
return getIndexVersionCreated().onOrAfter(Version.V_7_0_0_alpha1) && softDeleteEnabled;
return softDeleteEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
Expand Down Expand Up @@ -163,7 +161,6 @@ public InternalEngine(EngineConfig engineConfig) {
if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) {
maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE);
}
this.uidField = engineConfig.getIndexSettings().isSingleType() ? IdFieldMapper.NAME : UidFieldMapper.NAME;
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy(
engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(),
Expand Down Expand Up @@ -1073,9 +1070,9 @@ private boolean assertDocDoesNotExist(final Index index, final boolean allowDele
private void updateDocs(final Term uid, final List<ParseContext.Document> docs, final IndexWriter indexWriter) throws IOException {
if (softDeleteEnabled) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in the delete case we should also use indexWriter.updateDocValues(uid, Lucene.getSoftDeleteDVMarker()); instead of IndexWriter#deleteDocuments and then override this in the test to maybe throw an exception.

if (docs.size() > 1) {
indexWriter.softUpdateDocuments(uid, docs, Lucene.getSoftDeleteDVMarker());
indexWriter.softUpdateDocuments(uid, docs, Lucene.newSoftDeleteField());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can cache this in here in a constant so we don't need to re-create a new one every time.

} else {
indexWriter.softUpdateDocument(uid, docs.get(0), Lucene.getSoftDeleteDVMarker());
indexWriter.softUpdateDocument(uid, docs.get(0), Lucene.newSoftDeleteField());
}
} else {
if (docs.size() > 1) {
Expand Down Expand Up @@ -1927,7 +1924,6 @@ IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOEx
return new IndexWriter(directory, iwc);
}


private IndexWriterConfig getIndexWriterConfig() {
final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
iwc.setCommitOnClose(false); // we by default don't commit on close
Expand All @@ -1945,9 +1941,8 @@ private IndexWriterConfig getIndexWriterConfig() {
// background merges
MergePolicy mergePolicy = config().getMergePolicy();
if (softDeleteEnabled) {
iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD);
// TODO: soft-delete retention policy
mergePolicy = new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, () -> new MatchAllDocsQuery(), mergePolicy);
iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD);
}
iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
iwc.setSimilarity(engineConfig.getSimilarity());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.index.shard;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicyWrapper;
Expand Down Expand Up @@ -126,11 +125,6 @@ public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
return super.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer);
}

/**
 * Pure pass-through: defers to the wrapped merge policy's decision on whether a
 * segment whose documents are all deleted should be kept rather than dropped.
 * NOTE(review): presumably present so a soft-delete-aware delegate can retain
 * fully deleted segments for history — confirm against the delegate's policy.
 */
@Override
public boolean keepFullyDeletedSegment(CodecReader reader) throws IOException {
return delegate.keepFullyDeletedSegment(reader);
}

/**
* When <code>upgrade</code> is true, running a force merge will upgrade any segments written
* with older versions. This will apply to the next call to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
Expand Down Expand Up @@ -507,7 +505,7 @@ public void testSegmentsWithMergeFlag() throws Exception {

if (flush) {
// we should have had just 1 merge, so last generation should be exact
assertEquals(gen2 + 1, store.readLastCommittedSegmentsInfo().getLastGeneration());
assertEquals(gen2, store.readLastCommittedSegmentsInfo().getLastGeneration());
}
}
}
Expand Down Expand Up @@ -4546,41 +4544,6 @@ public void testTrimUnsafeCommits() throws Exception {
}
}

/**
 * Checks that document history is maintained when a document is updated with
 * soft-deletes enabled: every superseded version of the document remains in the
 * index as a soft-deleted (live-docs-masked) document rather than being removed.
 */
public void testUpdateMaintainDocumentHistory() throws Exception {
// Index created with the soft-deletes setting explicitly enabled.
final IndexMetaData indexMetaData = IndexMetaData.builder("test")
.settings(settings(Version.CURRENT).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true))
.numberOfShards(1).numberOfReplicas(1).build();
final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(indexMetaData);
final String docId = "doc-id";
// Randomized number of updates of the same doc id (at least 2, so at least one update happens).
final int versions = between(2, 20);
try (Store store = createStore();
InternalEngine engine = createEngine(indexSettings, store, createTempDir(), new LogDocMergePolicy())) {
final ParsedDocument doc = testParsedDocument(docId, null, testDocument(), new BytesArray("{}"), null);
// Re-index the same document `versions` times; each pass after the first is an update.
for (int version = 1; version <= versions; version++) {
Engine.IndexResult indexResult = engine.index(indexForDoc(doc));
assertThat(indexResult.getFailure(), nullValue());
// Randomly flush between updates so history spans multiple segments/commits.
if (randomBoolean()) {
engine.flush();
}
}
engine.refresh("test");
try (Searcher searcher = engine.acquireSearcher("test", Engine.SearcherScope.INTERNAL)) {
// A normal searcher sees only the latest version; the older ones count as deleted.
assertThat(searcher.reader().numDocs(), equalTo(1));
assertThat(searcher.reader().numDeletedDocs(), equalTo(versions - 1));
// The engine-level get must resolve to the most recent version number.
try (Engine.GetResult get = engine.get(new Engine.Get(true, false, doc.type(), doc.id(), newUid(doc)), engine::acquireSearcher)) {
assertThat((int) get.version(), equalTo(versions));
}
// Use a reader which includes soft-deleted documents to verify history.
// With live docs ignored, every indexed version is still physically present.
final UnfilteredReader unfilteredReader = new UnfilteredReader(searcher.getDirectoryReader());
assertThat(unfilteredReader.numDocs(), equalTo(versions));
assertThat(unfilteredReader.numDeletedDocs(), equalTo(0));
}
}
}

private static void trimUnsafeCommits(EngineConfig config) throws IOException {
final Store store = config.getStore();
final TranslogConfig translogConfig = config.getTranslogConfig();
Expand All @@ -4599,50 +4562,4 @@ void assertLuceneOperations(InternalEngine engine, long expectedAppends, long ex
assertThat(message, engine.getNumDocUpdates(), equalTo(expectedUpdates));
assertThat(message, engine.getNumDocDeletes(), equalTo(expectedDeletes));
}

/**
 * A {@link FilterDirectoryReader} that does not exclude soft-deleted documents:
 * it hides the per-segment live-docs bitset so every document — including ones
 * masked out by soft deletes — is visible to readers and doc counts.
 */
static final class UnfilteredReader extends FilterDirectoryReader {
// Wraps each leaf so that deletions are invisible at the segment level.
static final class UnfilteredSubReaderWrapper extends SubReaderWrapper {
@Override
public LeafReader wrap(LeafReader in) {
return new FilterLeafReader(in) {
// Returning null here opts this wrapped reader out of Lucene's
// reader caching — required because we change live-docs semantics
// and must not share cache entries with the unwrapped reader.
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}

@Override
public CacheHelper getReaderCacheHelper() {
return null;
}

// Report every slot as a live document, deleted or not.
@Override
public int numDocs() {
return maxDoc();
}

// A null live-docs bitset means "all documents are live" to Lucene,
// so soft-deleted documents become visible through this reader.
@Override
public Bits getLiveDocs() {
return null;
}
};
}
}

UnfilteredReader(DirectoryReader in) throws IOException {
super(in, new UnfilteredSubReaderWrapper());
}

// Re-wrap on reopen so refreshed readers keep the unfiltered view.
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
return new UnfilteredReader(in);
}

@Override
public CacheHelper getReaderCacheHelper() {
return null; // we are modifying live docs.
}
}
}