Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use soft-update to maintain document history #29458

Merged
merged 7 commits into from
Apr 12, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
Expand Down Expand Up @@ -96,6 +97,8 @@ public class Lucene {
assert annotation == null : "DocValuesFormat " + LATEST_DOC_VALUES_FORMAT + " is deprecated" ;
}

public static final String SOFT_DELETE_FIELD = "__soft_delete";

public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());

Expand Down Expand Up @@ -829,4 +832,11 @@ public int length() {
}
};
}

/**
* Returns a numeric docvalues which can be used to soft-delete documents.
*/
public static NumericDocValuesField newSoftDeleteField() {
return new NumericDocValuesField(SOFT_DELETE_FIELD, 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.MAX_REGEX_LENGTH_SETTING,
ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING,
IndexSettings.INDEX_GC_DELETES_SETTING,
IndexSettings.INDEX_SOFT_DELETES_SETTING,
IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING,
UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING,
EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING,
Expand Down
15 changes: 15 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,12 @@ public final class IndexSettings {
public static final Setting<TimeValue> INDEX_GC_DELETES_SETTING =
Setting.timeSetting("index.gc_deletes", DEFAULT_GC_DELETES, new TimeValue(-1, TimeUnit.MILLISECONDS), Property.Dynamic,
Property.IndexScope);

/**
* Specifies if the index should use soft-delete instead of hard-delete for update/delete operations.
*/
public static final Setting<Boolean> INDEX_SOFT_DELETES_SETTING = Setting.boolSetting("index.soft_deletes", true, Property.IndexScope);

/**
* The maximum number of refresh listeners allows on this shard.
*/
Expand Down Expand Up @@ -282,6 +288,7 @@ public final class IndexSettings {
private final IndexSortConfig indexSortConfig;
private final IndexScopedSettings scopedSettings;
private long gcDeletesInMillis = DEFAULT_GC_DELETES.millis();
private final boolean softDeleteEnabled;
private volatile boolean warmerEnabled;
private volatile int maxResultWindow;
private volatile int maxInnerResultWindow;
Expand Down Expand Up @@ -393,6 +400,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
generationThresholdSize = scopedSettings.get(INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING);
mergeSchedulerConfig = new MergeSchedulerConfig(this);
gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis();
softDeleteEnabled = version.onOrAfter(Version.V_7_0_0_alpha1) && scopedSettings.get(INDEX_SOFT_DELETES_SETTING);
warmerEnabled = scopedSettings.get(INDEX_WARMER_ENABLED_SETTING);
maxResultWindow = scopedSettings.get(MAX_RESULT_WINDOW_SETTING);
maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING);
Expand Down Expand Up @@ -839,4 +847,11 @@ public boolean isExplicitRefresh() {
* Returns the time that an index shard becomes search idle unless it's accessed in between
*/
public TimeValue getSearchIdleAfter() { return searchIdleAfter; }

/**
* Returns <code>true</code> if soft-delete is enabled.
*/
public boolean isSoftDeleteEnabled() {
return softDeleteEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ public final class CommitStats implements Streamable, ToXContentFragment {
private String id; // lucene commit id in base 64;
private int numDocs;

public CommitStats(SegmentInfos segmentInfos) {
public CommitStats(SegmentInfos segmentInfos, int numDocs) {
// clone the map to protect against concurrent changes
userData = MapBuilder.<String, String>newMapBuilder().putAll(segmentInfos.getUserData()).immutableMap();
// lucene calls the current generation, last generation.
generation = segmentInfos.getLastGeneration();
id = Base64.getEncoder().encodeToString(segmentInfos.getId());
numDocs = Lucene.getNumDocs(segmentInfos);
this.numDocs = numDocs;
}

private CommitStats() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,9 @@ protected final void ensureOpen() {

/** get commits stats for the last commit */
public CommitStats commitStats() {
return new CommitStats(getLastCommittedSegmentInfos());
try (Engine.Searcher searcher = acquireSearcher("commit_stats", Engine.SearcherScope.INTERNAL)) {
return new CommitStats(getLastCommittedSegmentInfos(), searcher.reader().numDocs());
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
import org.elasticsearch.index.seqno.LocalCheckpointTracker;
import org.elasticsearch.index.seqno.SequenceNumbers;
import org.elasticsearch.index.shard.ElasticsearchMergePolicy;
import org.elasticsearch.index.shard.IndexingStats;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogConfig;
Expand Down Expand Up @@ -138,6 +137,7 @@ public class InternalEngine extends Engine {
private final CounterMetric numDocDeletes = new CounterMetric();
private final CounterMetric numDocAppends = new CounterMetric();
private final CounterMetric numDocUpdates = new CounterMetric();
private final boolean softDeleteEnabled;

/**
* How many bytes we are currently moving to disk, via either IndexWriter.flush or refresh. IndexingMemoryController polls this
Expand All @@ -161,6 +161,7 @@ public InternalEngine(EngineConfig engineConfig) {
if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) {
maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE);
}
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy(
engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(),
engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis()
Expand Down Expand Up @@ -768,6 +769,7 @@ public IndexResult index(Index index) throws IOException {
} else if (plan.indexIntoLucene) {
indexResult = indexIntoLucene(index, plan);
} else {
// TODO: We need to index stale documents to have a full history in Lucene.
indexResult = new IndexResult(
plan.versionForIndexing, plan.seqNoForIndexing, plan.currentNotFoundOrDeleted);
}
Expand Down Expand Up @@ -1066,10 +1068,18 @@ private boolean assertDocDoesNotExist(final Index index, final boolean allowDele
}

private void updateDocs(final Term uid, final List<ParseContext.Document> docs, final IndexWriter indexWriter) throws IOException {
if (docs.size() > 1) {
indexWriter.updateDocuments(uid, docs);
if (softDeleteEnabled) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in the delete case we should also use indexWriter.updateDocValue(uid, Lucene.getSoftDeleteDVMarker()); instead of IndexWriter#deleteDocuments and then override this in the test to maybe throw an exception.

if (docs.size() > 1) {
indexWriter.softUpdateDocuments(uid, docs, Lucene.newSoftDeleteField());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can cache this in here in a constant so we don't need to re-create a new one every time.

} else {
indexWriter.softUpdateDocument(uid, docs.get(0), Lucene.newSoftDeleteField());
}
} else {
indexWriter.updateDocument(uid, docs.get(0));
if (docs.size() > 1) {
indexWriter.updateDocuments(uid, docs);
} else {
indexWriter.updateDocument(uid, docs.get(0));
}
}
numDocUpdates.inc(docs.size());
}
Expand Down Expand Up @@ -1927,11 +1937,14 @@ private IndexWriterConfig getIndexWriterConfig() {
}
iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
iwc.setMergeScheduler(mergeScheduler);
MergePolicy mergePolicy = config().getMergePolicy();
// Give us the opportunity to upgrade old segments while performing
// background merges
mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
iwc.setMergePolicy(mergePolicy);
MergePolicy mergePolicy = config().getMergePolicy();
if (softDeleteEnabled) {
// TODO: soft-delete retention policy
iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD);
}
iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
iwc.setSimilarity(engineConfig.getSimilarity());
iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
iwc.setCodec(engineConfig.getCodec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,6 @@ public RecoveryDiff recoveryDiff(MetadataSnapshot recoveryTargetSnapshot) {
}
final String segmentId = IndexFileNames.parseSegmentName(meta.name());
final String extension = IndexFileNames.getExtension(meta.name());
assert FIELD_INFOS_FILE_EXTENSION.equals(extension) == false || IndexFileNames.stripExtension(IndexFileNames.stripSegmentName(meta.name())).isEmpty() : "FieldInfos are generational but updateable DV are not supported in elasticsearch";
if (IndexFileNames.SEGMENTS.equals(segmentId) || DEL_FILE_EXTENSION.equals(extension) || LIV_FILE_EXTENSION.equals(extension)) {
// only treat del files as per-commit files fnm files are generational but only for upgradable DV
perCommitStoreFiles.add(meta);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogConfig;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.test.IndexSettingsModule;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;

Expand Down Expand Up @@ -2711,6 +2712,12 @@ private void maybeThrowFailure() throws IOException {
}
}

@Override
public long softUpdateDocument(Term term, Iterable<? extends IndexableField> doc, Field... softDeletes) throws IOException {
maybeThrowFailure();
return super.softUpdateDocument(term, doc, softDeletes);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++

}

@Override
public long deleteDocuments(Term... terms) throws IOException {
maybeThrowFailure();
Expand Down