Skip to content

Commit

Permalink
Introducing a translog deletion policy (#24950)
Browse files Browse the repository at this point in the history
Currently, the decisions regarding which translog generation files to delete are hard-coded in the interaction between the `InternalEngine` and the `Translog` classes. This PR extracts that logic into a dedicated class called `TranslogDeletionPolicy`, for two main reasons:

1) Simplicity - the code is easier to read and understand (no more two-phase commit on the translog; the Engine can just commit and the translog will respond)
2) Preparing for future plans to extend the logic we need - i.e., retaining multiple Lucene commits and also introducing size-based retention logic, allowing people to always keep a certain amount of translog files around. The latter is useful to increase the chance of an ops-based recovery.
  • Loading branch information
bleskes authored Jun 1, 2017
1 parent 3eabb3a commit 1775e42
Show file tree
Hide file tree
Showing 9 changed files with 519 additions and 297 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.engine;

import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogDeletionPolicy;

import java.io.IOException;
import java.util.List;

/**
 * An {@link IndexDeletionPolicy} that pairs Lucene commit handling with translog retention.
 * <p>
 * Both callbacks are first forwarded to the wrapped {@link SnapshotDeletionPolicy}. The translog generation stored in
 * the user data of the last commit is then reported to the {@link TranslogDeletionPolicy} as the minimum generation
 * required for recovery: always from {@link #onCommit(List)}, and from {@link #onInit(List)} only when the open mode
 * is {@code OPEN_INDEX_AND_TRANSLOG}.
 */
class CombinedDeletionPolicy extends IndexDeletionPolicy {

    private final SnapshotDeletionPolicy indexDeletionPolicy;
    private final TranslogDeletionPolicy translogDeletionPolicy;
    private final EngineConfig.OpenMode openMode;

    /**
     * @param indexDeletionPolicy    the Lucene deletion policy every callback is delegated to
     * @param translogDeletionPolicy the policy that is told which translog generation recovery requires
     * @param openMode               the mode the engine is opened with; selects the checks done in {@link #onInit(List)}
     */
    CombinedDeletionPolicy(SnapshotDeletionPolicy indexDeletionPolicy, TranslogDeletionPolicy translogDeletionPolicy,
                           EngineConfig.OpenMode openMode) {
        this.openMode = openMode;
        this.translogDeletionPolicy = translogDeletionPolicy;
        this.indexDeletionPolicy = indexDeletionPolicy;
    }

    /**
     * Delegates to the wrapped policy, then sanity-checks the commit list against the open mode. Only when both the
     * index and the translog are being opened is the translog generation of the last commit recorded.
     *
     * @throws IllegalArgumentException if the open mode is not one of the three handled values
     */
    @Override
    public void onInit(List<? extends IndexCommit> commits) throws IOException {
        // the wrapped policy always sees the commits before any translog bookkeeping happens
        indexDeletionPolicy.onInit(commits);
        switch (openMode) {
            case OPEN_INDEX_AND_TRANSLOG:
                assert commits.isEmpty() == false : "index is opened, but we have no commits";
                setLastCommittedTranslogGeneration(commits);
                break;
            case OPEN_INDEX_CREATE_TRANSLOG:
                // the translog is created from scratch, so there is no generation to record here
                assert commits.isEmpty() == false : "index is opened, but we have no commits";
                break;
            case CREATE_INDEX_AND_TRANSLOG:
                assert commits.isEmpty() : "index is being created but we already have commits";
                break;
            default:
                throw new IllegalArgumentException("unknown openMode [" + openMode + "]");
        }
    }

    /**
     * Delegates to the wrapped policy and then records the translog generation of the last commit.
     */
    @Override
    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
        indexDeletionPolicy.onCommit(commits);
        setLastCommittedTranslogGeneration(commits);
    }

    /**
     * Reads the translog generation stored under {@link Translog#TRANSLOG_GENERATION_KEY} in the user data of the
     * last commit in the list and passes it to the translog deletion policy as the minimum generation for recovery.
     */
    private void setLastCommittedTranslogGeneration(List<? extends IndexCommit> commits) throws IOException {
        // an existing lucene index is currently always opened on its last commit, so the translog
        // generation of that commit is the one recovery will need
        final int lastIndex = commits.size() - 1;
        final IndexCommit lastCommit = commits.get(lastIndex);
        assert lastCommit.isDeleted() == false : "last commit is deleted";
        final String storedGeneration = lastCommit.getUserData().get(Translog.TRANSLOG_GENERATION_KEY);
        final long requiredGeneration = Long.parseLong(storedGeneration);
        translogDeletionPolicy.setMinTranslogGenerationForRecovery(requiredGeneration);
    }

    /**
     * @return the wrapped Lucene snapshot deletion policy
     */
    public SnapshotDeletionPolicy getIndexDeletionPolicy() {
        return indexDeletionPolicy;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogConfig;
import org.elasticsearch.index.translog.TranslogCorruptedException;
import org.elasticsearch.index.translog.TranslogDeletionPolicy;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.IOException;
Expand Down Expand Up @@ -127,7 +128,7 @@ public class InternalEngine extends Engine {

private final String uidField;

private final SnapshotDeletionPolicy deletionPolicy;
private final CombinedDeletionPolicy deletionPolicy;

// How many callers are currently requesting index throttling. Currently there are only two situations where we do this: when merges
// are falling behind and when writing indexing buffer to disk is too slow. When this is 0, there is no throttling, else we throttling
Expand All @@ -147,9 +148,11 @@ public InternalEngine(EngineConfig engineConfig) throws EngineException {
if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) {
maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE);
}
deletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
this.uidField = engineConfig.getIndexSettings().isSingleType() ? IdFieldMapper.NAME : UidFieldMapper.NAME;
this.versionMap = new LiveVersionMap();
final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy();
this.deletionPolicy = new CombinedDeletionPolicy(
new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()), translogDeletionPolicy, openMode);
store.incRef();
IndexWriter writer = null;
Translog translog = null;
Expand Down Expand Up @@ -188,7 +191,7 @@ public InternalEngine(EngineConfig engineConfig) throws EngineException {
seqNoService = sequenceNumberService(shardId, engineConfig.getIndexSettings(), seqNoStats);
updateMaxUnsafeAutoIdTimestampFromWriter(writer);
indexWriter = writer;
translog = openTranslog(engineConfig, writer, () -> seqNoService().getGlobalCheckpoint());
translog = openTranslog(engineConfig, writer, translogDeletionPolicy, () -> seqNoService().getGlobalCheckpoint());
assert translog.getGeneration() != null;
} catch (IOException | TranslogCorruptedException e) {
throw new EngineCreationFailureException(shardId, "failed to create engine", e);
Expand Down Expand Up @@ -320,29 +323,21 @@ private void recoverFromTranslog(TranslogRecoveryPerformer handler) throws IOExc
}
}

private Translog openTranslog(EngineConfig engineConfig, IndexWriter writer, LongSupplier globalCheckpointSupplier) throws IOException {
private Translog openTranslog(EngineConfig engineConfig, IndexWriter writer, TranslogDeletionPolicy translogDeletionPolicy, LongSupplier globalCheckpointSupplier) throws IOException {
assert openMode != null;
final TranslogConfig translogConfig = engineConfig.getTranslogConfig();
Translog.TranslogGeneration generation = null;
String translogUUID = null;
if (openMode == EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG) {
generation = loadTranslogIdFromCommit(writer);
translogUUID = loadTranslogUUIDFromCommit(writer);
// We expect that this shard already exists, so it must already have an existing translog else something is badly wrong!
if (generation == null) {
throw new IllegalStateException("no translog generation present in commit data but translog is expected to exist");
}
if (generation.translogUUID == null) {
if (translogUUID == null) {
throw new IndexFormatTooOldException("translog", "translog has no generation nor a UUID - this might be an index from a previous version consider upgrading to N-1 first");
}
}
final Translog translog = new Translog(translogConfig, generation, globalCheckpointSupplier);
if (generation == null || generation.translogUUID == null) {
final Translog translog = new Translog(translogConfig, translogUUID, translogDeletionPolicy, globalCheckpointSupplier);
if (translogUUID == null) {
assert openMode != EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG : "OpenMode must not be "
+ EngineConfig.OpenMode.OPEN_INDEX_AND_TRANSLOG;
if (generation == null) {
logger.debug("no translog ID present in the current generation - creating one");
} else if (generation.translogUUID == null) {
logger.debug("upgraded translog to pre 2.0 format, associating translog with index - writing translog UUID");
}
boolean success = false;
try {
commitIndexWriter(writer, translog, openMode == EngineConfig.OpenMode.OPEN_INDEX_CREATE_TRANSLOG
Expand All @@ -368,22 +363,18 @@ public Translog getTranslog() {
* translog id into lucene and returns null.
*/
@Nullable
private Translog.TranslogGeneration loadTranslogIdFromCommit(IndexWriter writer) throws IOException {
private String loadTranslogUUIDFromCommit(IndexWriter writer) throws IOException {
// commit on a just opened writer will commit even if there are no changes done to it
// we rely on that for the commit data translog id key
final Map<String, String> commitUserData = commitDataAsMap(writer);
if (commitUserData.containsKey("translog_id")) {
assert commitUserData.containsKey(Translog.TRANSLOG_UUID_KEY) == false : "legacy commit contains translog UUID";
return new Translog.TranslogGeneration(null, Long.parseLong(commitUserData.get("translog_id")));
} else if (commitUserData.containsKey(Translog.TRANSLOG_GENERATION_KEY)) {
if (commitUserData.containsKey(Translog.TRANSLOG_UUID_KEY) == false) {
throw new IllegalStateException("commit doesn't contain translog UUID");
if (commitUserData.containsKey(Translog.TRANSLOG_UUID_KEY)) {
if (commitUserData.containsKey(Translog.TRANSLOG_GENERATION_KEY) == false) {
throw new IllegalStateException("commit doesn't contain translog generation id");
}
final String translogUUID = commitUserData.get(Translog.TRANSLOG_UUID_KEY);
final long translogGen = Long.parseLong(commitUserData.get(Translog.TRANSLOG_GENERATION_KEY));
return new Translog.TranslogGeneration(translogUUID, translogGen);
return commitUserData.get(Translog.TRANSLOG_UUID_KEY);
} else {
return null;
}
return null;
}

private SearcherManager createSearcherManager() throws EngineException {
Expand Down Expand Up @@ -1269,14 +1260,13 @@ public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineExcepti
if (indexWriter.hasUncommittedChanges() || force) {
ensureCanFlush();
try {
translog.prepareCommit();
translog.rollGeneration();
logger.trace("starting commit for flush; commitTranslog=true");
final long committedGeneration = commitIndexWriter(indexWriter, translog, null);
commitIndexWriter(indexWriter, translog, null);
logger.trace("finished commit for flush");
// we need to refresh in order to clear older version values
refresh("version_table_flush");
// after refresh documents can be retrieved from the index so we can now commit the translog
translog.commit(committedGeneration);
translog.trimUnreferencedReaders();
} catch (Exception e) {
throw new FlushFailedEngineException(shardId, e);
}
Expand Down Expand Up @@ -1428,9 +1418,8 @@ public IndexCommitRef acquireIndexCommit(final boolean flushFirst) throws Engine
logger.trace("finish flush for snapshot");
}
try (ReleasableLock lock = readLock.acquire()) {
ensureOpen();
logger.trace("pulling snapshot");
return new IndexCommitRef(deletionPolicy);
return new IndexCommitRef(deletionPolicy.getIndexDeletionPolicy());
} catch (IOException e) {
throw new SnapshotFailedEngineException(shardId, e);
}
Expand Down Expand Up @@ -1781,10 +1770,9 @@ protected void doRun() throws Exception {
* @param writer the index writer to commit
* @param translog the translog
* @param syncId the sync flush ID ({@code null} if not committing a synced flush)
* @return the minimum translog generation for the local checkpoint committed with the specified index writer
* @throws IOException if an I/O exception occurs committing the specified writer
*/
private long commitIndexWriter(final IndexWriter writer, final Translog translog, @Nullable final String syncId) throws IOException {
private void commitIndexWriter(final IndexWriter writer, final Translog translog, @Nullable final String syncId) throws IOException {
ensureCanFlush();
try {
final long localCheckpoint = seqNoService().getLocalCheckpoint();
Expand Down Expand Up @@ -1817,7 +1805,6 @@ private long commitIndexWriter(final IndexWriter writer, final Translog translog
});

writer.commit();
return translogGeneration.translogFileGeneration;
} catch (final Exception ex) {
try {
failEngine("lucene commit failed", ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,7 @@ public IndexShard(ShardRouting shardRouting, IndexSettings indexSettings, ShardP
logger.debug("state: [CREATED]");

this.checkIndexOnStartup = indexSettings.getValue(IndexSettings.INDEX_CHECK_ON_STARTUP);
this.translogConfig = new TranslogConfig(shardId, shardPath().resolveTranslog(), indexSettings,
bigArrays);
this.translogConfig = new TranslogConfig(shardId, shardPath().resolveTranslog(), indexSettings, bigArrays);
// the query cache is a node-level thing, however we want the most popular filters
// to be computed on a per-shard basis
if (IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING.get(settings)) {
Expand Down
Loading

0 comments on commit 1775e42

Please sign in to comment.