From 559e29713009ad073e56318d204dd927c2361b37 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 13 Aug 2019 13:46:36 -0400 Subject: [PATCH] Do not hold writeLock while verifying Lucene/translog We should not hold Engine#writeLock while executing assertConsistentHistoryBetweenTranslogAndLuceneIndex for this check might acquire Engine#readLock. Relates #45461 --- .../index/engine/EngineTestCase.java | 76 +++++++++---------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index ca6d752dd7840..95ff43e8b3c7d 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -67,7 +67,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.concurrent.ReleasableLock; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; @@ -1075,48 +1074,45 @@ public static void assertConsistentHistoryBetweenTranslogAndLuceneIndex(Engine e || (engine instanceof InternalEngine) == false) { return; } - // hold writeLock during this check so indexing can't happen - try (ReleasableLock ignored = engine.writeLock.acquire()) { - final List translogOps = new ArrayList<>(); - try (Translog.Snapshot snapshot = EngineTestCase.getTranslog(engine).newSnapshot()) { - Translog.Operation op; - while ((op = snapshot.next()) != null) { - translogOps.add(op); - } - } - final Map luceneOps = readAllOperationsInLucene(engine, mapper).stream() - .collect(Collectors.toMap(Translog.Operation::seqNo, Function.identity())); - final long maxSeqNo = ((InternalEngine) engine).getLocalCheckpointTracker().getMaxSeqNo(); - for (Translog.Operation op : translogOps) { - assertThat("translog operation [" + op + "] > max_seq_no[" + maxSeqNo + "]", op.seqNo(), lessThanOrEqualTo(maxSeqNo)); - } - for (Translog.Operation op : luceneOps.values()) { - assertThat("lucene operation [" + op + "] > max_seq_no[" + maxSeqNo + "]", op.seqNo(), lessThanOrEqualTo(maxSeqNo)); - } - final long globalCheckpoint = EngineTestCase.getTranslog(engine).getLastSyncedGlobalCheckpoint(); - final long retainedOps = engine.config().getIndexSettings().getSoftDeleteRetentionOperations(); - final long seqNoForRecovery; - try (Engine.IndexCommitRef safeCommit = engine.acquireSafeIndexCommit()) { - seqNoForRecovery = Long.parseLong(safeCommit.getIndexCommit().getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY)) + 1; + final List translogOps = new ArrayList<>(); + try (Translog.Snapshot snapshot = EngineTestCase.getTranslog(engine).newSnapshot()) { + Translog.Operation op; + while ((op = snapshot.next()) != null) { + translogOps.add(op); } - final long minSeqNoToRetain = Math.min(seqNoForRecovery, globalCheckpoint + 1 - retainedOps); - for (Translog.Operation translogOp : translogOps) { - final Translog.Operation luceneOp = luceneOps.get(translogOp.seqNo()); - if (luceneOp == null) { - if (minSeqNoToRetain <= translogOp.seqNo()) { - fail("Operation not found seq# [" + translogOp.seqNo() + "], global checkpoint [" + globalCheckpoint + "], " + - "retention policy [" + retainedOps + "], maxSeqNo [" + maxSeqNo + "], translog op [" + translogOp + "]"); - } else { - continue; - } - } - assertThat(luceneOp, notNullValue()); - assertThat(luceneOp.toString(), luceneOp.primaryTerm(), equalTo(translogOp.primaryTerm())); - assertThat(luceneOp.opType(), equalTo(translogOp.opType())); - if (luceneOp.opType() == Translog.Operation.Type.INDEX) { - assertThat(luceneOp.getSource().source, equalTo(translogOp.getSource().source)); + } + final Map luceneOps = readAllOperationsInLucene(engine, mapper).stream() + .collect(Collectors.toMap(Translog.Operation::seqNo, Function.identity())); + final long maxSeqNo = ((InternalEngine) engine).getLocalCheckpointTracker().getMaxSeqNo(); + for (Translog.Operation op : translogOps) { + assertThat("translog operation [" + op + "] > max_seq_no[" + maxSeqNo + "]", op.seqNo(), lessThanOrEqualTo(maxSeqNo)); + } + for (Translog.Operation op : luceneOps.values()) { + assertThat("lucene operation [" + op + "] > max_seq_no[" + maxSeqNo + "]", op.seqNo(), lessThanOrEqualTo(maxSeqNo)); + } + final long globalCheckpoint = EngineTestCase.getTranslog(engine).getLastSyncedGlobalCheckpoint(); + final long retainedOps = engine.config().getIndexSettings().getSoftDeleteRetentionOperations(); + final long seqNoForRecovery; + try (Engine.IndexCommitRef safeCommit = engine.acquireSafeIndexCommit()) { + seqNoForRecovery = Long.parseLong(safeCommit.getIndexCommit().getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY)) + 1; + } + final long minSeqNoToRetain = Math.min(seqNoForRecovery, globalCheckpoint + 1 - retainedOps); + for (Translog.Operation translogOp : translogOps) { + final Translog.Operation luceneOp = luceneOps.get(translogOp.seqNo()); + if (luceneOp == null) { + if (minSeqNoToRetain <= translogOp.seqNo()) { + fail("Operation not found seq# [" + translogOp.seqNo() + "], global checkpoint [" + globalCheckpoint + "], " + + "retention policy [" + retainedOps + "], maxSeqNo [" + maxSeqNo + "], translog op [" + translogOp + "]"); + } else { + continue; } } + assertThat(luceneOp, notNullValue()); + assertThat(luceneOp.toString(), luceneOp.primaryTerm(), equalTo(translogOp.primaryTerm())); + assertThat(luceneOp.opType(), equalTo(translogOp.opType())); + if (luceneOp.opType() == Translog.Operation.Type.INDEX) { + assertThat(luceneOp.getSource().source, equalTo(translogOp.getSource().source)); + } } }