From a0dc2b944c4127556da6c6b4b69449d2d9a2d3cd Mon Sep 17 00:00:00 2001
From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com>
Date: Wed, 4 Oct 2023 14:44:08 -0700
Subject: [PATCH] Fix shard failure due to translog tragic close on upload failure (#10363) (#10368)

(cherry picked from commit a0cb34442f704309173e127675cc4ae83c7fb197)

Signed-off-by: Ashish Singh
Signed-off-by: github-actions[bot]
Co-authored-by: github-actions[bot]
---
 .../index/translog/RemoteFsTranslog.java      | 23 +++++--------------
 .../index/translog/RemoteFsTranslogTests.java | 23 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java
index 857d90e2e2ac2..29c825fd383c5 100644
--- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java
+++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java
@@ -242,15 +242,10 @@ public static TranslogTransferManager buildTranslogTransferManager(
 
     @Override
     public boolean ensureSynced(Location location) throws IOException {
-        try {
-            assert location.generation <= current.getGeneration();
-            if (location.generation == current.getGeneration()) {
-                ensureOpen();
-                return prepareAndUpload(primaryTermSupplier.getAsLong(), location.generation);
-            }
-        } catch (final Exception ex) {
-            closeOnTragicEvent(ex);
-            throw ex;
+        assert location.generation <= current.getGeneration();
+        if (location.generation == current.getGeneration()) {
+            ensureOpen();
+            return prepareAndUpload(primaryTermSupplier.getAsLong(), location.generation);
         }
         return false;
     }
@@ -355,14 +350,8 @@ private boolean syncToDisk() throws IOException {
 
     @Override
     public void sync() throws IOException {
-        try {
-            if (syncToDisk() || syncNeeded()) {
-                prepareAndUpload(primaryTermSupplier.getAsLong(), null);
-            }
-        } catch (final Exception e) {
-            tragedy.setTragicException(e);
-            closeOnTragicEvent(e);
-            throw e;
+        if (syncToDisk() || syncNeeded()) {
+            prepareAndUpload(primaryTermSupplier.getAsLong(), null);
         }
     }
 
diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java
index 84506f7ab25ff..b2310010620f7 100644
--- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java
@@ -67,6 +67,7 @@
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.NoSuchFileException;
 import java.nio.file.Path;
@@ -1049,7 +1050,7 @@ public void testSyncUpTo() throws IOException {
         }
     }
 
-    public void testSyncUpFailure() throws IOException {
+    public void testSyncUpLocationFailure() throws IOException {
         int translogOperations = randomIntBetween(1, 20);
         int count = 0;
         fail.failAlways();
@@ -1101,6 +1102,26 @@ public void testSyncUpFailure() throws IOException {
         assertDownloadStatsNoDownloads(statsTracker);
     }
 
+    public void testSyncUpAlwaysFailure() throws IOException {
+        int translogOperations = randomIntBetween(1, 20);
+        int count = 0;
+        fail.failAlways();
+        for (int op = 0; op < translogOperations; op++) {
+            translog.add(
+                new Translog.Index(String.valueOf(op), count, primaryTerm.get(), Integer.toString(count).getBytes(StandardCharsets.UTF_8))
+            );
+            try {
+                translog.sync();
+                fail("io exception expected");
+            } catch (IOException e) {
+                assertTrue("at least one operation pending", translog.syncNeeded());
+            }
+        }
+        assertTrue(translog.isOpen());
+        fail.failNever();
+        translog.sync();
+    }
+
     public void testSyncUpToStream() throws IOException {
         int iters = randomIntBetween(5, 10);
         for (int i = 0; i < iters; i++) {