From f147d3607e34f60ddfbcaacb79419d7aaa3e75f8 Mon Sep 17 00:00:00 2001
From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com>
Date: Mon, 27 May 2024 02:06:39 +0000
Subject: [PATCH] [Enhancement] Log the content of journal entity on replay
 failure (backport #46011) (#46026)

Signed-off-by: yiming
Co-authored-by: yiming
---
 .../persist/metablock/SRMetaBlockReader.java  |  2 +-
 .../com/starrocks/server/GlobalStateMgr.java  | 22 ++++++++++++++-----
 .../com/starrocks/server/LocalMetastore.java  |  2 ++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/fe/fe-core/src/main/java/com/starrocks/persist/metablock/SRMetaBlockReader.java b/fe/fe-core/src/main/java/com/starrocks/persist/metablock/SRMetaBlockReader.java
index 649fb8548fd27..4f0f859ca9cd5 100644
--- a/fe/fe-core/src/main/java/com/starrocks/persist/metablock/SRMetaBlockReader.java
+++ b/fe/fe-core/src/main/java/com/starrocks/persist/metablock/SRMetaBlockReader.java
@@ -110,7 +110,7 @@ public void close() throws IOException, SRMetaBlockException {
             LOG.warn("Meta block for {} read {} json < total {} json, will skip the rest {} json",
                     header.getSrMetaBlockID(), numJsonRead, header.getNumJson(), rest);
             for (int i = 0; i != rest; ++i) {
-                LOG.warn("skip {} json: {}", i, Text.readStringWithChecksum(checkedInputStream));
+                LOG.warn("skip {}th json: {}", i, Text.readStringWithChecksum(checkedInputStream));
             }
         }
 
diff --git a/fe/fe-core/src/main/java/com/starrocks/server/GlobalStateMgr.java b/fe/fe-core/src/main/java/com/starrocks/server/GlobalStateMgr.java
index 76ef847e366b1..69d8811aaaf31 100644
--- a/fe/fe-core/src/main/java/com/starrocks/server/GlobalStateMgr.java
+++ b/fe/fe-core/src/main/java/com/starrocks/server/GlobalStateMgr.java
@@ -2370,17 +2370,16 @@ protected boolean replayJournalInner(JournalCursor cursor, boolean flowControl)
                 EditLog.loadJournal(this, entity);
             } catch (Throwable e) {
                 if (canSkipBadReplayedJournal(e)) {
-                    LOG.error("!!! DANGER: SKIP JOURNAL {}: {} !!!",
-                            replayedJournalId.incrementAndGet(),
-                            entity == null ? null : GsonUtils.GSON.toJson(entity.getData()),
-                            e);
+                    LOG.error("!!! DANGER: SKIP JOURNAL, id: {}, data: {} !!!",
+                            replayedJournalId.incrementAndGet(), journalEntityToReadableString(entity), e);
                     if (!readSucc) {
                         cursor.skipNext();
                     }
                     continue;
                 }
                 // handled in outer loop
-                LOG.warn("catch exception when replaying {},", replayedJournalId.get() + 1, e);
+                LOG.warn("catch exception when replaying journal, id: {}, data: {},",
+                        replayedJournalId.get() + 1, journalEntityToReadableString(entity), e);
                 throw e;
             }
 
@@ -2418,6 +2417,19 @@ protected boolean replayJournalInner(JournalCursor cursor, boolean flowControl)
         return false;
     }
 
+    private String journalEntityToReadableString(JournalEntity entity) {
+        if (entity == null) {
+            return "null";
+        }
+        Writable data = entity.getData();
+        try {
+            return GsonUtils.GSON.toJson(data);
+        } catch (Exception e) {
+            // In older version, data may not be json, here we just return the class name.
+            return data.getClass().getName();
+        }
+    }
+
     protected boolean canSkipBadReplayedJournal(Throwable t) {
         if (Config.metadata_enable_recovery_mode) {
             LOG.warn("skip journal load failure because cluster is in recovery mode");
diff --git a/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java b/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java
index 840a0419ffa73..3f00b371cbd14 100644
--- a/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java
+++ b/fe/fe-core/src/main/java/com/starrocks/server/LocalMetastore.java
@@ -2298,6 +2298,8 @@ public void replayCreateTable(CreateTableInfo info) {
             table.onReload();
         } catch (Throwable e) {
             LOG.error("replay create table failed: {}", table, e);
+            // Rethrow, we should not eat the exception when replaying editlog.
+            throw e;
         } finally {
             db.writeUnlock();
         }