From 37b2d36edd264f09ad4c9202c4ab45972c733069 Mon Sep 17 00:00:00 2001 From: WangYuanben <48795318+YuanbenWang@users.noreply.github.com> Date: Mon, 10 Jul 2023 18:42:37 +0800 Subject: [PATCH] HDFS-17033. Update fsck to display stale state info of blocks accurately. (#5815). Contributed by Wang Yuanben. Reviewed-by: Shilun Fan Reviewed-by: Xing Lin Reviewed-by: Hualong Zhang Signed-off-by: He Xiaoqiao --- .../hdfs/server/namenode/NamenodeFsck.java | 3 + .../org/apache/hadoop/hdfs/tools/DFSck.java | 2 + .../hadoop/hdfs/server/namenode/TestFsck.java | 86 +++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 050eda628ee08..dfdb12285fb26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -126,6 +126,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { public static final String ENTERING_MAINTENANCE_STATUS = "is ENTERING MAINTENANCE"; public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE"; + public static final String STALE_STATUS = "is STALE"; public static final String NONEXISTENT_STATUS = "does not exist"; public static final String FAILURE_STATUS = "FAILED"; public static final String UNDEFINED = "undefined"; @@ -370,6 +371,8 @@ private void printDatanodeReplicaStatus(Block block, out.print(ENTERING_MAINTENANCE_STATUS); } else if (this.showMaintenanceState && dn.isInMaintenance()) { out.print(IN_MAINTENANCE_STATUS); + } else if (dn.isStale(this.staleInterval)) { + out.print(STALE_STATUS); } else { out.print(HEALTHY_STATUS); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java index acb370e421a30..6587ffcce3c07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java @@ -404,6 +404,8 @@ else if (args[idx].equals("-replicaDetails")) { errCode = 4; } else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) { errCode = 5; + } else if (lastLine.endsWith(NamenodeFsck.STALE_STATUS)) { + errCode = 6; } return errCode; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index a312b03168b49..5db4e029cb9db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY; import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -1681,6 +1682,91 @@ public Boolean get() { assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS)); } + /** + * Test for blockIdCK with datanode staleness. + */ + @Test + public void testBlockIdCKStaleness() throws Exception { + final short replFactor = 1; + final long blockSize = 512; + Configuration configuration = new Configuration(); + + // Shorten dfs.namenode.stale.datanode.interval for easier testing. + configuration.setLong(DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5000); + configuration.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + configuration.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor); + + String[] racks = {"/rack1", "/rack2"}; + String[] hosts = {"host1", "host2"}; + + File builderBaseDir = new File(GenericTestUtils.getRandomizedTempPath()); + cluster = new MiniDFSCluster.Builder(configuration, builderBaseDir) + .hosts(hosts).racks(racks).build(); + assertNotNull("Failed Cluster Creation", cluster); + cluster.waitClusterUp(); + DistributedFileSystem fs = cluster.getFileSystem(); + assertNotNull("Failed to get FileSystem", fs); + + try { + DFSTestUtil util = new DFSTestUtil.Builder(). + setName(getClass().getSimpleName()).setNumFiles(1).build(); + + // Create one file. + final String pathString = new String("/testfile"); + final Path path = new Path(pathString); + util.createFile(fs, path, 1024L, replFactor, 1024L); + util.waitReplication(fs, path, replFactor); + StringBuilder sb = new StringBuilder(); + for (LocatedBlock lb: util.getAllBlocks(fs, path)) { + sb.append(lb.getBlock().getLocalBlock().getBlockName() + " "); + } + String[] bIds = sb.toString().split(" "); + + // Make sure datanode is HEALTHY before down. + String outStr = runFsck(configuration, 0, true, "/", "-blockId", bIds[0]); + assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS)); + + FSNamesystem fsn = cluster.getNameNode().getNamesystem(); + BlockManager bm = fsn.getBlockManager(); + DatanodeManager dnm = bm.getDatanodeManager(); + DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0) + .getDatanodeId()); + final String dnName = dn.getXferAddr(); + + // Make the block on datanode enter stale state. + cluster.stopDataNode(0); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + try { + DatanodeInfo datanodeInfo = null; + for (DatanodeInfo info : fs.getDataNodeStats()) { + if (dnName.equals(info.getXferAddr())) { + datanodeInfo = info; + } + } + if (datanodeInfo != null && datanodeInfo.isStale(5000)) { + return true; + } + } catch (Exception e) { + LOG.warn("Unexpected exception: " + e); + return false; + } + return false; + } + }, 500, 30000); + outStr = runFsck(configuration, 6, true, "/", "-blockId", bIds[0]); + assertTrue(outStr.contains(NamenodeFsck.STALE_STATUS)); + } finally { + if (fs != null) { + fs.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + } + } + /** * Test for blockIdCK with block corruption. */