Skip to content

Commit

Permalink
HDFS-17033. Update fsck to display stale state info of blocks accurat…
Browse files Browse the repository at this point in the history
…ely. (#5815). Contributed by Wang Yuanben.

Reviewed-by: Shilun Fan <[email protected]>
Reviewed-by: Xing Lin <[email protected]>
Reviewed-by: Hualong Zhang <[email protected]>
Signed-off-by: He Xiaoqiao <[email protected]>
  • Loading branch information
YuanbenWang authored Jul 10, 2023
1 parent a84284e commit 37b2d36
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
public static final String ENTERING_MAINTENANCE_STATUS =
"is ENTERING MAINTENANCE";
public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE";
public static final String STALE_STATUS = "is STALE";
public static final String NONEXISTENT_STATUS = "does not exist";
public static final String FAILURE_STATUS = "FAILED";
public static final String UNDEFINED = "undefined";
Expand Down Expand Up @@ -370,6 +371,8 @@ private void printDatanodeReplicaStatus(Block block,
out.print(ENTERING_MAINTENANCE_STATUS);
} else if (this.showMaintenanceState && dn.isInMaintenance()) {
out.print(IN_MAINTENANCE_STATUS);
} else if (dn.isStale(this.staleInterval)) {
out.print(STALE_STATUS);
} else {
out.print(HEALTHY_STATUS);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,8 @@ else if (args[idx].equals("-replicaDetails")) {
errCode = 4;
} else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
errCode = 5;
} else if (lastLine.endsWith(NamenodeFsck.STALE_STATUS)) {
errCode = 6;
}
return errCode;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.hadoop.hdfs.server.namenode;

import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
Expand Down Expand Up @@ -1681,6 +1682,91 @@ public Boolean get() {
assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
}

/**
* Test for blockIdCK with datanode staleness.
*/
@Test
public void testBlockIdCKStaleness() throws Exception {
final short replFactor = 1;
final long blockSize = 512;
Configuration configuration = new Configuration();

// Shorten dfs.namenode.stale.datanode.interval for easier testing.
configuration.setLong(DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5000);
configuration.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
configuration.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);

String[] racks = {"/rack1", "/rack2"};
String[] hosts = {"host1", "host2"};

File builderBaseDir = new File(GenericTestUtils.getRandomizedTempPath());
cluster = new MiniDFSCluster.Builder(configuration, builderBaseDir)
.hosts(hosts).racks(racks).build();
assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp();
DistributedFileSystem fs = cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", fs);

try {
DFSTestUtil util = new DFSTestUtil.Builder().
setName(getClass().getSimpleName()).setNumFiles(1).build();

// Create one file.
final String pathString = new String("/testfile");
final Path path = new Path(pathString);
util.createFile(fs, path, 1024L, replFactor, 1024L);
util.waitReplication(fs, path, replFactor);
StringBuilder sb = new StringBuilder();
for (LocatedBlock lb: util.getAllBlocks(fs, path)) {
sb.append(lb.getBlock().getLocalBlock().getBlockName() + " ");
}
String[] bIds = sb.toString().split(" ");

// Make sure datanode is HEALTHY before down.
String outStr = runFsck(configuration, 0, true, "/", "-blockId", bIds[0]);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));

FSNamesystem fsn = cluster.getNameNode().getNamesystem();
BlockManager bm = fsn.getBlockManager();
DatanodeManager dnm = bm.getDatanodeManager();
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
.getDatanodeId());
final String dnName = dn.getXferAddr();

// Make the block on datanode enter stale state.
cluster.stopDataNode(0);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
try {
DatanodeInfo datanodeInfo = null;
for (DatanodeInfo info : fs.getDataNodeStats()) {
if (dnName.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (datanodeInfo != null && datanodeInfo.isStale(5000)) {
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
outStr = runFsck(configuration, 6, true, "/", "-blockId", bIds[0]);
assertTrue(outStr.contains(NamenodeFsck.STALE_STATUS));
} finally {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
}

/**
* Test for blockIdCK with block corruption.
*/
Expand Down

0 comments on commit 37b2d36

Please sign in to comment.