Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDFS-16457.Make fs.getspaceused.classname reconfigurable #4069

Merged
merged 15 commits into from
Apr 8, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.datanode;


import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_CLASSNAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DU_INTERVAL_DEFAULT;
Expand Down Expand Up @@ -349,7 +350,8 @@ public class DataNode extends ReconfigurableBase
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
FS_DU_INTERVAL_KEY,
FS_GETSPACEUSED_JITTER_KEY));
FS_GETSPACEUSED_JITTER_KEY,
FS_GETSPACEUSED_CLASSNAME));

public static final Log METRICS_LOG = LogFactory.getLog("DataNodeMetricsLog");

Expand Down Expand Up @@ -684,13 +686,30 @@ public String reconfigurePropertyImpl(String property, String newVal)
case FS_DU_INTERVAL_KEY:
case FS_GETSPACEUSED_JITTER_KEY:
return reconfDfsUsageParameters(property, newVal);
case FS_GETSPACEUSED_CLASSNAME:
reconfSpaceUsedKlass();
return newVal;
default:
break;
}
throw new ReconfigurationException(
property, newVal, getConf().get(property));
}

private void reconfSpaceUsedKlass(){
List<FsVolumeImpl> volumeList = data.getVolumeList();
for (FsVolumeImpl fsVolume : volumeList) {
Map<String, BlockPoolSlice> blockPoolSlices = fsVolume.getBlockPoolSlices();
for (Entry<String, BlockPoolSlice> entry : blockPoolSlices.entrySet()) {
try {
entry.getValue().refreshSpaceUsedKlass(getNewConf());
} catch (IOException e) {
e.printStackTrace();
}
}
}
}

private String reconfDataXceiverParameters(String property, String newVal)
throws ReconfigurationException {
String result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,16 @@ public GetSpaceUsed getDfsUsage() {
return dfsUsage;
}

public void refreshSpaceUsedKlass(Configuration conf) throws IOException {
((CachingGetSpaceUsed) dfsUsage).close();
this.dfsUsage = new FSCachingGetSpaceUsed.Builder().setBpid(bpid)
.setVolume(volume)
.setPath(bpDir)
.setConf(conf)
.setInitialUsed(loadDfsUsed())
.build();
}

private synchronized static void initializeAddReplicaPool(Configuration conf,
FsDatasetImpl dataset) {
if (addReplicaThreadPool == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.hadoop.hdfs.server.datanode;

import org.apache.hadoop.fs.CachingGetSpaceUsed;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_GETSPACEUSED_CLASSNAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DU_INTERVAL_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DU_INTERVAL_KEY;
Expand Down Expand Up @@ -58,7 +60,6 @@

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.fs.CachingGetSpaceUsed;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.GetSpaceUsed;
Expand Down Expand Up @@ -86,6 +87,7 @@ public class TestDataNodeReconfiguration {
private final int NUM_NAME_NODE = 1;
private final int NUM_DATA_NODE = 10;
private MiniDFSCluster cluster;
private static long counter = 0;

@Before
public void Setup() throws IOException {
Expand All @@ -109,6 +111,7 @@ private void startDFSCluster(int numNameNodes, int numDataNodes)
throws IOException {
Configuration conf = new Configuration();
conf.setBoolean(DFS_DATANODE_PEER_STATS_ENABLED_KEY, true);
conf.setClass(FS_GETSPACEUSED_CLASSNAME, DummyCachingGetSpaceUsed.class, CachingGetSpaceUsed.class);

MiniDFSNNTopology nnTopology = MiniDFSNNTopology
.simpleFederatedTopology(numNameNodes);
Expand Down Expand Up @@ -756,4 +759,33 @@ public void testDfsUsageParameters() throws ReconfigurationException {
}
}
}

public static class DummyCachingGetSpaceUsed extends CachingGetSpaceUsed {
public DummyCachingGetSpaceUsed(Builder builder) throws IOException {
super(builder.setInterval(1000).setJitter(0L));
}

@Override
protected void refresh() {
counter++;
}
}

@Test
public void testDfsUsageKlass() throws IOException, ReconfigurationException,
InterruptedException {

long lastCounter = counter;
Thread.sleep(5000);
assertTrue(counter > lastCounter);

for (int i = 0; i < NUM_DATA_NODE; i++) {
DataNode dn = cluster.getDataNodes().get(i);
dn.reconfigurePropertyImpl(FS_GETSPACEUSED_CLASSNAME, null);
}

lastCounter = counter;
Thread.sleep(5000);
assertEquals(lastCounter, counter);
}
Copy link
Member

@tasanuma tasanuma Mar 31, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@singer-bin Thanks for updating the PR.
How about we stop using conf.setClass in startDFCluster, and reconfigure DummyCachingGetSpaceUsed in the unit test, and replace the order of the assertions? (But after this change, somehow the unit test failed.)

Suggested change
@Test
public void testDfsUsageKlass() throws IOException, ReconfigurationException,
InterruptedException {
long lastCounter = counter;
Thread.sleep(5000);
assertTrue(counter > lastCounter);
for (int i = 0; i < NUM_DATA_NODE; i++) {
DataNode dn = cluster.getDataNodes().get(i);
dn.reconfigurePropertyImpl(FS_GETSPACEUSED_CLASSNAME, null);
}
lastCounter = counter;
Thread.sleep(5000);
assertEquals(lastCounter, counter);
}
@Test
public void testDfsUsageKlass() throws IOException, ReconfigurationException,
InterruptedException {
long lastCounter = counter;
Thread.sleep(5000);
assertEquals(lastCounter, counter);
for (int i = 0; i < NUM_DATA_NODE; i++) {
DataNode dn = cluster.getDataNodes().get(i);
dn.reconfigurePropertyImpl(FS_GETSPACEUSED_CLASSNAME, DummyCachingGetSpaceUsed.class.getName());
}
lastCounter = counter;
Thread.sleep(5000);
assertTrue(counter > lastCounter);
}

Copy link
Contributor Author

@singer-bin singer-bin Apr 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, thanks for your comment. My UT ideas come from HDFS-15120 ,you can take a look to help understand. My conf comes from the getNewConf() method as follows:

private void reconfSpaceUsedKlass(){ 
    List<FsVolumeImpl> volumeList = data.getVolumeList(); 
    for (FsVolumeImpl fsVolume : volumeList) { 
      Map<String, BlockPoolSlice> blockPoolSlices = fsVolume.getBlockPoolSlices(); 
      for (BlockPoolSlice bp : blockPoolSlices.values()) { 
        try { 
          bp.refreshSpaceUsedKlass(getNewConf()); 
        } catch (IOException e) { 
          e.printStackTrace(); 
        } 
      } 
    } 
  }

so , passing in DummyCachingGetSpaceUsed.class.toString() in

dn.reconfigurePropertyImpl(FS_GETSPACEUSED_CLASSNAME, DummyCachingGetSpaceUsed.class.toString());
doesn't make sense to me.
I need to set it to my own custom class when the service starts, then wait for the counter to change, and then use dn.reconfigurePropertyImpl(FS_GETSPACEUSED_CLASSNAME, null); to refresh the value to the default,and then the counter is not changing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking forward to your comments and suggestions. @tasanuma

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@singer-bin

  • As well as other reconfiguring properties of DataNode, I prefer to be able to update parameters by calling reconf.. .Parameters(property, newVal).
  • I think this PR is similar to HDFS-16413. Reconfig dfs usage parameters for datanode #3863. Instead of creating the new method of reconfSpaceUsedKlass(), how about extending reconfDfsUsageParameters to update fs.getspaceused.classname?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you very much for your comment, I will consider it. @tasanuma

}
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ public void testDataNodeGetReconfigurableProperties() throws IOException {
final List<String> outs = Lists.newArrayList();
final List<String> errs = Lists.newArrayList();
getReconfigurableProperties("datanode", address, outs, errs);
assertEquals(18, outs.size());
assertEquals(19, outs.size());
assertEquals(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, outs.get(1));
}

Expand Down