Skip to content

Commit

Permalink
HDFS-17285. RBF: Add a safe mode check period configuration (apache#6347)
Browse files Browse the repository at this point in the history
Contributed by LiuGuH.

Reviewed-by: Inigo Goiri <[email protected]>
Reviewed-by: Ayush Saxena <[email protected]>
Signed-off-by: Shilun Fan <[email protected]>
  • Loading branch information
LiuGuH authored Dec 21, 2023
1 parent 5dd1977 commit b4fed58
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
FEDERATION_ROUTER_PREFIX + "safemode.expiration";
public static final long DFS_ROUTER_SAFEMODE_EXPIRATION_DEFAULT =
3 * DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT;
public static final String DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS =
FEDERATION_ROUTER_PREFIX + "safemode.checkperiod";
public static final long DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS_DEFAULT =
TimeUnit.SECONDS.toMillis(5);

// HDFS Router-based federation mount table entries
/** Maximum number of cache entries to have. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ protected void serviceInit(Configuration conf) throws Exception {

// Use the dedicated safe mode check period (no longer the cache update interval)
this.setIntervalMs(conf.getTimeDuration(
RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT,
RBFConfigKeys.DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS,
RBFConfigKeys.DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS_DEFAULT,
TimeUnit.MILLISECONDS));

this.startupInterval = conf.getTimeDuration(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,17 @@
</description>
</property>

<property>
<name>dfs.federation.router.safemode.checkperiod</name>
<value>5s</value>
<description>
How often the Router should check safe mode. This
setting supports multiple time unit suffixes as described in
dfs.heartbeat.interval. If no suffix is specified then milliseconds is
assumed.
</description>
</property>

<property>
<name>dfs.federation.router.monitor.namenode</name>
<value></value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.federation.router;

import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_EXPIRATION;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.deleteStateStore;
Expand Down Expand Up @@ -70,6 +71,9 @@ public static void create() throws IOException {
// 200 ms cache refresh
conf.setTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
200, TimeUnit.MILLISECONDS);
// 100 ms safemode checkperiod
conf.setTimeDuration(DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS,
100, TimeUnit.MILLISECONDS);
// 1 sec post cache update before entering safemode (2 intervals)
conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION,
TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
Expand Down Expand Up @@ -133,7 +137,7 @@ public void testRouterExitSafemode()
long interval =
conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION,
TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) +
conf.getTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
conf.getTimeDuration(DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS,
TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
Thread.sleep(interval);

Expand Down

0 comments on commit b4fed58

Please sign in to comment.