Skip to content

Commit

Permalink
HDDS-858. Start a Standalone Ratis Server on OM
Browse files Browse the repository at this point in the history
  • Loading branch information
hanishakoneru committed Dec 6, 2018
1 parent 8d882c3 commit 019836b
Show file tree
Hide file tree
Showing 8 changed files with 581 additions and 7 deletions.
98 changes: 97 additions & 1 deletion hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,103 @@
<value>1MB</value>
<tag>OZONE, CLIENT, MANAGEMENT</tag>
<description>Checksum will be computed for every bytes per checksum number
of bytes and stored sequentially.
of bytes and stored sequentially. The minimum value for this config is
256KB.
</description>
</property>

<property>
<name>ozone.om.ratis.enable</name>
<value>false</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>Property to enable or disable Ratis server on OM.
Please note - this is a temporary property to disable OM Ratis server.
</description>
</property>

<property>
<name>ozone.om.ratis.port</name>
<value>9872</value>
<tag>OZONE, OM, RATIS</tag>
<description>
The port number of the OzoneManager's Ratis server.
</description>
</property>

<property>
<name>ozone.om.ratis.random.port</name>
<value>false</value>
<tag>OZONE, OM, RATIS, DEBUG</tag>
<description>Allocates a random free port for OM's Ratis server. This is
used only while running unit tests.
</description>
</property>

<property>
<name>ozone.om.ratis.rpc.type</name>
<value>GRPC</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>Ratis supports different kinds of transports like netty, GRPC,
Hadoop RPC etc. This picks one of those for this cluster.
</description>
</property>

<property>
<name>ozone.om.ratis.storage.dir</name>
<value/>
<tag>OZONE, OM, STORAGE, MANAGEMENT, RATIS</tag>
<description>This directory is used for storing OM's Ratis metadata like
logs. If this is not set, the default metadata directory is used and a
warning is logged. Ideally, this should be mapped to a fast disk like
an SSD.
</description>
</property>

<property>
<name>ozone.om.ratis.segment.size</name>
<value>16KB</value>
<tag>OZONE, OM, RATIS, PERFORMANCE</tag>
<description>The size of the raft segment used by Apache Ratis on OM.
(16 KB by default)
</description>
</property>

<property>
<name>ozone.om.ratis.segment.preallocated.size</name>
<value>128MB</value>
<tag>OZONE, OM, RATIS, PERFORMANCE</tag>
<description>The size of the buffer which is preallocated for raft segment
used by Apache Ratis on OM. (128 MB by default)
</description>
</property>

<property>
<name>ozone.om.ratis.server.request.timeout</name>
<value>3s</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>The timeout duration for OM's Ratis server request.</description>
</property>

<property>
<name>ozone.om.ratis.server.retry.cache.timeout</name>
<value>600000ms</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>Retry Cache entry timeout for OM's ratis server.</description>
</property>

<property>
<name>ozone.om.ratis.minimum.timeout</name>
<value>1s</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>The minimum timeout duration for OM's Ratis server rpc.
</description>
</property>

<property>
<name>ozone.om.ratis.client.request.timeout</name>
<value>3s</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>The timeout duration for OM Ratis client request.
</description>
</property>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,13 +337,16 @@ public static String getOzoneDatanodeRatisDirectory(Configuration conf) {
OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR);

if (Strings.isNullOrEmpty(storageDir)) {
LOG.warn("Storage directory for Ratis is not configured." +
"Mapping Ratis storage under {}. It is a good idea " +
"to map this to an SSD disk. Falling back to {}",
storageDir, HddsConfigKeys.OZONE_METADATA_DIRS);
File metaDirPath = ServerUtils.getOzoneMetaDirPath(conf);
storageDir = (new File (metaDirPath, "ratis")).getPath();
storageDir = getDefaultRatisDirectory(conf);
}
return storageDir;
}

/**
 * Returns the fallback location for Ratis storage: a "ratis" entry under
 * the Ozone metadata directory resolved from the given configuration.
 * A warning is logged on every call, before the metadata directory is
 * looked up.
 *
 * @param conf configuration passed to ServerUtils.getOzoneMetaDirPath
 * @return path string of the "ratis" entry under the metadata directory
 */
public static String getDefaultRatisDirectory(Configuration conf) {
  LOG.warn("Storage directory for Ratis is not configured. It is a good " +
      "idea to map this to an SSD disk. Falling back to {}",
      HddsConfigKeys.OZONE_METADATA_DIRS);
  final File ozoneMetaDir = ServerUtils.getOzoneMetaDirPath(conf);
  final File ratisDir = new File(ozoneMetaDir, "ratis");
  return ratisDir.getPath();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

package org.apache.hadoop.ozone.om;

import java.util.concurrent.TimeUnit;

import org.apache.hadoop.ozone.OzoneAcl;
import org.apache.ratis.util.TimeDuration;

/**
* Ozone Manager Constants.
Expand Down Expand Up @@ -86,4 +89,61 @@ private OMConfigKeys() {
public static final String OZONE_OM_METRICS_SAVE_INTERVAL =
"ozone.om.save.metrics.interval";
public static final String OZONE_OM_METRICS_SAVE_INTERVAL_DEFAULT = "5m";

/**
* OM Ratis related configurations.
* Each *_KEY constant is the property name looked up in the configuration;
* the matching *_DEFAULT constant is the value used when the property is
* unset.
*/
// Switch that turns the Ratis server on OM on or off (off by default).
public static final String OZONE_OM_RATIS_ENABLE_KEY
= "ozone.om.ratis.enable";
public static final boolean OZONE_OM_RATIS_ENABLE_DEFAULT
= false;
// Port number of the OM Ratis server.
public static final String OZONE_OM_RATIS_PORT_KEY
= "ozone.om.ratis.port";
public static final int OZONE_OM_RATIS_PORT_DEFAULT
= 9872;
// When set to true, allocate a random free port for ozone ratis server
// NOTE(review): the default below is named *_KEY_DEFAULT, unlike the other
// *_DEFAULT constants in this group; the name is left untouched here since
// it is public.
public static final String OZONE_OM_RATIS_RANDOM_PORT_KEY =
"ozone.om.ratis.random.port";
public static final boolean OZONE_OM_RATIS_RANDOM_PORT_KEY_DEFAULT
= false;
// Transport type used by the OM Ratis server, given as a string.
public static final String OZONE_OM_RATIS_RPC_TYPE_KEY
= "ozone.om.ratis.rpc.type";
public static final String OZONE_OM_RATIS_RPC_TYPE_DEFAULT
= "GRPC";

// OM Ratis Log configurations
// Storage directory key; no default constant is declared for it here.
public static final String OZONE_OM_RATIS_STORAGE_DIR
= "ozone.om.ratis.storage.dir";
// Raft log segment size, expressed as a size string.
public static final String OZONE_OM_RATIS_SEGMENT_SIZE_KEY
= "ozone.om.ratis.segment.size";
public static final String OZONE_OM_RATIS_SEGMENT_SIZE_DEFAULT
= "16KB";
// Preallocated size for a raft log segment, expressed as a size string.
public static final String OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY
= "ozone.om.ratis.segment.preallocated.size";
public static final String OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT
= "128MB";

// OM Ratis server configurations
// Server request timeout; default is 3000 milliseconds.
public static final String OZONE_OM_RATIS_SERVER_REQUEST_TIMEOUT_KEY
= "ozone.om.ratis.server.request.timeout";
public static final TimeDuration
OZONE_OM_RATIS_SERVER_REQUEST_TIMEOUT_DEFAULT
= TimeDuration.valueOf(3000, TimeUnit.MILLISECONDS);
// Retry cache timeout; default is 600000 milliseconds.
public static final String
OZONE_OM_RATIS_SERVER_RETRY_CACHE_TIMEOUT_KEY
= "ozone.om.ratis.server.retry.cache.timeout";
public static final TimeDuration
OZONE_OM_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DEFAULT
= TimeDuration.valueOf(600000, TimeUnit.MILLISECONDS);
// Minimum timeout; default is 1 second.
public static final String OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY
= "ozone.om.ratis.minimum.timeout";
public static final TimeDuration OZONE_OM_RATIS_MINIMUM_TIMEOUT_DEFAULT
= TimeDuration.valueOf(1, TimeUnit.SECONDS);

// OM Ratis client configurations
// Client request timeout; default is 3000 milliseconds.
public static final String OZONE_OM_RATIS_CLIENT_REQUEST_TIMEOUT_KEY
= "ozone.om.ratis.client.request.timeout";
public static final TimeDuration
OZONE_OM_RATIS_CLIENT_REQUEST_TIMEOUT_DEFAULT
= TimeDuration.valueOf(3000, TimeUnit.MILLISECONDS);
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import org.apache.hadoop.utils.db.Table;
import org.apache.hadoop.utils.db.Table.KeyValue;
import org.apache.hadoop.utils.db.TableIterator;
import org.apache.ratis.util.LifeCycle;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -1368,4 +1369,25 @@ public void testGetServiceList() throws IOException {
Assert.assertEquals(NetUtils.createSocketAddr(
conf.get(OZONE_SCM_CLIENT_ADDRESS_KEY)), scmAddress);
}

/**
 * Test that OM Ratis server is started only when OZONE_OM_RATIS_ENABLE_KEY is
 * set to true.
 *
 * Steps: assert that the OM reports no Ratis server state, set the enable
 * flag on the configuration, restart the OM, then assert that the reported
 * state is RUNNING.
 *
 * NOTE(review): "Ratsi" in the method name looks like a typo for "Ratis".
 * NOTE(review): the enable flag is left set on conf when the test ends;
 * confirm this does not affect other tests that use the same cluster.
 *
 * @throws IOException declared by the test; propagates from the calls below
 */
@Test
public void testRatsiServerOnOmInitialization() throws IOException {
// OM Ratis server should not be started when OZONE_OM_RATIS_ENABLE_KEY
// is not set to true
// (assumes the cluster was started without the flag enabled - TODO confirm)
Assert.assertNull("OM Ratis server started though OM Ratis is disabled.",
cluster.getOzoneManager().getOmRatisServerState());

// Enable OM Ratis and restart OM
conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true);
cluster.restartOzoneManager();

// On enabling OM Ratis, the Ratis server should be started
// The OzoneManager is fetched again from the cluster after the restart.
Assert.assertEquals("OM Ratis server did not start",
LifeCycle.State.RUNNING,
cluster.getOzoneManager().getOmRatisServerState());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,15 @@
import org.apache.hadoop.ozone.om.helpers.ServiceInfo;
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
import org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB;
import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneAclInfo;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ServicePort;
import org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils;
import org.apache.ratis.util.LifeCycle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -136,6 +138,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
private final OzoneConfiguration configuration;
private RPC.Server omRpcServer;
private InetSocketAddress omRpcAddress;
private OzoneManagerRatisServer omRatisServer;
private final OMMetadataManager metadataManager;
private final VolumeManager volumeManager;
private final BucketManager bucketManager;
Expand Down Expand Up @@ -509,6 +512,15 @@ public OMStorage getOmStorage() {
return omStorage;
}

/**
 * Reports the lifecycle state of the OM Ratis server.
 *
 * @return the state returned by the Ratis server, or {@code null} when no
 *         Ratis server instance is held by this OzoneManager
 */
@VisibleForTesting
public LifeCycle.State getOmRatisServerState() {
  return omRatisServer != null ? omRatisServer.getServerState() : null;
}

/**
* Get metadata manager.
*
Expand Down Expand Up @@ -542,6 +554,22 @@ public void start() throws IOException {
LOG.info(buildRpcServerStartMessage("OzoneManager RPC server",
omRpcAddress));

boolean omRatisEnabled = configuration.getBoolean(
OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY,
OMConfigKeys.OZONE_OM_RATIS_ENABLE_DEFAULT);
// This is a temporary check. Once fully implemented, all OM state change
// should go through Ratis, either standalone (for non-HA) or replicated
// (for HA).
if (omRatisEnabled) {
omRatisServer = OzoneManagerRatisServer.newOMRatisServer(
omStorage.getOmId(), configuration);
omRatisServer.start();

LOG.info("OzoneManager Ratis server started at port {}",
omRatisServer.getServerPort());
} else {
omRatisServer = null;
}

DefaultMetricsSystem.initialize("OzoneManager");

Expand Down Expand Up @@ -584,6 +612,9 @@ public void stop() {
metricsTimer = null;
scheduleOMMetricsWriteTask = null;
omRpcServer.stop();
if (omRatisServer != null) {
omRatisServer.stop();
}
keyManager.stop();
httpServer.stop();
metadataManager.stop();
Expand Down
Loading

0 comments on commit 019836b

Please sign in to comment.