From c7539783f29a8a641e278c25996b29e4c5783025 Mon Sep 17 00:00:00 2001 From: Steve Vaughan Jr Date: Mon, 25 Jul 2022 11:09:55 -0400 Subject: [PATCH 1/5] HDFS-16688. Unresolved Hosts during startup are not synced by JournalNodes Allow unresolved names during startup, so that when the hosts become available they will be included in JournalNode synchronization. This also requires that mechanisms that return a string representation which includes the IP address had to be updated to handle unresolved addresses. --- .../hadoop/hdfs/qjournal/client/IPCLoggerChannel.java | 7 ++++++- .../hdfs/qjournal/client/IPCLoggerChannelMetrics.java | 7 ++++++- .../java/org/apache/hadoop/hdfs/server/common/Util.java | 5 ++--- .../hadoop/hdfs/qjournal/client/TestQJMWithFaults.java | 5 ++++- .../java/org/apache/hadoop/hdfs/tools/TestGetConf.java | 6 +++--- 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java index 4b7e59c51f13e..4577ae8b1db3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java @@ -599,7 +599,12 @@ public ListenableFuture getJournalCTime() { @Override public String toString() { - return InetAddresses.toAddrString(addr.getAddress()) + ':' + addr.getPort(); + if (addr.isUnresolved()) { + return addr.getHostName() + ":" + addr.getPort(); + } else { + return InetAddresses.toAddrString(addr.getAddress()) + ':' + + addr.getPort(); + } } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java index 6eef8ffd38620..0d57d06eb9660 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannelMetrics.java @@ -104,7 +104,12 @@ static IPCLoggerChannelMetrics create(IPCLoggerChannel ch) { private static String getName(IPCLoggerChannel ch) { InetSocketAddress addr = ch.getRemoteAddress(); - String addrStr = addr.getAddress().getHostAddress(); + String addrStr; + if (addr.isUnresolved()) { + addrStr = addr.getHostName(); + } else { + addrStr = addr.getAddress().getHostAddress(); + } // IPv6 addresses have colons, which aren't allowed as part of // MBean names. Replace with '.' diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java index 5039db6ceb488..9bdeae7d43958 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java @@ -406,9 +406,8 @@ public static List getAddressesList(URI uri, Configuration co } else { InetSocketAddress isa = NetUtils.createSocketAddr( addr, DFSConfigKeys.DFS_JOURNALNODE_RPC_PORT_DEFAULT); - if (isa.isUnresolved()) { - throw new UnknownHostException(addr); - } + // The resulting address may be unresolved, but that will be corrected within Client once the + // server becomes available. addrs.add(isa); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java index e9f46e335c6e6..0151a4d9e80c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java @@ -38,6 +38,8 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; @@ -192,7 +194,8 @@ public void testRecoverAfterDoubleFailures() throws Exception { } /** - * Expect {@link UnknownHostException} if a hostname can't be resolved. + * Expect an unresolved address if a hostname can't be resolved, but allowing the QJM to be + * configured. */ @Test public void testUnresolvableHostName() throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java index 82ea34f2e0f20..9c984a533513b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java @@ -484,10 +484,10 @@ public void testGetJournalNodes() throws Exception { conf.clear(); } - /* - ** Test for unknown journal node host exception. + /** + * Test handling of unresolvable journal node hosts. They are still configured assuming that + * they will be resolvable in the future. */ - @Test(expected = UnknownHostException.class, timeout = 10000) public void testUnknownJournalNodeHost() throws URISyntaxException, IOException { String journalsBaseUri = "qjournal://jn1:8020;jn2:8020;jn3:8020"; From a48b98853241b20f2725bda31ee833f2e8f6bbac Mon Sep 17 00:00:00 2001 From: Steve Vaughan Jr Date: Wed, 10 Aug 2022 21:35:37 -0400 Subject: [PATCH 2/5] Allow control if resolution is required Add a new configuration property that allows journalnodes to be temporarily unresolved. This allows the default behavior to be the same, but allows namenodes and journalnodes to handle resolution later. When not required, the servers will trust that the configuration is the desired state. --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 6 +++++ .../hadoop/hdfs/server/common/Util.java | 12 ++++++--- .../src/main/resources/hdfs-default.xml | 15 ++++++++++- .../hdfs/qjournal/server/TestJournalNode.java | 25 +++++++++++++++++++ 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 1ab7edd6adc00..0055a2eaf7c20 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -674,6 +674,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_ENABLED_DEFAULT = false; + public static final String + DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_REQUIRED = + "dfs.namenode.edits.qjournals.resolution-required"; + public static final boolean + DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_REQUIRED_DEFAULT = true; + public static final String DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_RESOLVER_IMPL = "dfs.namenode.edits.qjournals.resolver.impl"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java index 9bdeae7d43958..f81e547a3880f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java @@ -378,6 +378,11 @@ public static List getAddressesList(URI uri, Configuration co boolean resolveNeeded = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_ENABLED, DFSConfigKeys.DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_ENABLED_DEFAULT); + // If not required, the resulting address may be unresolved, but that can be corrected within + // the client once the server becomes available. + boolean resolveRequired = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_REQUIRED, + DFSConfigKeys.DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_REQUIRED_DEFAULT); DomainNameResolver dnr = DomainNameResolverFactory.newInstance( conf, DFSConfigKeys.DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_RESOLVER_IMPL); @@ -394,7 +399,7 @@ public static List getAddressesList(URI uri, Configuration co // QJM should just use FQDN String[] hostnames = dnr .getAllResolvedHostnameByDomainName(isa.getHostName(), true); - if (hostnames.length == 0) { + if (hostnames.length == 0 && resolveRequired) { throw new UnknownHostException(addr); } for (String h : hostnames) { @@ -406,8 +411,9 @@ public static List getAddressesList(URI uri, Configuration co } else { InetSocketAddress isa = NetUtils.createSocketAddr( addr, DFSConfigKeys.DFS_JOURNALNODE_RPC_PORT_DEFAULT); - // The resulting address may be unresolved, but that will be corrected within Client once the - // server becomes available. + if (isa.isUnresolved() && resolveRequired) { + throw new UnknownHostException(addr); + } addrs.add(isa); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 5643a9b5c5ee1..d04aaca1a3aa2 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -508,7 +508,20 @@ Determines if the given qjournals address is a domain name which needs to be resolved. - This is used by namenode to resolve qjournals. + This is used by namenode to resolve qjournals, and journalnode for syncing. + + + + + dfs.namenode.edits.qjournals.resolution-required + true + + Determines if the given qjournals address is a domain name which is + required to be resolved. + if resolution is required, then unresolvable addresses are ignored. + if resolution is not required, then the address may be temporarily + unresolved, allowing the client to resolve it later. + This is used by namenode to resolve qjournals, and journalnode for syncing. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java index 064dd9e5dd86e..92fa698cb8390 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java @@ -124,6 +124,11 @@ public void setup() throws Exception { "testJournalNodeSyncerNotStartWhenSyncEnabled")) { conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, "qjournal://jn0:9900;jn1:9901/" + journalId); + } else if (testName.getMethodName().equals( + "testJournalNodeSyncerStartWhenSyncEnabledAndUnresolved")) { + conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, + "qjournal://jn0:9900;jn1:9901/" + journalId); + conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_EDITS_QJOURNALS_RESOLUTION_REQUIRED, false); } else if (testName.getMethodName().equals( "testJournalNodeSyncwithFederationTypeConfigWithNameServiceId")) { conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY +".ns1", @@ -570,6 +575,12 @@ public void testJournalNodeSyncerNotStartWhenSyncEnabledIncorrectURI() } + /** + * Require resolution. Syncer must not start when there aren't any other known journal + * nodes due to the required resolution. + * + * @throws IOException if unable to get or create the journal + */ @Test public void testJournalNodeSyncerNotStartWhenSyncEnabled() throws IOException { @@ -593,6 +604,20 @@ public void testJournalNodeSyncerNotStartWhenSyncEnabled() } + /** + * Attempt resolution, but allow unresolved. Syncer must start because there are other potential + * journal nodes and syncing is enabled. The syncing will include the unresolved addresses in + * syncing so that once the target server is available it can be included in syncing. + * + * @throws IOException if unable to get or create the journal + */ + @Test + public void testJournalNodeSyncerStartWhenSyncEnabledAndUnresolved() + throws IOException { + jn.getOrCreateJournal(journalId); + Assert.assertEquals(true, + jn.getJournalSyncerStatus(journalId)); + } @Test public void testJournalNodeSyncwithFederationTypeConfigWithNameServiceId() From 827c89e88f57c871f3c7a86c257727604474d8e7 Mon Sep 17 00:00:00 2001 From: Steve Vaughan Jr Date: Thu, 11 Aug 2022 10:12:23 -0400 Subject: [PATCH 3/5] Clean-up unused imports --- .../apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java | 3 --- .../test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java | 1 - 2 files changed, 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java index 0151a4d9e80c5..d5b8d63967ec5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java @@ -29,7 +29,6 @@ import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; -import java.net.UnknownHostException; import java.util.List; import java.util.Map; import java.util.Random; @@ -63,8 +62,6 @@ import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.slf4j.event.Level; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java index 9c984a533513b..074c88779580c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java @@ -37,7 +37,6 @@ import java.io.PrintStream; import java.net.InetSocketAddress; import java.net.URISyntaxException; -import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; From 9e0f026fe46b00926d499741bfe0c43f66e1b6a1 Mon Sep 17 00:00:00 2001 From: Steve Vaughan Jr Date: Fri, 12 Aug 2022 11:21:33 -0400 Subject: [PATCH 4/5] Fix typo that disabled test This had initially been intended as a change to the test, since allowing unresolved addresses was the goal. I instead introduced a setting to control when resolution was required. --- .../src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java index 074c88779580c..d717429f495d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java @@ -37,6 +37,7 @@ import java.io.PrintStream; import java.net.InetSocketAddress; import java.net.URISyntaxException; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -487,6 +488,7 @@ public void testGetJournalNodes() throws Exception { * Test handling of unresolvable journal node hosts. They are still configured assuming that * they will be resolvable in the future. */ + @Test(expected = UnknownHostException.class, timeout = 10000) public void testUnknownJournalNodeHost() throws URISyntaxException, IOException { String journalsBaseUri = "qjournal://jn1:8020;jn2:8020;jn3:8020"; From 1a2b49c34062fb00339ee49af9c961a3686decca Mon Sep 17 00:00:00 2001 From: Steve Vaughan Jr Date: Sun, 14 Aug 2022 09:04:11 -0400 Subject: [PATCH 5/5] Restore UnknownHostException with required With the introduction of "dfs.namenode.edits.qjournals.resolution-required" (defaults to true) we can restore the original check for a thrown exception. Also update the hostname to avoid incidental local matches during lookup. --- .../apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java index d5b8d63967ec5..23a7bf9ae595a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java @@ -29,6 +29,7 @@ import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; +import java.net.UnknownHostException; import java.util.List; import java.util.Map; import java.util.Random; @@ -191,8 +192,7 @@ public void testRecoverAfterDoubleFailures() throws Exception { } /** - * Expect an unresolved address if a hostname can't be resolved, but allowing the QJM to be - * configured. + * Expect {@link UnknownHostException} if a hostname can't be resolved. */ @Test public void testUnresolvableHostName() throws Exception {