Skip to content

Commit

Permalink
YARN-11709. NodeManager should be shut down or blacklisted when it ca…
Browse files Browse the repository at this point in the history
…cannot run program /var/lib/yarn-ce/bin/container-executor (apache#6960)
  • Loading branch information
ferdelyi authored Aug 16, 2024
1 parent 5f93edf commit f000942
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,10 @@ public void startLocalizer(LocalizerStartContext ctx)

} catch (PrivilegedOperationException e) {
int exitCode = e.getExitCode();
LOG.warn("Exit code from container {} startLocalizer is : {}",
locId, exitCode, e);
LOG.error("Unrecoverable issue occurred. Marking the node as unhealthy to prevent "
+ "further containers to get scheduled on the node and cause application failures. " +
"Exit code from the container " + locId + "startLocalizer is : " + exitCode, e);
nmContext.getNodeStatusUpdater().reportException(e);

throw new IOException("Application " + appId + " initialization failed" +
" (exitCode=" + exitCode + ") with output: " + e.getOutput(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
Expand All @@ -37,6 +38,7 @@
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.lang.reflect.Field;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -345,7 +347,8 @@ public void testStartLocalizer() throws IOException {

@Test
public void testContainerLaunchError()
throws IOException, ContainerExecutionException, URISyntaxException {
throws IOException, ContainerExecutionException, URISyntaxException, IllegalAccessException,
NoSuchFieldException {

final String[] expecetedMessage = {"badcommand", "Exit code: 24"};
final String[] executor = {
Expand Down Expand Up @@ -387,6 +390,14 @@ public Object answer(InvocationOnMock invocationOnMock)
dirsHandler.init(conf);
mockExec.setConf(conf);

// Set the private nmContext field via reflection, without initializing the LinuxContainerExecutor
NodeManager nodeManager = new NodeManager();
NodeManager.NMContext nmContext =
nodeManager.createNMContext(null, null, null, false, conf);
Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext");
lceNmContext.setAccessible(true);
lceNmContext.set(mockExec, nmContext);

String appSubmitter = "nobody";
String cmd = String
.valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.
Expand Down Expand Up @@ -601,15 +612,30 @@ public void testNoExitCodeFromPrivilegedOperation() throws Exception {
LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime(
spyPrivilegedExecutor);
runtime.initialize(conf, null);
mockExec = new LinuxContainerExecutor(runtime);
mockExec.setConf(conf);
// Anonymous LinuxContainerExecutor subclass whose privileged-operation executor
// is spyPrivilegedExecutor, the same object that was handed to the runtime above.
LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) {
@Override
protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
// Always hand back the test's executor rather than whatever the parent class would supply.
return spyPrivilegedExecutor;
}
};
lce.setConf(conf);

// Set the private nmContext field via reflection, without initializing the LinuxContainerExecutor
NodeManager nodeManager = new NodeManager();
NodeManager.NMContext nmContext =
nodeManager.createNMContext(null, null, null, false, conf);
NodeManager.NMContext spyNmContext = spy(nmContext);

//initialize a mock NodeStatusUpdater
NodeStatusUpdaterImpl nodeStatusUpdater = mock(NodeStatusUpdaterImpl.class);
nmContext.setNodeStatusUpdater(nodeStatusUpdater);
// Stub the void reportException call on the mocked NodeStatusUpdater so that it does nothing
// when the executor reports the exception to set the NM unhealthy.
doNothing().when(nodeStatusUpdater).reportException(any());

Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext");
lceNmContext.setAccessible(true);
lceNmContext.set(lce, nmContext);

InetSocketAddress address = InetSocketAddress.createUnresolved(
"localhost", 8040);
Path nmPrivateCTokensPath= new Path("file:///bin/nmPrivateCTokensPath");
Expand Down Expand Up @@ -672,6 +698,9 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
assertTrue("Unexpected exception " + e,
e.getMessage().contains("exit code"));
}

//verify that the NM was set unhealthy on PrivilegedOperationException
verify(nodeStatusUpdater, times(1)).reportException(any());
}

@Test
Expand Down

0 comments on commit f000942

Please sign in to comment.