From c1eccbddc2f1ba95b07e107ec80d3758b0c46f22 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 18 Oct 2024 15:59:48 -0400 Subject: [PATCH] reattach: don't kill process on failed reconnection During reattachment, we look to see if the process corresponding to the stored PID is running. If so, we try to connect to that process. If that fails, we kill the process under the presumption it's not working, and return ErrProcessNotFound. But during reattachment we don't know that the PID we have is still valid. Which means that the process we're trying to attach to may have exited and a different process has spawned with the same PID. This results in some unrelated process getting silently killed. This impacts Nomad when running the `rawexec` or `exec` task drivers, because the Nomad agent spawns an "executor" process via go-plugin to control the workloads, and these executors are left running when Nomad exits. If the executors die in the meantime (or the host is rebooted), then we can potentially kill a random process on the host. Because there's no way for go-plugin to know whether the process is a go-plugin server without connecting, this kill is never really safe. Remove it. Ref: https://github.com/hashicorp/nomad/issues/23969 Ref: https://hashicorp.atlassian.net/browse/NET-11233 --- internal/cmdrunner/cmd_reattach.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/cmdrunner/cmd_reattach.go b/internal/cmdrunner/cmd_reattach.go index dce1a86a..984c875d 100644 --- a/internal/cmdrunner/cmd_reattach.go +++ b/internal/cmdrunner/cmd_reattach.go @@ -28,7 +28,6 @@ func ReattachFunc(pid int, addr net.Addr) runner.ReattachFunc { // doesn't actually return an error if it can't find the process. conn, err := net.Dial(addr.Network(), addr.String()) if err != nil { - p.Kill() return nil, ErrProcessNotFound } conn.Close()