Skip to content

Commit

Permalink
Merge pull request #45801 from JuliaLang/sf/local_manager_rmprocs
Browse files Browse the repository at this point in the history
[Distributed] `kill(::LocalManager, ...)` should actually call `kill()`
  • Loading branch information
staticfloat authored Jun 24, 2022
2 parents b2b8ce8 + fd1c21b commit 51c8812
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
23 changes: 23 additions & 0 deletions stdlib/Distributed/src/managers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -725,3 +725,26 @@ function kill(manager::SSHManager, pid::Int, config::WorkerConfig)
cancel_ssh_tunnel(config)
nothing
end

"""
    kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeout = 15, term_timeout = 15)

Shut down the local worker `pid`, escalating if it does not go away by itself.

An `exit` call is sent to the worker with `remote_do`, and a background task is
started that sleeps for `exit_timeout` seconds. If `config.process` has still not
exited by then, the task logs a warning and sends `SIGTERM`; if the process is
still alive after a further `term_timeout` seconds, it logs another warning and
sends `SIGKILL`.

This function does not wait for any of that: it returns `nothing` right after
starting the background task, which is handed to `errormonitor`.
"""
function kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeout = 15, term_timeout = 15)
    # Polite request first: ask the worker to run `exit()` through the normal
    # remote-call machinery.
    remote_do(exit, pid)

    # Re-reads `config.process` on every call, so each check sees its current state.
    still_running() = !process_exited(config.process)

    # Escalation happens in the background so the caller is never blocked by
    # the timeouts; `errormonitor` takes the task so failures in it are not lost.
    errormonitor(@async begin
        # Grace period for the cooperative `exit()` to take effect.
        sleep(exit_timeout)

        if still_running()
            @warn("Failed to gracefully kill worker $(pid), sending SIGTERM")
            kill(config.process, Base.SIGTERM)

            # Second grace period before resorting to SIGKILL.
            sleep(term_timeout)
            if still_running()
                @warn("Worker $(pid) ignored SIGTERM, sending SIGKILL")
                kill(config.process, Base.SIGKILL)
            end
        end
    end)

    return nothing
end
20 changes: 20 additions & 0 deletions stdlib/Distributed/test/distributed_exec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1856,6 +1856,26 @@ end end

include("splitrange.jl")

# Start the worker shutdown timeout tests with no workers attached (issue #45785)
rmprocs(workers())
begin
    # A cooperative worker must be removable without producing any messages
    w = only(addprocs(1))
    @test_nowarn wait(rmprocs([w]))

    # A worker that does not exit cleanly must produce the SIGTERM warning
    w = only(addprocs(1))
    @test_logs (:warn, r"sending SIGTERM") begin
        # Turn `exit()` into a no-op on the worker, so the 'exit()' message
        # that `rmprocs()` sends does nothing
        remote_do(() -> Core.eval(Base, :(exit() = nothing)), w)
        wait(rmprocs([w]))
    end
end

# The topology tests run last, after every worker has been removed, because a
# cluster supports only a single topology at any given time.
rmprocs(workers())
Expand Down

2 comments on commit 51c8812

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected. A full report can be found here.

Please sign in to comment.