From 5b1172bb49fa60a8b448cc566825cabbdcad6938 Mon Sep 17 00:00:00 2001 From: Mikhail Khachayants Date: Tue, 19 Jul 2022 10:46:32 +0300 Subject: [PATCH] fix deadlock between play kube and cleanup There was a deadlock between two concurrent processes: play kube and cleanup, which is called after a container exits when a RestartPolicy is used. Before the fix, the cleanup command did not take the pod's lock, so the two processes could acquire the pod and container locks in different orders. [NO NEW TESTS NEEDED] Closes #14921 Signed-off-by: Mikhail Khachayants --- libpod/container_api.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/libpod/container_api.go b/libpod/container_api.go index 742eb6d3eb..10fc05f561 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -667,7 +667,24 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou // It also cleans up the network stack func (c *Container) Cleanup(ctx context.Context) error { if !c.batched { + // We need to lock the pod before we lock the container. + // To avoid races around cleaning up a container and the pod it is in. + if c.config.Pod != "" { + pod, err := c.runtime.state.Pod(c.config.Pod) + if err != nil { + return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), pod.ID()) + } + + // Lock the pod while we're cleaning up container + if pod.config.LockID == c.config.LockID { + return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID) + } + pod.lock.Lock() + defer pod.lock.Unlock() + } + c.lock.Lock() + defer c.lock.Unlock() if err := c.syncContainer(); err != nil {