From 5b1172bb49fa60a8b448cc566825cabbdcad6938 Mon Sep 17 00:00:00 2001
From: Mikhail Khachayants <tyler92@inbox.ru>
Date: Tue, 19 Jul 2022 10:46:32 +0300
Subject: [PATCH] fix deadlock between play kube and cleanup

There was a deadlock between two concurrent processes: play kube and cleanup,
which is called after a container exits when a RestartPolicy is used. Before
the fix, the cleanup command did not take the Pod's lock, so the two processes
could acquire the Pod and container locks in opposite orders.

[NO NEW TESTS NEEDED]

Closes #14921

Signed-off-by: Mikhail Khachayants <tyler92@inbox.ru>
---
 libpod/container_api.go | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/libpod/container_api.go b/libpod/container_api.go
index 742eb6d3eb..10fc05f561 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -667,7 +667,24 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
 // It also cleans up the network stack
 func (c *Container) Cleanup(ctx context.Context) error {
 	if !c.batched {
+		// We need to lock the pod before we lock the container.
+		// To avoid races around cleaning up a container and the pod it is in.
+		if c.config.Pod != "" {
+			pod, err := c.runtime.state.Pod(c.config.Pod)
+			if err != nil {
+				return errors.Wrapf(err, "container %s is in pod %s, but pod cannot be retrieved", c.ID(), pod.ID())
+			}
+
+			// Lock the pod while we're cleaning up container
+			if pod.config.LockID == c.config.LockID {
+				return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID)
+			}
+			pod.lock.Lock()
+			defer pod.lock.Unlock()
+		}
+
 		c.lock.Lock()
+
 		defer c.lock.Unlock()
 
 		if err := c.syncContainer(); err != nil {