fix deadlock between play kube and cleanup

There was a deadlock between two concurrent processes: play kube and cleanup, that is called after container exit when RestartPolicy is used. Before the fix, the cleanup command didn't lock Pod's lock, so there was a possibility of obtaining two locks in different order in two processes. [NO NEW TESTS NEEDED] Closes #14921 Signed-off-by: Mikhail Khachayants <[email protected]>
containers · Jul 19, 2022 · e7dfdba · e7dfdba
1 parent 252fc7c
commit e7dfdba
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 5 deletions.
diff --git a/libpod/container_api.go b/libpod/container_api.go
@@ -663,9 +663,9 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
 	return result.code, result.err
 }
 
-// Cleanup unmounts all mount points in container and cleans up container storage
-// It also cleans up the network stack
-func (c *Container) Cleanup(ctx context.Context) error {
+// cleanupLocked unmounts all mount points in container and cleans up container storage
+// It also cleans up the network stack. The caller must hold pod.lock.
+func (c *Container) cleanupLocked(ctx context.Context) error {
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
@@ -714,6 +714,29 @@ func (c *Container) Cleanup(ctx context.Context) error {
 	return c.cleanup(ctx)
 }
 
+// Cleanup unmounts all mount points in container and cleans up container storage
+// It also cleans up the network stack
+func (c *Container) Cleanup(ctx context.Context) error {
+	// We need to lock the pod before we lock the container.
+	// To avoid races around cleaning up a container and the pod it is in.
+	if c.config.Pod != "" {
+		pod, err := c.runtime.state.Pod(c.config.Pod)
+		if err != nil {
+			return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err)
+		}
+
+		// Lock the pod while we're cleaning up container
+		if pod.config.LockID == c.config.LockID {
+			return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock)
+		}
+
+		pod.lock.Lock()
+		defer pod.lock.Unlock()
+	}
+
+	return c.cleanupLocked(ctx)
+}
+
 // Batch starts a batch operation on the given container
 // All commands in the passed function will execute under the same lock and
 // without synchronizing state after each operation

diff --git a/libpod/pod_api.go b/libpod/pod_api.go
@@ -177,7 +177,7 @@ func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 			}
 
 			if cleanup {
-				return c.Cleanup(ctx)
+				return c.cleanupLocked(ctx)
 			}
 
 			return nil
@@ -286,7 +286,7 @@ func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) {
 		c := ctr
 		logrus.Debugf("Adding parallel job to clean up container %s", c.ID())
 		retChan := parallel.Enqueue(ctx, func() error {
-			return c.Cleanup(ctx)
+			return c.cleanupLocked(ctx)
 		})
 
 		ctrErrChan[c.ID()] = retChan