From c4472e1f539167cdc416d09caf7083188fd64c1b Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 2 Mar 2022 10:09:03 +0100
Subject: [PATCH] libpod: drop hack to set conmon cgroup pids.max=1

Stop forcing the pids.max = 1 limit to prevent cleanup processes from
spawning: it is racy, since the cleanup processes could be triggered by
the container exiting, and it doesn't work with rootless when it cannot
use cgroups (i.e. cgroupfs and cgroup v1).

Closes: https://github.com/containers/podman/issues/13382

[NO NEW TESTS NEEDED] it doesn't add any new functionality

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 libpod/runtime_pod_linux.go | 54 +++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go
index 155ad5c2d9..74a218038e 100644
--- a/libpod/runtime_pod_linux.go
+++ b/libpod/runtime_pod_linux.go
@@ -1,3 +1,4 @@
+//go:build linux
 // +build linux
 
 package libpod
@@ -5,9 +6,11 @@ package libpod
 import (
 	"context"
 	"fmt"
+	"os"
 	"path"
 	"path/filepath"
 	"strings"
+	"time"
 
 	"github.com/containers/common/pkg/cgroups"
 	"github.com/containers/common/pkg/config"
@@ -15,7 +18,6 @@ import (
 	"github.com/containers/podman/v4/libpod/events"
 	"github.com/containers/podman/v4/pkg/rootless"
 	"github.com/containers/podman/v4/pkg/specgen"
-	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -195,10 +197,15 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 	// Go through and lock all containers so we can operate on them all at
 	// once.
 	// First loop also checks that we are ready to go ahead and remove.
+	containersLocked := true
 	for _, ctr := range ctrs {
 		ctrLock := ctr.lock
 		ctrLock.Lock()
-		defer ctrLock.Unlock()
+		defer func() {
+			if containersLocked {
+				ctrLock.Unlock()
+			}
+		}()
 
 		// If we're force-removing, no need to check status.
 		if force {
@@ -216,32 +223,6 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 		}
 	}
 
-	// We're going to be removing containers.
-	// If we are Cgroupfs cgroup driver, to avoid races, we need to hit
-	// the pod and conmon Cgroups with a PID limit to prevent them from
-	// spawning any further processes (particularly cleanup processes) which
-	// would prevent removing the Cgroups.
-	if p.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager {
-		// Get the conmon Cgroup
-		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
-		conmonCgroup, err := cgroups.Load(conmonCgroupPath)
-		if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
-			logrus.Errorf("Retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
-		}
-
-		// New resource limits
-		resLimits := new(spec.LinuxResources)
-		resLimits.Pids = new(spec.LinuxPids)
-		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
-
-		// Don't try if we failed to retrieve the cgroup
-		if err == nil {
-			if err := conmonCgroup.Update(resLimits); err != nil {
-				logrus.Warnf("Error updating pod %s conmon cgroup PID limit: %v", p.ID(), err)
-			}
-		}
-	}
-
 	var removalErr error
 
 	ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
@@ -300,6 +281,12 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 		}
 	}
 
+	// let's unlock the containers so the cleanup processes can terminate their execution
+	for _, ctr := range ctrs {
+		ctr.lock.Unlock()
+	}
+	containersLocked = false
+
 	// Remove pod cgroup, if present
 	if p.state.CgroupPath != "" {
 		logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
@@ -328,7 +315,16 @@ func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool,
 				}
 			}
 			if err == nil {
-				if err := conmonCgroup.Delete(); err != nil {
+				for attempts := 0; attempts < 50; attempts++ {
+					err = conmonCgroup.Delete()
+					if err == nil || os.IsNotExist(err) {
+						// success
+						err = nil
+						break
+					}
+					time.Sleep(time.Millisecond * 100)
+				}
+				if err != nil {
 					if removalErr == nil {
 						removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID())
 					} else {