From bc1a31ce6df6e5b131f32fc8cb75aac020ceec96 Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Fri, 14 Apr 2023 14:08:22 -0400 Subject: [PATCH] Make RemoveContainer return containers and pods removed This allows for accurate reporting of dependency removal, but the work is still incomplete: pods can be removed, but do not report the containers they removed as part of said removal. Will add this in a subsequent commit. Major note: I made ignoring no-such-container errors automatic once it has been determined that a container did exist in the first place. I can't think of any case where this would not be a TOCTOU - IE, no reason not to ignore them. The `--ignore` option to `podman rm` should still retain meaning as it will ignore errors from containers that didn't exist in the first place. Signed-off-by: Matthew Heon --- libpod/container_graph.go | 2 +- libpod/pod_api.go | 2 +- libpod/runtime_ctr.go | 125 ++++++++++++++++++++--------- libpod/runtime_img.go | 2 +- libpod/runtime_pod_common.go | 3 +- libpod/runtime_volume_common.go | 2 +- pkg/domain/infra/abi/containers.go | 64 +++++++++------ 7 files changed, 134 insertions(+), 66 deletions(-) diff --git a/libpod/container_graph.go b/libpod/container_graph.go index 1f0302f7d8..d12dc656a4 100644 --- a/libpod/container_graph.go +++ b/libpod/container_graph.go @@ -359,7 +359,7 @@ func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, } if !ctrErrored { - if err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, false, false, timeout); err != nil { + if _, _, err := node.container.runtime.removeContainer(ctx, node.container, force, false, true, false, false, false, timeout); err != nil { ctrErrored = true ctrErrors[node.id] = err } diff --git a/libpod/pod_api.go b/libpod/pod_api.go index 83add4843e..d2bd7d2c07 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -40,7 +40,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error { icLock := initCon.lock icLock.Lock() var time *uint - if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, false, false, time); err != nil { + if _, _, err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, false, false, time); err != nil { icLock.Unlock() return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err) } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 029feb06be..f371fdb39c 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -606,16 +606,22 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai // container will be removed also (iff the container is the sole user of the // volumes). Timeout sets the stop timeout for the container if it is running. func (r *Runtime) RemoveContainer(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error { - // NOTE: container will be locked down the road. - return r.removeContainer(ctx, c, force, removeVolume, false, false, false, false, timeout) + // NOTE: container will be locked down the road. There is no unlocked + // version of removeContainer. + _, _, err := r.removeContainer(ctx, c, force, removeVolume, false, false, false, false, timeout) + return err } // RemoveContainerAndDependencies removes the given container and all its // dependencies. This may include pods (if the container or any of its // dependencies is an infra or service container, the associated pod(s) will also // be removed). Otherwise, it functions identically to RemoveContainer. -func (r *Runtime) RemoveContainerAndDependencies(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) error { - // NOTE: container will be locked down the road. +// Returns two arrays: containers removed, and pods removed. These arrays are +// always returned, even if error is set, and indicate any containers that were +// successfully removed prior to the error. +func (r *Runtime) RemoveContainerAndDependencies(ctx context.Context, c *Container, force bool, removeVolume bool, timeout *uint) (map[string]error, map[string]error, error) { + // NOTE: container will be locked down the road. There is no unlocked + // version of removeContainer. return r.removeContainer(ctx, c, force, removeVolume, false, false, true, false, timeout) } @@ -629,17 +635,25 @@ func (r *Runtime) RemoveContainerAndDependencies(ctx context.Context, c *Contain // removeDeps instructs Podman to remove dependency containers (and possible // a dependency pod if an infra container is involved). removeDeps conflicts // with removePod - pods have their own dependency management. -func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps, removeDeps, noLockPod bool, timeout *uint) error { +// noLockPod is used for recursive removeContainer calls when the pod is already +// locked. +// TODO: At this point we should just start accepting an options struct +func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, removeVolume, removePod, ignoreDeps, removeDeps, noLockPod bool, timeout *uint) (removedCtrs map[string]error, removedPods map[string]error, retErr error) { + removedCtrs = make(map[string]error) + removedPods = make(map[string]error) + if !c.valid { if ok, _ := r.state.HasContainer(c.ID()); !ok { // Container probably already removed // Or was never in the runtime to begin with - return nil + removedCtrs[c.ID()] = nil + return } } if removePod && removeDeps { - return fmt.Errorf("cannot remove dependencies while also removing a pod: %w", define.ErrInvalidArg) + retErr = fmt.Errorf("cannot remove dependencies while also removing a pod: %w", define.ErrInvalidArg) + return } // We need to refresh container config from the DB, to ensure that any @@ -650,7 +664,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo // exist once we're done. newConf, err := r.state.GetContainerConfig(c.ID()) if err != nil { - return fmt.Errorf("retrieving container %s configuration from DB to remove: %w", c.ID(), err) + retErr = fmt.Errorf("retrieving container %s configuration from DB to remove: %w", c.ID(), err) + return } c.config = newConf @@ -665,23 +680,27 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if c.config.Pod != "" { pod, err = r.state.Pod(c.config.Pod) if err != nil { - return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), pod.ID(), err) + retErr = err + return } if !removePod { // Lock the pod while we're removing container if pod.config.LockID == c.config.LockID { - return fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock) + retErr = fmt.Errorf("container %s and pod %s share lock ID %d: %w", c.ID(), pod.ID(), c.config.LockID, define.ErrWillDeadlock) + return } pod.lock.Lock() defer pod.lock.Unlock() if err := pod.updatePod(); err != nil { - return err + retErr = err + return } infraID := pod.state.InfraContainerID if c.ID() == infraID && !removeDeps { - return fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID()) + retErr = fmt.Errorf("container %s is the infra container of pod %s and cannot be removed without removing the pod", c.ID(), pod.ID()) + return } } } @@ -699,12 +718,14 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo } if !r.valid { - return define.ErrRuntimeStopped + retErr = define.ErrRuntimeStopped + return } // Update the container to get current state if err := c.syncContainer(); err != nil { - return err + retErr = err + return } serviceForPod := false @@ -715,10 +736,12 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if errors.Is(err, define.ErrNoSuchPod) { continue } - return err + retErr = err + return } if !removeDeps { - return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ",")) + retErr = fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ",")) + return } // If we are the service container for the pod we are a // member of: we need to remove that pod last, since @@ -729,8 +752,11 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo } logrus.Infof("Removing pod %s as container %s is its service container", depPod.ID(), c.ID()) if err := r.RemovePod(ctx, depPod, true, force, timeout); err != nil { - return err + removedPods[depPod.ID()] = err + retErr = fmt.Errorf("error removing container %s dependency pods: %w", c.ID(), err) + return } + removedPods[depPod.ID()] = nil } } if serviceForPod || c.config.IsInfra { @@ -744,36 +770,44 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo logrus.Infof("Removing pod %s (dependency of container %s)", pod.ID(), c.ID()) if err := r.removePod(ctx, pod, true, force, timeout); err != nil { - return err + removedPods[pod.ID()] = err + retErr = fmt.Errorf("error removing container %s pod: %w", c.ID(), err) + return } - return nil + removedPods[pod.ID()] = nil + return } // If we're not force-removing, we need to check if we're in a good // state to remove. if !force { if err := c.checkReadyForRemoval(); err != nil { - return err + retErr = err + return } } if c.state.State == define.ContainerStatePaused { isV2, err := cgroups.IsCgroup2UnifiedMode() if err != nil { - return err + retErr = err + return } // cgroups v1 and v2 handle signals on paused processes differently if !isV2 { if err := c.unpause(); err != nil { - return err + retErr = err + return } } if err := c.ociRuntime.KillContainer(c, 9, false); err != nil { - return err + retErr = err + return } // Need to update container state to make sure we know it's stopped if err := c.waitForExitFileAndSync(); err != nil { - return err + retErr = err + return } } @@ -783,22 +817,33 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if !ignoreDeps { deps, err := r.state.ContainerInUse(c) if err != nil { - return err + retErr = err + return } if !removeDeps { if len(deps) != 0 { depsStr := strings.Join(deps, ", ") - return fmt.Errorf("container %s has dependent containers which must be removed before it: %s: %w", c.ID(), depsStr, define.ErrCtrExists) + retErr = fmt.Errorf("container %s has dependent containers which must be removed before it: %s: %w", c.ID(), depsStr, define.ErrCtrExists) + return } } for _, depCtr := range deps { dep, err := r.GetContainer(depCtr) if err != nil { - return err + retErr = err + return } logrus.Infof("Removing container %s (dependency of container %s)", dep.ID(), c.ID()) - if err := r.removeContainer(ctx, dep, force, removeVolume, false, false, true, true, timeout); err != nil { - return err + ctrs, pods, err := r.removeContainer(ctx, dep, force, removeVolume, false, false, true, true, timeout) + for rmCtr, err := range ctrs { + removedCtrs[rmCtr] = err + } + for rmPod, err := range pods { + removedPods[rmPod] = err + } + if err != nil { + retErr = err + return } } } @@ -812,7 +857,8 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo // Ignore ErrConmonDead - we couldn't retrieve the container's // exit code properly, but it's still stopped. if err := c.stop(time); err != nil && !errors.Is(err, define.ErrConmonDead) { - return fmt.Errorf("cannot remove container %s as it could not be stopped: %w", c.ID(), err) + retErr = fmt.Errorf("cannot remove container %s as it could not be stopped: %w", c.ID(), err) + return } // We unlocked as part of stop() above - there's a chance someone @@ -823,17 +869,19 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo if ok, _ := r.state.HasContainer(c.ID()); !ok { // When the container has already been removed, the OCI runtime directory remain. if err := c.cleanupRuntime(ctx); err != nil { - return fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err) + retErr = fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err) + return } - return nil + // Do not add to removed containers, someone else + // removed it. + return } } - var cleanupErr error reportErrorf := func(msg string, args ...any) { err := fmt.Errorf(msg, args...) // Always use fmt.Errorf instead of just logrus.Errorf(…) because the format string probably contains %w - if cleanupErr == nil { - cleanupErr = err + if retErr == nil { + retErr = err } else { logrus.Errorf("%s", err.Error()) } @@ -887,6 +935,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo reportErrorf("removing container %s from database: %w", c.ID(), err) } } + removedCtrs[c.ID()] = nil // Deallocate the container's lock if err := c.lock.Free(); err != nil { @@ -899,7 +948,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo c.newContainerEvent(events.Remove) if !removeVolume { - return cleanupErr + return } for _, v := range c.config.NamedVolumes { @@ -921,7 +970,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo } } - return cleanupErr + return } // EvictContainer removes the given container partial or full ID or name, and @@ -960,7 +1009,7 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol if err == nil { logrus.Infof("Container %s successfully retrieved from state, attempting normal removal", id) // Assume force = true for the evict case - err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, false, false, timeout) + _, _, err = r.removeContainer(ctx, tmpCtr, true, removeVolume, false, false, false, false, timeout) if !tmpCtr.valid { // If the container is marked invalid, remove succeeded // in kicking it out of the state - no need to continue. diff --git a/libpod/runtime_img.go b/libpod/runtime_img.go index d7495f1631..189889b092 100644 --- a/libpod/runtime_img.go +++ b/libpod/runtime_img.go @@ -46,7 +46,7 @@ func (r *Runtime) RemoveContainersForImageCallback(ctx context.Context) libimage return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err) } } else { - if err := r.removeContainer(ctx, ctr, true, false, false, false, false, false, timeout); err != nil { + if _, _, err := r.removeContainer(ctx, ctr, true, false, false, false, false, false, timeout); err != nil { return fmt.Errorf("removing image %s: container %s using image could not be removed: %w", imageID, ctr.ID(), err) } } diff --git a/libpod/runtime_pod_common.go b/libpod/runtime_pod_common.go index 57b6ebce58..7709fbc4b1 100644 --- a/libpod/runtime_pod_common.go +++ b/libpod/runtime_pod_common.go @@ -142,7 +142,8 @@ func (r *Runtime) removeMalformedPod(ctx context.Context, p *Pod, ctrs []*Contai ctrNamedVolumes[vol.Name] = vol } - return r.removeContainer(ctx, ctr, force, false, true, true, false, false, timeout) + _, _, err := r.removeContainer(ctx, ctr, force, false, true, true, false, false, timeout) + return err }() if removalErr == nil { diff --git a/libpod/runtime_volume_common.go b/libpod/runtime_volume_common.go index e9a51cc40f..1d22ac8c2a 100644 --- a/libpod/runtime_volume_common.go +++ b/libpod/runtime_volume_common.go @@ -388,7 +388,7 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeo logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name()) - if err := r.removeContainer(ctx, ctr, force, false, false, false, false, false, timeout); err != nil { + if _, _, err := r.removeContainer(ctx, ctr, force, false, false, false, false, false, timeout); err != nil { return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err) } } diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index ce9670065b..9a09567f19 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -376,32 +376,24 @@ func (ic *ContainerEngine) ContainerRestart(ctx context.Context, namesOrIds []st return reports, nil } -func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Container, options entities.RmOptions) error { +func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Container, options entities.RmOptions) (map[string]error, map[string]error, error) { var err error + ctrs := make(map[string]error) + pods := make(map[string]error) if options.All || options.Depend { - err = ic.Libpod.RemoveContainerAndDependencies(ctx, ctr, options.Force, options.Volumes, options.Timeout) + ctrs, pods, err = ic.Libpod.RemoveContainerAndDependencies(ctx, ctr, options.Force, options.Volumes, options.Timeout) } else { err = ic.Libpod.RemoveContainer(ctx, ctr, options.Force, options.Volumes, options.Timeout) } + ctrs[ctr.ID()] = err if err == nil { - return nil + return ctrs, pods, nil } logrus.Debugf("Failed to remove container %s: %s", ctr.ID(), err.Error()) - if errors.Is(err, define.ErrNoSuchCtr) { - // Ignore if the container does not exist (anymore) when either - // it has been requested by the user of if the container is a - // service one. Service containers are removed along with its - // pods which in turn are removed along with their infra - // container. Hence, there is an inherent race when removing - // infra containers with service containers in parallel. - if options.Ignore || ctr.IsService() { - logrus.Debugf("Ignoring error (--allow-missing): %v", err) - return nil - } - } else if errors.Is(err, define.ErrCtrRemoved) { - return nil + if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) { + return ctrs, pods, nil } - return err + return ctrs, pods, err } func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string, options entities.RmOptions) ([]*reports.RmReport, error) { @@ -440,18 +432,44 @@ func (ic *ContainerEngine) ContainerRm(ctx context.Context, namesOrIds []string, } } + ctrsMap := make(map[string]error) + mapMutex := sync.Mutex{} + errMap, err := parallelctr.ContainerOp(ctx, libpodContainers, func(c *libpod.Container) error { - return ic.removeContainer(ctx, c, options) + mapMutex.Lock() + if _, ok := ctrsMap[c.ID()]; ok { + return nil + } + mapMutex.Unlock() + + ctrs, _, err := ic.removeContainer(ctx, c, options) + + mapMutex.Lock() + defer mapMutex.Unlock() + for ctr, err := range ctrs { + ctrsMap[ctr] = err + } + + return err }) if err != nil { return nil, err } for ctr, err := range errMap { + if _, ok := ctrsMap[ctr.ID()]; ok { + logrus.Debugf("Multiple results for container %s - attempted multiple removals?", ctr) + } + ctrsMap[ctr.ID()] = err + } + + for ctr, err := range ctrsMap { report := new(reports.RmReport) - report.Id = ctr.ID() - report.Err = err - report.RawInput = idToRawInput[ctr.ID()] + report.Id = ctr + if !(errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved)) { + report.Err = err + } + report.RawInput = idToRawInput[ctr] rmReports = append(rmReports, report) } @@ -945,7 +963,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri ExitCode: exitCode, }) if ctr.AutoRemove() { - if err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil { + if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil { logrus.Errorf("Removing container %s: %v", ctr.ID(), err) } } @@ -981,7 +999,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err) reports = append(reports, report) if ctr.AutoRemove() { - if err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil { + if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil { logrus.Errorf("Removing container %s: %v", ctr.ID(), err) } }