From 3b39eb133338ce825e12126f8bde416440b05333 Mon Sep 17 00:00:00 2001 From: Matt Heon Date: Mon, 5 Jun 2023 12:28:50 -0400 Subject: [PATCH 1/5] Include lock number in pod/container/volume inspect Being able to easily identify what lock has been allocated to a given Libpod object is only somewhat useful for debugging lock issues, but it's trivial to expose and I don't see any harm in doing so. Signed-off-by: Matt Heon --- libpod/container_inspect.go | 1 + libpod/define/container_inspect.go | 1 + libpod/define/pod_inspect.go | 2 ++ libpod/define/volume_inspect.go | 2 ++ libpod/pod_api.go | 1 + libpod/volume_inspect.go | 1 + 6 files changed, 8 insertions(+) diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index d6f404c701..99e6feaa62 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -166,6 +166,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver IsInfra: c.IsInfra(), IsService: c.IsService(), KubeExitCodePropagation: config.KubeExitCodePropagation.String(), + LockNumber: c.lock.ID(), } if config.RootfsImageID != "" { // May not be set if the container was created with --rootfs diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 0309a8dde0..457de626c9 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -691,6 +691,7 @@ type InspectContainerData struct { IsInfra bool `json:"IsInfra"` IsService bool `json:"IsService"` KubeExitCodePropagation string `json:"KubeExitCodePropagation"` + LockNumber uint32 `json:"lockNumber"` Config *InspectContainerConfig `json:"Config"` HostConfig *InspectContainerHostConfig `json:"HostConfig"` } diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go index 9607bf868d..bcd34f15a1 100644 --- a/libpod/define/pod_inspect.go +++ b/libpod/define/pod_inspect.go @@ -85,6 +85,8 @@ type InspectPodData struct { BlkioWeightDevice []InspectBlkioWeightDevice `json:"blkio_weight_device,omitempty"` // RestartPolicy of the pod. RestartPolicy string `json:"RestartPolicy,omitempty"` + // Number of the pod's Libpod lock. + LockNumber uint32 } // InspectPodInfraConfig contains the configuration of the pod's infra diff --git a/libpod/define/volume_inspect.go b/libpod/define/volume_inspect.go index 4d6f12080a..c4b45a04f5 100644 --- a/libpod/define/volume_inspect.go +++ b/libpod/define/volume_inspect.go @@ -61,6 +61,8 @@ type InspectVolumeData struct { // StorageID is the ID of the container backing the volume in c/storage. // Only used with Image Volumes. StorageID string `json:"StorageID,omitempty"` + // LockNumber is the number of the volume's Libpod lock. 
+ LockNumber uint32 } type VolumeReload struct { diff --git a/libpod/pod_api.go b/libpod/pod_api.go index f8e70cc8f5..8140b5e17b 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -742,6 +742,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { BlkioDeviceWriteBps: p.BlkiThrottleWriteBps(), CPUShares: p.CPUShares(), RestartPolicy: p.config.RestartPolicy, + LockNumber: p.lock.ID(), } return &inspectData, nil diff --git a/libpod/volume_inspect.go b/libpod/volume_inspect.go index 31fbd5eff0..e2300da471 100644 --- a/libpod/volume_inspect.go +++ b/libpod/volume_inspect.go @@ -65,6 +65,7 @@ func (v *Volume) Inspect() (*define.InspectVolumeData, error) { data.NeedsCopyUp = v.state.NeedsCopyUp data.NeedsChown = v.state.NeedsChown data.StorageID = v.config.StorageID + data.LockNumber = v.lock.ID() if v.config.Timeout != nil { data.Timeout = *v.config.Timeout From 1013696ad2e716ce6d1c79ddbf2690cb969d0624 Mon Sep 17 00:00:00 2001 From: Matt Heon Date: Mon, 5 Jun 2023 14:00:40 -0400 Subject: [PATCH 2/5] Add number of free locks to `podman info` This is a nice quality-of-life change that should help to debug situations where someone runs out of locks (usually when a bunch of unused volumes accumulate). Signed-off-by: Matt Heon --- libpod/define/info.go | 1 + libpod/info.go | 7 ++++ libpod/lock/file_lock_manager.go | 6 ++++ libpod/lock/in_memory_locks.go | 13 +++++++ libpod/lock/lock.go | 6 ++++ libpod/lock/shm/shm_lock.c | 50 +++++++++++++++++++++++++++ libpod/lock/shm/shm_lock.go | 15 ++++++++ libpod/lock/shm/shm_lock.h | 1 + libpod/lock/shm_lock_manager_linux.go | 10 ++++++ test/e2e/info_test.go | 24 +++++++++++++ 10 files changed, 133 insertions(+) diff --git a/libpod/define/info.go b/libpod/define/info.go index cc47f7f1b6..69f86c92c8 100644 --- a/libpod/define/info.go +++ b/libpod/define/info.go @@ -38,6 +38,7 @@ type HostInfo struct { DatabaseBackend string `json:"databaseBackend"` Distribution DistributionInfo `json:"distribution"` EventLogger string `json:"eventLogger"` + FreeLocks *uint32 `json:"freeLocks,omitempty"` Hostname string `json:"hostname"` IDMappings IDMappings `json:"idMappings,omitempty"` Kernel string `json:"kernel"` diff --git a/libpod/info.go b/libpod/info.go index a612a7bf61..e746fbdf95 100644 --- a/libpod/info.go +++ b/libpod/info.go @@ -99,6 +99,12 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) { if err != nil { return nil, err } + + locksFree, err := r.lockManager.AvailableLocks() + if err != nil { + return nil, fmt.Errorf("getting free locks: %w", err) + } + info := define.HostInfo{ Arch: runtime.GOARCH, BuildahVersion: buildah.Version, @@ -107,6 +113,7 @@ func (r *Runtime) hostInfo() (*define.HostInfo, error) { CPUs: runtime.NumCPU(), CPUUtilization: cpuUtil, Distribution: hostDistributionInfo, + FreeLocks: locksFree, LogDriver: r.config.Containers.LogDriver, EventLogger: r.eventer.String(), Hostname: host, diff --git a/libpod/lock/file_lock_manager.go b/libpod/lock/file_lock_manager.go index d89b0cc98a..e2fabc66a6 100644 --- a/libpod/lock/file_lock_manager.go +++ b/libpod/lock/file_lock_manager.go @@ -79,6 +79,12 @@ func (m *FileLockManager) FreeAllLocks() error { return m.locks.DeallocateAllLocks() } +// AvailableLocks returns the number of available locks. Since this is not +// limited in the file lock implementation, nil is returned. +func (locks *FileLockManager) AvailableLocks() (*uint32, error) { + return nil, nil +} + // FileLock is an individual shared memory lock. 
type FileLock struct { lockID uint32 diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go index f00f01032d..a482392eee 100644 --- a/libpod/lock/in_memory_locks.go +++ b/libpod/lock/in_memory_locks.go @@ -116,3 +116,16 @@ func (m *InMemoryManager) FreeAllLocks() error { return nil } + +// Get number of available locks +func (m *InMemoryManager) AvailableLocks() (*uint32, error) { + var count uint32 + + for _, lock := range m.locks { + if !lock.allocated { + count++ + } + } + + return &count, nil +} diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go index 4e1e2e215e..be4829e0cb 100644 --- a/libpod/lock/lock.go +++ b/libpod/lock/lock.go @@ -45,6 +45,12 @@ type Manager interface { // renumbering, where reasonable guarantees about other processes can be // made. FreeAllLocks() error + // NumAvailableLocks gets the number of remaining locks available to be + // allocated. + // Some lock managers do not have a maximum number of locks, and can + // allocate an unlimited number. These implementations should return + // a nil uin32. + AvailableLocks() (*uint32, error) } // Locker is similar to sync.Locker, but provides a method for freeing the lock diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c index 0dae806fcf..02e5f6700a 100644 --- a/libpod/lock/shm/shm_lock.c +++ b/libpod/lock/shm/shm_lock.c @@ -537,3 +537,53 @@ int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) { return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); } + +// Get the number of free locks. +// Returns a positive integer guaranteed to be less than UINT32_MAX on success, +// or negative errno values on failure. +// On success, the returned integer is the number of free semaphores. +int64_t available_locks(shm_struct_t *shm) { + int ret_code, i, count; + bitmap_t test_map; + int64_t free_locks = 0; + + if (shm == NULL) { + return -1 * EINVAL; + } + + // Lock the semaphore controlling access to the SHM segment. + // This isn't strictly necessary as we're only reading, but it seems safer. + ret_code = take_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + // Loop through all bitmaps, counting number of allocated locks. + for (i = 0; i < shm->num_bitmaps; i++) { + // Short-circuit to catch fully-empty bitmaps quick. + if (shm->locks[i].bitmap == 0) { + free_locks += 32; + continue; + } + + // Use Kernighan's Algorithm to count bits set. Subtract from number of bits + // in the integer to get free bits, and thus free lock count. + test_map = shm->locks[i].bitmap; + count = 0; + while (test_map) { + test_map = test_map & (test_map - 1); + count++; + } + + free_locks += 32 - count; + } + + // Clear the mutex + ret_code = release_mutex(&(shm->segment_lock)); + if (ret_code != 0) { + return -1 * ret_code; + } + + // Return free lock count. + return free_locks; +} diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go index 3934883ac9..c7e38a3d78 100644 --- a/libpod/lock/shm/shm_lock.go +++ b/libpod/lock/shm/shm_lock.go @@ -266,6 +266,21 @@ func (locks *SHMLocks) UnlockSemaphore(sem uint32) error { return nil } +// GetFreeLocks gets the number of locks available to be allocated. 
+func (locks *SHMLocks) GetFreeLocks() (uint32, error) { + if !locks.valid { + return 0, fmt.Errorf("locks have already been closed: %w", syscall.EINVAL) + } + + retCode := C.available_locks(locks.lockStruct) + if retCode < 0 { + // Negative errno returned + return 0, syscall.Errno(-1 * retCode) + } + + return uint32(retCode), nil +} + func unlinkSHMLock(path string) error { cPath := C.CString(path) defer C.free(unsafe.Pointer(cPath)) diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h index 8796b43f44..ebecae218b 100644 --- a/libpod/lock/shm/shm_lock.h +++ b/libpod/lock/shm/shm_lock.h @@ -41,5 +41,6 @@ int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index); int32_t deallocate_all_semaphores(shm_struct_t *shm); int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index); int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index); +int64_t available_locks(shm_struct_t *shm); #endif diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go index fa20bc3530..9d618009fa 100644 --- a/libpod/lock/shm_lock_manager_linux.go +++ b/libpod/lock/shm_lock_manager_linux.go @@ -98,6 +98,16 @@ func (m *SHMLockManager) FreeAllLocks() error { return m.locks.DeallocateAllSemaphores() } +// AvailableLocks returns the number of free locks in the manager. +func (m *SHMLockManager) AvailableLocks() (*uint32, error) { + avail, err := m.locks.GetFreeLocks() + if err != nil { + return nil, err + } + + return &avail, nil +} + // SHMLock is an individual shared memory lock. type SHMLock struct { lockID uint32 diff --git a/test/e2e/info_test.go b/test/e2e/info_test.go index ad97ffda1d..e48ce2f854 100644 --- a/test/e2e/info_test.go +++ b/test/e2e/info_test.go @@ -6,6 +6,7 @@ import ( "os/exec" "os/user" "path/filepath" + "strconv" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -175,4 +176,27 @@ var _ = Describe("Podman Info", func() { Expect(session).To(Exit(0)) Expect(session.OutputToString()).To(Equal(want)) }) + + It("Podman info: check lock count", func() { + // This should not run on architectures and OSes that use the file locks backend. + // Which, for now, is Linux + RISCV and FreeBSD, neither of which are in CI - so + // no skips. + info1 := podmanTest.Podman([]string{"info", "--format", "{{ .Host.FreeLocks }}"}) + info1.WaitWithDefaultTimeout() + Expect(info1).To(Exit(0)) + free1, err := strconv.Atoi(info1.OutputToString()) + Expect(err).To(BeNil()) + + ctr := podmanTest.Podman([]string{"create", ALPINE, "top"}) + ctr.WaitWithDefaultTimeout() + Expect(ctr).To(Exit(0)) + + info2 := podmanTest.Podman([]string{"info", "--format", "{{ .Host.FreeLocks }}"}) + info2.WaitWithDefaultTimeout() + Expect(info2).To(Exit(0)) + free2, err := strconv.Atoi(info2.OutputToString()) + Expect(err).To(BeNil()) + + Expect(free1).To(Equal(free2 + 1)) + }) }) From 0948c078c2f66311e346c3cbed7b9bdd153032e9 Mon Sep 17 00:00:00 2001 From: Matt Heon Date: Mon, 5 Jun 2023 14:47:12 -0400 Subject: [PATCH 3/5] Add a new hidden command, podman system locks This is a general debug command that identifies any lock conflicts that could lead to a deadlock. It's only intended for Libpod developers (while it does tell you if you need to run `podman system renumber`, you should never have to do that anyways, and the next commit will include a lot more technical info in the output that no one except a Libpod dev will want). 
Hence, hidden command, and only implemented for the local driver (recommend just running it by SSHing into a `podman machine` VM in the unlikely case it's needed by remote Podman). These conflicts should normally never happen, but having a command like this is useful for debugging deadlock conditions when they do occur. Signed-off-by: Matt Heon --- cmd/podman/system/locks.go | 48 +++++++++++++++++ libpod/runtime.go | 70 +++++++++++++++++++++++++ pkg/domain/entities/engine_container.go | 1 + pkg/domain/entities/system.go | 6 +++ pkg/domain/infra/abi/system.go | 10 ++++ pkg/domain/infra/tunnel/system.go | 4 ++ 6 files changed, 139 insertions(+) create mode 100644 cmd/podman/system/locks.go diff --git a/cmd/podman/system/locks.go b/cmd/podman/system/locks.go new file mode 100644 index 0000000000..0b03bc0fbe --- /dev/null +++ b/cmd/podman/system/locks.go @@ -0,0 +1,48 @@ +package system + +import ( + "fmt" + + "github.com/containers/podman/v4/cmd/podman/registry" + "github.com/containers/podman/v4/cmd/podman/validate" + "github.com/spf13/cobra" +) + +var ( + locksCommand = &cobra.Command{ + Use: "locks", + Short: "Debug Libpod's use of locks, identifying any potential conflicts", + Args: validate.NoArgs, + Hidden: true, + RunE: func(cmd *cobra.Command, args []string) error { + return runLocks() + }, + Example: "podman system locks", + } +) + +func init() { + registry.Commands = append(registry.Commands, registry.CliCommand{ + Command: locksCommand, + Parent: systemCmd, + }) +} +func runLocks() error { + report, err := registry.ContainerEngine().Locks(registry.Context()) + if err != nil { + return err + } + + for lockNum, objects := range report.LockConflicts { + fmt.Printf("Lock %d is in use by the following\n:", lockNum) + for _, obj := range objects { + fmt.Printf("\t%s\n", obj) + } + } + + if len(report.LockConflicts) > 0 { + fmt.Printf("\nLock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.\n") + } + + return nil +} diff --git a/libpod/runtime.go b/libpod/runtime.go index 98956a5e16..f0c29c79be 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -1188,3 +1188,73 @@ func (r *Runtime) RemoteURI() string { func (r *Runtime) SetRemoteURI(uri string) { r.config.Engine.RemoteURI = uri } + + +// Get information on potential lock conflicts. +// Returns a map of lock number to object(s) using the lock, formatted as +// "container " or "volume " or "pod ". +// If the map returned is not empty, you should immediately renumber locks on +// the runtime, because you have a deadlock waiting to happen. 
+func (r *Runtime) LockConflicts() (map[uint32][]string, error) { + // Make an internal map to store what lock is associated with what + locksInUse := make(map[uint32][]string) + + ctrs, err := r.state.AllContainers(false) + if err != nil { + return nil, err + } + for _, ctr := range ctrs { + lockNum := ctr.lock.ID() + ctrString := fmt.Sprintf("container %s", ctr.ID()) + locksArr, ok := locksInUse[lockNum] + if ok { + locksInUse[lockNum] = append(locksArr, ctrString) + } else { + locksInUse[lockNum] = []string{ctrString} + } + } + + pods, err := r.state.AllPods() + if err != nil { + return nil, err + } + for _, pod := range pods { + lockNum := pod.lock.ID() + podString := fmt.Sprintf("pod %s", pod.ID()) + locksArr, ok := locksInUse[lockNum] + if ok { + locksInUse[lockNum] = append(locksArr, podString) + } else { + locksInUse[lockNum] = []string{podString} + } + } + + volumes, err := r.state.AllVolumes() + if err != nil { + return nil, err + } + for _, vol := range volumes { + lockNum := vol.lock.ID() + volString := fmt.Sprintf("volume %s", vol.Name()) + locksArr, ok := locksInUse[lockNum] + if ok { + locksInUse[lockNum] = append(locksArr, volString) + } else { + locksInUse[lockNum] = []string{volString} + } + } + + // Now go through and find any entries with >1 item associated + toReturn := make(map[uint32][]string) + for lockNum, objects := range locksInUse { + // If debug logging is requested, just spit out *every* lock in + // use. + logrus.Debugf("Lock number %d is in use by %v", lockNum, objects) + + if len(objects) > 1 { + toReturn[lockNum] = objects + } + } + + return toReturn, nil +} diff --git a/pkg/domain/entities/engine_container.go b/pkg/domain/entities/engine_container.go index b5da16c491..7983502339 100644 --- a/pkg/domain/entities/engine_container.go +++ b/pkg/domain/entities/engine_container.go @@ -62,6 +62,7 @@ type ContainerEngine interface { //nolint:interfacebloat HealthCheckRun(ctx context.Context, nameOrID string, options HealthCheckOptions) (*define.HealthCheckResults, error) Info(ctx context.Context) (*define.Info, error) KubeApply(ctx context.Context, body io.Reader, opts ApplyOptions) error + Locks(ctx context.Context) (*LocksReport, error) NetworkConnect(ctx context.Context, networkname string, options NetworkConnectOptions) error NetworkCreate(ctx context.Context, network types.Network, createOptions *types.NetworkCreateOptions) (*types.Network, error) NetworkUpdate(ctx context.Context, networkname string, options NetworkUpdateOptions) error diff --git a/pkg/domain/entities/system.go b/pkg/domain/entities/system.go index 5d7ef92d74..28517b1aab 100644 --- a/pkg/domain/entities/system.go +++ b/pkg/domain/entities/system.go @@ -120,3 +120,9 @@ type AuthReport struct { IdentityToken string Status string } + +// LocksReport describes any conflicts in Libpod's lock allocations that could +// lead to deadlocks. 
+type LocksReport struct { + LockConflicts map[uint32][]string +} diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go index 09cae3b3f3..730d74623b 100644 --- a/pkg/domain/infra/abi/system.go +++ b/pkg/domain/infra/abi/system.go @@ -429,3 +429,13 @@ func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionR report.Client = &v return &report, err } + +func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) { + var report entities.LocksReport + conflicts, err := ic.Libpod.LockConflicts() + if err != nil { + return nil, err + } + report.LockConflicts = conflicts + return &report, nil +} diff --git a/pkg/domain/infra/tunnel/system.go b/pkg/domain/infra/tunnel/system.go index 29737775d5..7096c2ccd5 100644 --- a/pkg/domain/infra/tunnel/system.go +++ b/pkg/domain/infra/tunnel/system.go @@ -34,3 +34,7 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionReport, error) { return system.Version(ic.ClientCtx, nil) } + +func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) { + return nil, errors.New("locks is not supported on remote clients") +} From 4fda7936c518b8bde23899eda9e385a2c7d3b290 Mon Sep 17 00:00:00 2001 From: Matt Heon Date: Mon, 5 Jun 2023 15:45:59 -0400 Subject: [PATCH 4/5] `system locks` now reports held locks To debug a deadlock, we really want to know what lock is actually locked, so we can figure out what is using that lock. This PR adds support for this, using trylock to check if every lock on the system is free or in use. Will really need to be run a few times in quick succession to verify that it's not a transient lock and it's actually stuck, but that's not really a big deal. Signed-off-by: Matt Heon --- cmd/podman/system/locks.go | 16 +++-- libpod/lock/file_lock_manager.go | 12 +++- libpod/lock/in_memory_locks.go | 17 +++++ libpod/lock/lock.go | 4 ++ libpod/lock/shm/shm_lock.c | 71 ++++++++++++++++++--- libpod/lock/shm/shm_lock.go | 25 ++++++++ libpod/lock/shm/shm_lock.h | 1 + libpod/lock/shm/shm_lock_nocgo.go | 12 ++++ libpod/lock/shm_lock_manager_linux.go | 4 ++ libpod/lock/shm_lock_manager_unsupported.go | 10 +++ libpod/runtime.go | 24 +++++-- pkg/domain/entities/system.go | 1 + pkg/domain/infra/abi/system.go | 3 +- test/e2e/info_test.go | 4 +- 14 files changed, 178 insertions(+), 26 deletions(-) diff --git a/cmd/podman/system/locks.go b/cmd/podman/system/locks.go index 0b03bc0fbe..e1501c9863 100644 --- a/cmd/podman/system/locks.go +++ b/cmd/podman/system/locks.go @@ -10,9 +10,9 @@ import ( var ( locksCommand = &cobra.Command{ - Use: "locks", - Short: "Debug Libpod's use of locks, identifying any potential conflicts", - Args: validate.NoArgs, + Use: "locks", + Short: "Debug Libpod's use of locks, identifying any potential conflicts", + Args: validate.NoArgs, Hidden: true, RunE: func(cmd *cobra.Command, args []string) error { return runLocks() @@ -24,7 +24,7 @@ var ( func init() { registry.Commands = append(registry.Commands, registry.CliCommand{ Command: locksCommand, - Parent: systemCmd, + Parent: systemCmd, }) } func runLocks() error { @@ -41,7 +41,13 @@ func runLocks() error { } if len(report.LockConflicts) > 0 { - fmt.Printf("\nLock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.\n") + fmt.Printf("\nLock conflicts have been detected. 
Recommend immediate use of `podman system renumber` to resolve.\n\n") + } else { + fmt.Printf("\nNo lock conflicts have been detected, system safe from deadlocks.\n\n") + } + + for _, lockNum := range report.LocksHeld { + fmt.Printf("Lock %d is presently being held\n", lockNum) } return nil diff --git a/libpod/lock/file_lock_manager.go b/libpod/lock/file_lock_manager.go index e2fabc66a6..4a82e687cc 100644 --- a/libpod/lock/file_lock_manager.go +++ b/libpod/lock/file_lock_manager.go @@ -1,6 +1,7 @@ package lock import ( + "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/libpod/lock/file" ) @@ -81,10 +82,19 @@ func (m *FileLockManager) FreeAllLocks() error { // AvailableLocks returns the number of available locks. Since this is not // limited in the file lock implementation, nil is returned. -func (locks *FileLockManager) AvailableLocks() (*uint32, error) { +func (m *FileLockManager) AvailableLocks() (*uint32, error) { return nil, nil } +// LocksHeld returns any locks that are presently locked. +// It is not implemented for the file lock backend. +// It ought to be possible, but my motivation to dig into c/storage and add +// trylock semantics to the filelocker implementation for an uncommonly-used +// lock backend is lacking. +func (m *FileLockManager) LocksHeld() ([]uint32, error) { + return nil, define.ErrNotImplemented +} + // FileLock is an individual shared memory lock. type FileLock struct { lockID uint32 diff --git a/libpod/lock/in_memory_locks.go b/libpod/lock/in_memory_locks.go index a482392eee..e62b25ac15 100644 --- a/libpod/lock/in_memory_locks.go +++ b/libpod/lock/in_memory_locks.go @@ -129,3 +129,20 @@ func (m *InMemoryManager) AvailableLocks() (*uint32, error) { return &count, nil } + +// Get any locks that are presently being held. +// Useful for debugging deadlocks. +func (m *InMemoryManager) LocksHeld() ([]uint32, error) { + //nolint:prealloc + var locks []uint32 + + for _, lock := range m.locks { + if lock.lock.TryLock() { + lock.lock.Unlock() + continue + } + locks = append(locks, lock.ID()) + } + + return locks, nil +} diff --git a/libpod/lock/lock.go b/libpod/lock/lock.go index be4829e0cb..22f3d20317 100644 --- a/libpod/lock/lock.go +++ b/libpod/lock/lock.go @@ -51,6 +51,10 @@ type Manager interface { // allocate an unlimited number. These implementations should return // a nil uin32. AvailableLocks() (*uint32, error) + // Get a list of locks that are currently locked. + // This may not be supported by some drivers, depending on the exact + // backend implementation in use. + LocksHeld() ([]uint32, error) } // Locker is similar to sync.Locker, but provides a method for freeing the lock diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c index 02e5f6700a..dc98ce91fd 100644 --- a/libpod/lock/shm/shm_lock.c +++ b/libpod/lock/shm/shm_lock.c @@ -20,12 +20,18 @@ static size_t compute_shm_size(uint32_t num_bitmaps) { // Handles exceptional conditions, including a mutex locked by a process that // died holding it. // Returns 0 on success, or positive errno on failure. 
-static int take_mutex(pthread_mutex_t *mutex) { +static int take_mutex(pthread_mutex_t *mutex, bool trylock) { int ret_code; - do { - ret_code = pthread_mutex_lock(mutex); - } while(ret_code == EAGAIN); + if (!trylock) { + do { + ret_code = pthread_mutex_lock(mutex); + } while(ret_code == EAGAIN); + } else { + do { + ret_code = pthread_mutex_trylock(mutex); + } while(ret_code == EAGAIN); + } if (ret_code == EOWNERDEAD) { // The previous owner of the mutex died while holding it @@ -309,7 +315,7 @@ int64_t allocate_semaphore(shm_struct_t *shm) { } // Lock the semaphore controlling access to our shared memory - ret_code = take_mutex(&(shm->segment_lock)); + ret_code = take_mutex(&(shm->segment_lock), false); if (ret_code != 0) { return -1 * ret_code; } @@ -383,7 +389,7 @@ int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index) { test_map = 0x1 << index_in_bitmap; // Lock the mutex controlling access to our shared memory - ret_code = take_mutex(&(shm->segment_lock)); + ret_code = take_mutex(&(shm->segment_lock), false); if (ret_code != 0) { return -1 * ret_code; } @@ -436,7 +442,7 @@ int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) { test_map = 0x1 << index_in_bitmap; // Lock the mutex controlling access to our shared memory - ret_code = take_mutex(&(shm->segment_lock)); + ret_code = take_mutex(&(shm->segment_lock), false); if (ret_code != 0) { return -1 * ret_code; } @@ -475,7 +481,7 @@ int32_t deallocate_all_semaphores(shm_struct_t *shm) { } // Lock the mutex controlling access to our shared memory - ret_code = take_mutex(&(shm->segment_lock)); + ret_code = take_mutex(&(shm->segment_lock), false); if (ret_code != 0) { return -1 * ret_code; } @@ -513,7 +519,7 @@ int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) { bitmap_index = sem_index / BITMAP_SIZE; index_in_bitmap = sem_index % BITMAP_SIZE; - return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); + return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]), false); } // Unlock a given semaphore @@ -553,7 +559,7 @@ int64_t available_locks(shm_struct_t *shm) { // Lock the semaphore controlling access to the SHM segment. // This isn't strictly necessary as we're only reading, but it seems safer. - ret_code = take_mutex(&(shm->segment_lock)); + ret_code = take_mutex(&(shm->segment_lock), false); if (ret_code != 0) { return -1 * ret_code; } @@ -587,3 +593,48 @@ int64_t available_locks(shm_struct_t *shm) { // Return free lock count. return free_locks; } + +// Attempt to take a given semaphore. If successfully taken, it is immediately +// released before the function returns. +// Used to check if a semaphore is in use, to detect potential deadlocks where a +// lock has not been released for an extended period of time. +// Note that this is NOT POSIX trylock as the lock is immediately released if +// taken. +// Returns negative errno on failure. +// On success, returns 1 if the lock was successfully taken, and 0 if it was +// not. 
+int32_t try_lock(shm_struct_t *shm, uint32_t sem_index) { + int bitmap_index, index_in_bitmap, ret_code; + pthread_mutex_t *mutex; + + if (shm == NULL) { + return -1 * EINVAL; + } + + if (sem_index >= shm->num_locks) { + return -1 * EINVAL; + } + + bitmap_index = sem_index / BITMAP_SIZE; + index_in_bitmap = sem_index % BITMAP_SIZE; + + mutex = &(shm->locks[bitmap_index].locks[index_in_bitmap]); + + ret_code = take_mutex(mutex, true); + + if (ret_code == EBUSY) { + // Did not successfully take the lock + return 0; + } else if (ret_code != 0) { + // Another, unrelated error + return -1 * ret_code; + } + + // Lock taken successfully, unlock and return. + ret_code = release_mutex(mutex); + if (ret_code != 0) { + return -1 * ret_code; + } + + return 1; +} diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go index c7e38a3d78..239d0e47a7 100644 --- a/libpod/lock/shm/shm_lock.go +++ b/libpod/lock/shm/shm_lock.go @@ -281,6 +281,31 @@ func (locks *SHMLocks) GetFreeLocks() (uint32, error) { return uint32(retCode), nil } +// Get a list of locks that are currently taken. +func (locks *SHMLocks) GetTakenLocks() ([]uint32, error) { + if !locks.valid { + return nil, fmt.Errorf("locks have already been closed: %w", syscall.EINVAL) + } + + var usedLocks []uint32 + + // I don't think we need to lock the OS thread here, since the lock (if + // taken) is immediately released, and Go shouldn't reschedule the CGo + // to another thread before the function finished executing. + var i uint32 + for i = 0; i < locks.maxLocks; i++ { + retCode := C.try_lock(locks.lockStruct, C.uint32_t(i)) + if retCode < 0 { + return nil, syscall.Errno(-1 * retCode) + } + if retCode == 0 { + usedLocks = append(usedLocks, i) + } + } + + return usedLocks, nil +} + func unlinkSHMLock(path string) error { cPath := C.CString(path) defer C.free(unsafe.Pointer(cPath)) diff --git a/libpod/lock/shm/shm_lock.h b/libpod/lock/shm/shm_lock.h index ebecae218b..a09cc81e2c 100644 --- a/libpod/lock/shm/shm_lock.h +++ b/libpod/lock/shm/shm_lock.h @@ -42,5 +42,6 @@ int32_t deallocate_all_semaphores(shm_struct_t *shm); int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index); int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index); int64_t available_locks(shm_struct_t *shm); +int32_t try_lock(shm_struct_t *shm, uint32_t sem_index); #endif diff --git a/libpod/lock/shm/shm_lock_nocgo.go b/libpod/lock/shm/shm_lock_nocgo.go index 31fc022235..7e0ccd61f0 100644 --- a/libpod/lock/shm/shm_lock_nocgo.go +++ b/libpod/lock/shm/shm_lock_nocgo.go @@ -101,3 +101,15 @@ func (locks *SHMLocks) UnlockSemaphore(sem uint32) error { logrus.Error("Locks are not supported without cgo") return nil } + +// GetFreeLocks gets the number of locks available to be allocated. +func (locks *SHMLocks) GetFreeLocks() (uint32, error) { + logrus.Error("Locks are not supported without cgo") + return 0, nil +} + +// Get a list of locks that are currently taken. +func (locks *SHMLocks) GetTakenLocks() ([]uint32, error) { + logrus.Error("Locks are not supported without cgo") + return nil, nil +} diff --git a/libpod/lock/shm_lock_manager_linux.go b/libpod/lock/shm_lock_manager_linux.go index 9d618009fa..344183eb59 100644 --- a/libpod/lock/shm_lock_manager_linux.go +++ b/libpod/lock/shm_lock_manager_linux.go @@ -108,6 +108,10 @@ func (m *SHMLockManager) AvailableLocks() (*uint32, error) { return &avail, nil } +func (m *SHMLockManager) LocksHeld() ([]uint32, error) { + return m.locks.GetTakenLocks() +} + // SHMLock is an individual shared memory lock. 
type SHMLock struct { lockID uint32 diff --git a/libpod/lock/shm_lock_manager_unsupported.go b/libpod/lock/shm_lock_manager_unsupported.go index d578359aba..9ca01d57dd 100644 --- a/libpod/lock/shm_lock_manager_unsupported.go +++ b/libpod/lock/shm_lock_manager_unsupported.go @@ -33,3 +33,13 @@ func (m *SHMLockManager) RetrieveLock(id string) (Locker, error) { func (m *SHMLockManager) FreeAllLocks() error { return fmt.Errorf("not supported") } + +// AvailableLocks is not supported on this platform +func (m *SHMLockManager) AvailableLocks() (*uint32, error) { + return nil, fmt.Errorf("not supported") +} + +// LocksHeld is not supported on this platform +func (m *SHMLockManager) LocksHeld() ([]uint32, error) { + return nil, fmt.Errorf("not supported") +} diff --git a/libpod/runtime.go b/libpod/runtime.go index f0c29c79be..6ec0a6c010 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -1189,19 +1189,19 @@ func (r *Runtime) SetRemoteURI(uri string) { r.config.Engine.RemoteURI = uri } - // Get information on potential lock conflicts. // Returns a map of lock number to object(s) using the lock, formatted as -// "container " or "volume " or "pod ". +// "container " or "volume " or "pod ", and an array of locks that +// are currently being held, formatted as []uint32. // If the map returned is not empty, you should immediately renumber locks on // the runtime, because you have a deadlock waiting to happen. -func (r *Runtime) LockConflicts() (map[uint32][]string, error) { +func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) { // Make an internal map to store what lock is associated with what locksInUse := make(map[uint32][]string) ctrs, err := r.state.AllContainers(false) if err != nil { - return nil, err + return nil, nil, err } for _, ctr := range ctrs { lockNum := ctr.lock.ID() @@ -1216,7 +1216,7 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, error) { pods, err := r.state.AllPods() if err != nil { - return nil, err + return nil, nil, err } for _, pod := range pods { lockNum := pod.lock.ID() @@ -1231,7 +1231,7 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, error) { volumes, err := r.state.AllVolumes() if err != nil { - return nil, err + return nil, nil, err } for _, vol := range volumes { lockNum := vol.lock.ID() @@ -1256,5 +1256,15 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, error) { } } - return toReturn, nil + locksHeld, err := r.lockManager.LocksHeld() + if err != nil { + if errors.Is(err, define.ErrNotImplemented) { + logrus.Warnf("Could not retrieve currently taken locks as the lock backend does not support this operation") + return toReturn, []uint32{}, nil + } + + return nil, nil, err + } + + return toReturn, locksHeld, nil } diff --git a/pkg/domain/entities/system.go b/pkg/domain/entities/system.go index 28517b1aab..473db35306 100644 --- a/pkg/domain/entities/system.go +++ b/pkg/domain/entities/system.go @@ -125,4 +125,5 @@ type AuthReport struct { // lead to deadlocks. 
type LocksReport struct { LockConflicts map[uint32][]string + LocksHeld []uint32 } diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go index 730d74623b..bc5a90b912 100644 --- a/pkg/domain/infra/abi/system.go +++ b/pkg/domain/infra/abi/system.go @@ -432,10 +432,11 @@ func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionR func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) { var report entities.LocksReport - conflicts, err := ic.Libpod.LockConflicts() + conflicts, held, err := ic.Libpod.LockConflicts() if err != nil { return nil, err } report.LockConflicts = conflicts + report.LocksHeld = held return &report, nil } diff --git a/test/e2e/info_test.go b/test/e2e/info_test.go index e48ce2f854..b124067bde 100644 --- a/test/e2e/info_test.go +++ b/test/e2e/info_test.go @@ -185,7 +185,7 @@ var _ = Describe("Podman Info", func() { info1.WaitWithDefaultTimeout() Expect(info1).To(Exit(0)) free1, err := strconv.Atoi(info1.OutputToString()) - Expect(err).To(BeNil()) + Expect(err).To(Not(HaveOccurred())) ctr := podmanTest.Podman([]string{"create", ALPINE, "top"}) ctr.WaitWithDefaultTimeout() @@ -195,7 +195,7 @@ var _ = Describe("Podman Info", func() { info2.WaitWithDefaultTimeout() Expect(info2).To(Exit(0)) free2, err := strconv.Atoi(info2.OutputToString()) - Expect(err).To(BeNil()) + Expect(err).To(Not(HaveOccurred())) Expect(free1).To(Equal(free2 + 1)) }) From 944673c88331a145ee2119d44138741dc970d834 Mon Sep 17 00:00:00 2001 From: Matt Heon Date: Tue, 6 Jun 2023 09:45:32 -0400 Subject: [PATCH 5/5] Address review feedback and add manpage notes The inspect format for `.LockNumber` needed to be documented. Signed-off-by: Matt Heon --- cmd/podman/system/locks.go | 2 +- .../markdown/podman-container-inspect.1.md.in | 1 + .../markdown/podman-pod-inspect.1.md.in | 1 + .../markdown/podman-volume-inspect.1.md | 1 + docs/source/markdown/podman-volume-ls.1.md.in | 1 + libpod/lock/shm/shm_lock.c | 4 ++-- libpod/runtime.go | 21 +++---------------- test/e2e/info_test.go | 5 ++++- 8 files changed, 14 insertions(+), 22 deletions(-) diff --git a/cmd/podman/system/locks.go b/cmd/podman/system/locks.go index e1501c9863..71f95d34d5 100644 --- a/cmd/podman/system/locks.go +++ b/cmd/podman/system/locks.go @@ -43,7 +43,7 @@ func runLocks() error { if len(report.LockConflicts) > 0 { fmt.Printf("\nLock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.\n\n") } else { - fmt.Printf("\nNo lock conflicts have been detected, system safe from deadlocks.\n\n") + fmt.Printf("\nNo lock conflicts have been detected.\n\n") } for _, lockNum := range report.LocksHeld { diff --git a/docs/source/markdown/podman-container-inspect.1.md.in b/docs/source/markdown/podman-container-inspect.1.md.in index b9e650690a..c8b45f30b8 100644 --- a/docs/source/markdown/podman-container-inspect.1.md.in +++ b/docs/source/markdown/podman-container-inspect.1.md.in @@ -43,6 +43,7 @@ Valid placeholders for the Go template are listed below: | .IsInfra | Is this an infra container? (string: true/false) | | .IsService | Is this a service container? 
(string: true/false) | | .KubeExitCodePropagation | Kube exit-code propagation (string) | +| .LockNumber | Number of the container's Libpod lock | | .MountLabel | SELinux label of mount (string) | | .Mounts | Mounts (array of strings) | | .Name | Container name (string) | diff --git a/docs/source/markdown/podman-pod-inspect.1.md.in b/docs/source/markdown/podman-pod-inspect.1.md.in index 7875a4de56..e196d4fcff 100644 --- a/docs/source/markdown/podman-pod-inspect.1.md.in +++ b/docs/source/markdown/podman-pod-inspect.1.md.in @@ -44,6 +44,7 @@ Valid placeholders for the Go template are listed below: | .InfraContainerID | Pod infrastructure ID | | .InspectPodData ... | Nested structure, for experts only | | .Labels | Pod labels | +| .LockNumber | Number of the pod's Libpod lock | | .MemoryLimit | Memory limit, bytes | | .MemorySwap | Memory swap limit, in bytes | | .Mounts | Mounts | diff --git a/docs/source/markdown/podman-volume-inspect.1.md b/docs/source/markdown/podman-volume-inspect.1.md index 32f5519121..1f0d4096fa 100644 --- a/docs/source/markdown/podman-volume-inspect.1.md +++ b/docs/source/markdown/podman-volume-inspect.1.md @@ -33,6 +33,7 @@ Valid placeholders for the Go template are listed below: | .Driver | Volume driver | | .GID | GID the volume was created with | | .Labels | Label information associated with the volume | +| .LockNumber | Number of the volume's Libpod lock | | .MountCount | Number of times the volume is mounted | | .Mountpoint | Source of volume mount point | | .Name | Volume name | diff --git a/docs/source/markdown/podman-volume-ls.1.md.in b/docs/source/markdown/podman-volume-ls.1.md.in index 9240a3c643..3ad5ae82e5 100644 --- a/docs/source/markdown/podman-volume-ls.1.md.in +++ b/docs/source/markdown/podman-volume-ls.1.md.in @@ -42,6 +42,7 @@ Valid placeholders for the Go template are listed below: | .GID | GID of volume | | .InspectVolumeData ... | Don't use | | .Labels | Label information associated with the volume | +| .LockNumber | Number of the volume's Libpod lock | | .MountCount | Number of times the volume is mounted | | .Mountpoint | Source of volume mount point | | .Name | Volume name | diff --git a/libpod/lock/shm/shm_lock.c b/libpod/lock/shm/shm_lock.c index dc98ce91fd..e86dc9d879 100644 --- a/libpod/lock/shm/shm_lock.c +++ b/libpod/lock/shm/shm_lock.c @@ -568,7 +568,7 @@ int64_t available_locks(shm_struct_t *shm) { for (i = 0; i < shm->num_bitmaps; i++) { // Short-circuit to catch fully-empty bitmaps quick. 
if (shm->locks[i].bitmap == 0) { - free_locks += 32; + free_locks += sizeof(bitmap_t) * 8; continue; } @@ -581,7 +581,7 @@ int64_t available_locks(shm_struct_t *shm) { count++; } - free_locks += 32 - count; + free_locks += (sizeof(bitmap_t) * 8) - count; } // Clear the mutex diff --git a/libpod/runtime.go b/libpod/runtime.go index 6ec0a6c010..c67b70c6ba 100644 --- a/libpod/runtime.go +++ b/libpod/runtime.go @@ -1206,12 +1206,7 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) { for _, ctr := range ctrs { lockNum := ctr.lock.ID() ctrString := fmt.Sprintf("container %s", ctr.ID()) - locksArr, ok := locksInUse[lockNum] - if ok { - locksInUse[lockNum] = append(locksArr, ctrString) - } else { - locksInUse[lockNum] = []string{ctrString} - } + locksInUse[lockNum] = append(locksInUse[lockNum], ctrString) } pods, err := r.state.AllPods() @@ -1221,12 +1216,7 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) { for _, pod := range pods { lockNum := pod.lock.ID() podString := fmt.Sprintf("pod %s", pod.ID()) - locksArr, ok := locksInUse[lockNum] - if ok { - locksInUse[lockNum] = append(locksArr, podString) - } else { - locksInUse[lockNum] = []string{podString} - } + locksInUse[lockNum] = append(locksInUse[lockNum], podString) } volumes, err := r.state.AllVolumes() @@ -1236,12 +1226,7 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) { for _, vol := range volumes { lockNum := vol.lock.ID() volString := fmt.Sprintf("volume %s", vol.Name()) - locksArr, ok := locksInUse[lockNum] - if ok { - locksInUse[lockNum] = append(locksArr, volString) - } else { - locksInUse[lockNum] = []string{volString} - } + locksInUse[lockNum] = append(locksInUse[lockNum], volString) } // Now go through and find any entries with >1 item associated diff --git a/test/e2e/info_test.go b/test/e2e/info_test.go index b124067bde..cc13766aab 100644 --- a/test/e2e/info_test.go +++ b/test/e2e/info_test.go @@ -177,7 +177,7 @@ var _ = Describe("Podman Info", func() { Expect(session.OutputToString()).To(Equal(want)) }) - It("Podman info: check lock count", func() { + It("Podman info: check lock count", Serial, func() { // This should not run on architectures and OSes that use the file locks backend. // Which, for now, is Linux + RISCV and FreeBSD, neither of which are in CI - so // no skips. @@ -197,6 +197,9 @@ var _ = Describe("Podman Info", func() { free2, err := strconv.Atoi(info2.OutputToString()) Expect(err).To(Not(HaveOccurred())) + // Effectively, we are checking that 1 lock has been taken. + // We do this by comparing the number of locks after (plus 1), to the number of locks before. + // Don't check absolute numbers because there is a decent chance of contamination, containers that were never removed properly, etc. Expect(free1).To(Equal(free2 + 1)) }) })
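
Usage sketch (illustrative only, not part of the patches): the debugging surfaces added by this series can be exercised roughly as follows. Command and format-placeholder names are taken from the diffs and manpage tables above; <container>, <pod>, and <volume> are generic placeholders, and exact output varies by system and lock backend.

    # Lock number allocated to a given Libpod object
    podman container inspect --format '{{ .LockNumber }}' <container>
    podman pod inspect --format '{{ .LockNumber }}' <pod>
    podman volume inspect --format '{{ .LockNumber }}' <volume>

    # Remaining free locks (empty for backends without a fixed lock limit)
    podman info --format '{{ .Host.FreeLocks }}'

    # Hidden debug command: reports lock conflicts and currently-held locks
    podman system locks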