From 72cf3896851aac16c834d6ac9508e1aab281a93e Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Tue, 9 Nov 2021 14:07:49 +1100 Subject: [PATCH] shm_lock: Handle ENOSPC better in AllocateSemaphore When starting a container, libpod/runtime_pod_linux.go:NewPod calls libpod/lock/lock.go:AllocateLock, which ends up in here. If you exceed num_locks, in response to a "podman run ..." you will see: Error: error allocating lock for new container: no space left on device As noted inline, this error is technically true as it is talking about the SHM area, but for anyone who has not dug into the source (i.e. me, until a few hours ago :) your initial thought is going to be that your disk is full. I spent quite a bit of time trying to diagnose what disk, partition, overlay, etc. was filling up before I realised this was actually due to locks leaking from failing containers. This change overrides this case to give a more explicit message that hopefully puts people on the right track to fixing this faster. You will now see: $ ./bin/podman run --rm -it fedora bash Error: error allocating lock for new container: allocation failed; exceeded num_locks (20) [NO NEW TESTS NEEDED] (just changes an existing error message) Signed-off-by: Ian Wienand --- libpod/lock/shm/shm_lock.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libpod/lock/shm/shm_lock.go b/libpod/lock/shm/shm_lock.go index 322e92a8f6..fea02a619f 100644 --- a/libpod/lock/shm/shm_lock.go +++ b/libpod/lock/shm/shm_lock.go @@ -130,8 +130,17 @@ func (locks *SHMLocks) AllocateSemaphore() (uint32, error) { // semaphore indexes, and can still return error codes. retCode := C.allocate_semaphore(locks.lockStruct) if retCode < 0 { + var err = syscall.Errno(-1 * retCode) // Negative errno returned - return 0, syscall.Errno(-1 * retCode) + if errors.Is(err, syscall.ENOSPC) { + // ENOSPC expands to "no space left on device". 
While it is technically true + // that there's no room in the SHM inn for this lock, this tends to send normal people + // down the path of checking disk space, which is not actually their problem. + // Give a clue that it's actually due to num_locks filling up. + var errFull = errors.Errorf("allocation failed; exceeded num_locks (%d)", locks.maxLocks) + return uint32(retCode), errFull + } + return uint32(retCode), syscall.Errno(-1 * retCode) } return uint32(retCode), nil