-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for lock debugging #18796
Merged
Merged
Changes from 4 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
3b39eb1
Include lock number in pod/container/volume inspect
mheon 1013696
Add number of free locks to `podman info`
mheon 0948c07
Add a new hidden command, podman system locks
mheon 4fda793
`system locks` now reports held locks
mheon 944673c
Address review feedback and add manpage notes
mheon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package system | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/containers/podman/v4/cmd/podman/registry" | ||
"github.com/containers/podman/v4/cmd/podman/validate" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
var ( | ||
locksCommand = &cobra.Command{ | ||
Use: "locks", | ||
Short: "Debug Libpod's use of locks, identifying any potential conflicts", | ||
Args: validate.NoArgs, | ||
Hidden: true, | ||
RunE: func(cmd *cobra.Command, args []string) error { | ||
return runLocks() | ||
}, | ||
Example: "podman system locks", | ||
} | ||
) | ||
|
||
func init() { | ||
registry.Commands = append(registry.Commands, registry.CliCommand{ | ||
Command: locksCommand, | ||
Parent: systemCmd, | ||
}) | ||
} | ||
func runLocks() error { | ||
report, err := registry.ContainerEngine().Locks(registry.Context()) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
for lockNum, objects := range report.LockConflicts { | ||
fmt.Printf("Lock %d is in use by the following\n:", lockNum) | ||
for _, obj := range objects { | ||
fmt.Printf("\t%s\n", obj) | ||
} | ||
} | ||
|
||
if len(report.LockConflicts) > 0 { | ||
fmt.Printf("\nLock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.\n\n") | ||
} else { | ||
fmt.Printf("\nNo lock conflicts have been detected, system safe from deadlocks.\n\n") | ||
} | ||
|
||
for _, lockNum := range report.LocksHeld { | ||
fmt.Printf("Lock %d is presently being held\n", lockNum) | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,12 +20,18 @@ static size_t compute_shm_size(uint32_t num_bitmaps) { | |
// Handles exceptional conditions, including a mutex locked by a process that | ||
// died holding it. | ||
// Returns 0 on success, or positive errno on failure. | ||
static int take_mutex(pthread_mutex_t *mutex) { | ||
static int take_mutex(pthread_mutex_t *mutex, bool trylock) { | ||
int ret_code; | ||
|
||
do { | ||
ret_code = pthread_mutex_lock(mutex); | ||
} while(ret_code == EAGAIN); | ||
if (!trylock) { | ||
do { | ||
ret_code = pthread_mutex_lock(mutex); | ||
} while(ret_code == EAGAIN); | ||
} else { | ||
do { | ||
ret_code = pthread_mutex_trylock(mutex); | ||
} while(ret_code == EAGAIN); | ||
} | ||
|
||
if (ret_code == EOWNERDEAD) { | ||
// The previous owner of the mutex died while holding it | ||
|
@@ -309,7 +315,7 @@ int64_t allocate_semaphore(shm_struct_t *shm) { | |
} | ||
|
||
// Lock the semaphore controlling access to our shared memory | ||
ret_code = take_mutex(&(shm->segment_lock)); | ||
ret_code = take_mutex(&(shm->segment_lock), false); | ||
if (ret_code != 0) { | ||
return -1 * ret_code; | ||
} | ||
|
@@ -383,7 +389,7 @@ int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index) { | |
test_map = 0x1 << index_in_bitmap; | ||
|
||
// Lock the mutex controlling access to our shared memory | ||
ret_code = take_mutex(&(shm->segment_lock)); | ||
ret_code = take_mutex(&(shm->segment_lock), false); | ||
if (ret_code != 0) { | ||
return -1 * ret_code; | ||
} | ||
|
@@ -436,7 +442,7 @@ int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) { | |
test_map = 0x1 << index_in_bitmap; | ||
|
||
// Lock the mutex controlling access to our shared memory | ||
ret_code = take_mutex(&(shm->segment_lock)); | ||
ret_code = take_mutex(&(shm->segment_lock), false); | ||
if (ret_code != 0) { | ||
return -1 * ret_code; | ||
} | ||
|
@@ -475,7 +481,7 @@ int32_t deallocate_all_semaphores(shm_struct_t *shm) { | |
} | ||
|
||
// Lock the mutex controlling access to our shared memory | ||
ret_code = take_mutex(&(shm->segment_lock)); | ||
ret_code = take_mutex(&(shm->segment_lock), false); | ||
if (ret_code != 0) { | ||
return -1 * ret_code; | ||
} | ||
|
@@ -513,7 +519,7 @@ int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) { | |
bitmap_index = sem_index / BITMAP_SIZE; | ||
index_in_bitmap = sem_index % BITMAP_SIZE; | ||
|
||
return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); | ||
return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]), false); | ||
} | ||
|
||
// Unlock a given semaphore | ||
|
@@ -537,3 +543,98 @@ int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) { | |
|
||
return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); | ||
} | ||
|
||
// Get the number of free locks.
// Returns a non-negative integer guaranteed to be less than UINT32_MAX on
// success, or negative errno values on failure.
// On success, the returned integer is the number of free semaphores.
int64_t available_locks(shm_struct_t *shm) {
  int ret_code, i, count;
  bitmap_t test_map;
  int64_t free_locks = 0;
  // Number of lock slots tracked by one bitmap. Derived from the bitmap type
  // rather than hard-coded, so the count stays correct if bitmap_t ever
  // changes width.
  const int64_t bits_per_bitmap = (int64_t)(sizeof(bitmap_t) * 8);

  if (shm == NULL) {
    return -1 * EINVAL;
  }

  // Lock the semaphore controlling access to the SHM segment.
  // This isn't strictly necessary as we're only reading, but it seems safer.
  ret_code = take_mutex(&(shm->segment_lock), false);
  if (ret_code != 0) {
    return -1 * ret_code;
  }

  // Loop through all bitmaps, counting number of allocated locks.
  for (i = 0; i < shm->num_bitmaps; i++) {
    // Short-circuit to catch fully-empty bitmaps quick: every slot is free.
    if (shm->locks[i].bitmap == 0) {
      free_locks += bits_per_bitmap;
      continue;
    }

    // Use Kernighan's Algorithm to count bits set. Subtract from number of bits
    // in the bitmap to get free bits, and thus free lock count.
    test_map = shm->locks[i].bitmap;
    count = 0;
    while (test_map) {
      // Clears the lowest set bit; the loop runs once per allocated lock.
      test_map = test_map & (test_map - 1);
      count++;
    }

    free_locks += bits_per_bitmap - count;
  }

  // Clear the mutex
  ret_code = release_mutex(&(shm->segment_lock));
  if (ret_code != 0) {
    return -1 * ret_code;
  }

  // Return free lock count.
  return free_locks;
}
|
||
// Probe a semaphore by trying to take it without blocking. If the attempt
// succeeds, the semaphore is released again before returning, so this never
// leaves the lock held (and is therefore NOT equivalent to POSIX trylock).
// Intended for detecting locks that have stayed held for a long time, which
// may indicate a deadlock.
// Returns 1 if the lock could be taken (and was released again), 0 if it
// could not be taken, or a negative errno value on failure.
int32_t try_lock(shm_struct_t *shm, uint32_t sem_index) {
  int bitmap_index, index_in_bitmap, rc;
  pthread_mutex_t *target;

  // Reject a missing segment or an out-of-range lock index.
  if (shm == NULL || sem_index >= shm->num_locks) {
    return -1 * EINVAL;
  }

  // Locate the mutex: which bitmap it lives in, and its slot in that bitmap.
  bitmap_index = sem_index / BITMAP_SIZE;
  index_in_bitmap = sem_index % BITMAP_SIZE;
  target = &(shm->locks[bitmap_index].locks[index_in_bitmap]);

  rc = take_mutex(target, true);
  switch (rc) {
  case 0:
    // Lock acquired; fall out of the switch to release it.
    break;
  case EBUSY:
    // Did not successfully take the lock.
    return 0;
  default:
    // Another, unrelated error.
    return -1 * rc;
  }

  // Lock taken successfully, unlock and report.
  rc = release_mutex(target);
  return (rc != 0) ? (-1 * rc) : 1;
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As much as I understand this wording, I would not claim "system safe from deadlocks"; this only checks for shm lock conflicts. We can still have ABBA deadlocks, or any other deadlock between different kinds of locks, such as mutexes, Go channels, WaitGroups, or even c/storage locks.