Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LSE-334: Fallback for getting locks from stack frames #99

Merged
merged 14 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 64 additions & 20 deletions drgn_tools/bt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import typing as t

import drgn
from drgn import Architecture
from drgn import FaultError
from drgn import Object
from drgn import Program
Expand Down Expand Up @@ -37,6 +38,21 @@
)


def func_name(prog: drgn.Program, frame: drgn.StackFrame) -> t.Optional[str]:
    """
    Return the best available function name for a stack frame

    If the frame already carries a name, it is returned unchanged. Otherwise
    the name of the symbol looked up for the frame is used, with the
    compiler-generated clone suffixes ``.isra.N`` and ``.constprop.N``
    removed, so that e.g. ``foo.isra.0`` is reported as ``foo``.

    :param prog: Program being debugged (currently unused, kept so that the
      signature stays stable for callers)
    :param frame: Stack frame to find a name for
    :returns: The function name, or None if the frame has no name and no
      symbol could be found for it
    """
    if frame.name:
        return frame.name

    # Only the symbol lookup can raise LookupError, so keep the try minimal.
    try:
        sym = frame.symbol().name
    except LookupError:
        return None

    # A symbol can carry more than one clone suffix, in either order (e.g.
    # "foo.constprop.0.isra.1" as well as "foo.isra.0.constprop.1"). Cut at
    # the earliest suffix so that the bare function name is returned in both
    # cases, rather than a name which still contains the other suffix.
    cut = len(sym)
    for suffix in (".isra.", ".constprop."):
        idx = sym.find(suffix)
        if idx != -1:
            cut = min(cut, idx)
    return sym[:cut]


def frame_name(prog: drgn.Program, frame: drgn.StackFrame) -> str:
"""Return a suitable name for a stack frame"""
# Looking up the module for an address is currently a bit inefficient, since
Expand Down Expand Up @@ -150,6 +166,10 @@ def expand_traces(trace: drgn.StackTrace) -> t.List[drgn.StackTrace]:
# We should continue appending traces so long as (a) we can find a pt_regs,
# and (b) the stack pointer for that pt_regs is different than the stack
# pointer for the current stack.
#
# NOTE: aarch64 does not guarantee having SP if we're unwinding with frame
# pointers. However, trace[0] always has SP, because we generally have a
# full register set to start the trace. Thus, this should be safe to test.
while pt_regs is not None and pt_regs.sp.value_() != trace[0].sp:
# Interrupted user address.
if (
Expand Down Expand Up @@ -279,9 +299,28 @@ def print_frames(
:param start_idx: Where to start counting the frame indices from
:param indent: How many spaces to indent the output
"""
# On aarch64 without DWARF, it seems we're not guaranteed to have the stack
# pointer, or the frame pointer. Fallback to FP, then NULL, here so we don't
# crash during unwinds.
if prog.platform.arch == Architecture.AARCH64:

def get_sp(frame: drgn.StackFrame) -> int:
try:
return frame.sp
except LookupError:
try:
return frame.register("fp")
except LookupError:
return 0

else:

def get_sp(frame: drgn.StackFrame) -> int:
return frame.sp

pfx = " " * indent
for i, frame in enumerate(trace):
sp = frame.sp # drgn 0.0.22
sp = get_sp(frame)
intr = "!" if frame.interrupted else " "
try:
pc = hex(frame.pc)
Expand All @@ -304,7 +343,7 @@ def print_frames(
# with a different stack pointer than the previous. That is: only
# when we reach the frame for a non-inline function. Also, only
# output registers when we have show_vars=True.
if i == len(trace) - 1 or trace[i].sp != trace[i + 1].sp:
if i == len(trace) - 1 or sp != get_sp(trace[i + 1]):
registers = frame.registers()
regnames = list(registers.keys())
# This formats the registers in three columns.
Expand Down Expand Up @@ -340,7 +379,13 @@ def print_frames(
raise
if val.absent_ and not show_absent:
continue
val_str = val.format_(dereference=False).replace("\n", "\n ")

try:
val_str = val.format_(dereference=False).replace(
"\n", "\n "
)
except FaultError:
val_str = "(FaultError occurred while formatting!)"
print(pfx + " " * 5 + f"{local} = {val_str}")


Expand Down Expand Up @@ -473,7 +518,10 @@ def _index_functions(prog: drgn.Program) -> t.Dict[str, t.Set[int]]:
try:
frames = bt_frames(task)
for frame in frames:
func_to_pids[frame.name].add(pid)
name = func_name(prog, frame)
if not name:
continue
func_to_pids[name].add(pid)
except FaultError:
# FaultError: catch unusual unwinding issues
pass
Expand All @@ -483,6 +531,7 @@ def _index_functions(prog: drgn.Program) -> t.Dict[str, t.Set[int]]:
def _indexed_bt_has_any(
prog: drgn.Program,
funcs: t.List[str],
one_per_task: bool = False,
) -> t.List[t.Tuple[drgn.Object, drgn.StackFrame]]:
index = prog.cache.get("drgn_tools.bt._index_functions")
if index is None:
Expand All @@ -496,15 +545,20 @@ def _indexed_bt_has_any(
for pid in pids:
task = find_task(prog, pid)
for frame in bt_frames(task):
if frame.name in funcs:
name = func_name(prog, frame)
if name in funcs:
result.append((task, frame))
if one_per_task:
# don't return any more results for this task
break
return result


def bt_has_any(
prog: drgn.Program,
funcs: t.List[str],
task: t.Optional[drgn.Object] = None,
one_per_task: bool = False,
) -> t.List[t.Tuple[drgn.Object, drgn.StackFrame]]:
"""
Search for tasks whose stack contains the given functions
Expand All @@ -528,26 +582,16 @@ def bt_has_any(
return _indexed_bt_has_any(prog, funcs)

frame_list = []
if task is not None:
try:
frames = bt_frames(task)
for frame in frames:
if frame.name in funcs:
frame_list.append((task, frame))
tasks = [task] if task is not None else for_each_task(prog)

return frame_list

except (FaultError, ValueError):
# FaultError: catch unusual unwinding issues
# ValueError: catch "cannot unwind stack of running task"
pass

for task in for_each_task(prog):
for task in tasks:
try:
frames = bt_frames(task)
for frame in frames:
if frame.name in funcs:
if func_name(prog, frame) in funcs:
frame_list.append((task, frame))
if one_per_task:
break
except (FaultError, ValueError):
# FaultError: catch unusual unwinding issues
# ValueError: catch "cannot unwind stack of running task"
Expand Down
1 change: 1 addition & 0 deletions drgn_tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def _set_debuginfo(
ctf_path = _get_ctf_path(release, args)
if ctf_path and HAVE_CTF:
load_ctf(prog, ctf_path)
prog.cache["using_ctf"] = True
return "CTF", ctf_path
elif ctf_path:
problems.append(
Expand Down
1 change: 1 addition & 0 deletions drgn_tools/corelens.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ def _load_prog_and_debuginfo(args: argparse.Namespace) -> Tuple[Program, bool]:
path = args.ctf or f"/lib/modules/{release}/kernel/vmlinux.ctfa"
if os.path.isfile(path) and _check_ctf_compat(release, args.vmcore):
load_ctf(prog, path)
prog.cache["using_ctf"] = True
return prog, True
except ModuleNotFoundError:
pass
Expand Down
4 changes: 3 additions & 1 deletion drgn_tools/debuginfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,10 +680,12 @@ def get(

# For OL8, UEK6, the CTF generation process produced buggy data. The
# data was fixed starting in 5.4.17-2136.323.1: all prior versions are
# fully broken.
# fully broken. This is specific to x86_64: the aarch64 build used a
# different toolchain which was not affected.
if (
kver.ol_version == 8
and kver.uek_version == 6
and kver.arch == "x86_64"
and kver.release_tuple < (2136, 323, 1)
):
return cls.NO
Expand Down
Loading
Loading