Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

-fno-plt disturbs tracing the libc call and default Rust build flag use it. #1777

Closed
ChoKyuWon opened this issue Jul 26, 2023 · 11 comments
Closed

Comments

@ChoKyuWon
Copy link
Contributor

I want to track the allocation(malloc) in Rust, so I use the simple code as follows:

fn main() {
    let buf : [u8;64] = [0;64];
    let x = Box::<[u8;64]>::new(buf);
    println!("{:?}", *x);
}

And trace it with the following command:

$ RUSTFLAGS="-Zinstrument-mcount -Cpasses=ee-instrument<post-inline>" cargo build -Zbuild-std --target x86_64-unknown-linux-gnu
$ uftrace -la ./target/x86_64-unknown-linux-gnu/debug/test-rs
...
          [102876] |                                 __rdl_alloc() {
            [102876] |                                   core::alloc::layout::Layout::from_size_align_unchecked() {
   0.110 us [102876] |                                     core::ptr::alignment::Alignment::new_unchecked();
   0.461 us [102876] |                                   } /* core::alloc::layout::Layout::from_size_align_unchecked */
            [102876] |                                   std::sys::unix::alloc::_<impl core::alloc::global::GlobalAlloc for std::alloc::System>::alloc() {
            [102876] |                                     core::alloc::layout::Layout::align() {
   0.080 us [102876] |                                       core::ptr::alignment::Alignment::as_usize();
   0.361 us [102876] |                                     } /* core::alloc::layout::Layout::align */
            [102876] |                                     core::alloc::layout::Layout::align() {
   0.081 us [102876] |                                       core::ptr::alignment::Alignment::as_usize();
   0.331 us [102876] |                                     } /* core::alloc::layout::Layout::align */
   0.080 us [102876] |                                     core::alloc::layout::Layout::size();
   0.080 us [102876] |                                     core::alloc::layout::Layout::size();
   1.603 us [102876] |                                   } /* std::sys::unix::alloc::_<impl core::alloc::global::GlobalAlloc for std::alloc::System>::alloc */
   2.524 us [102876] |                                 } /* __rdl_alloc */
...

I use the -Zbuild-std option because the allocation layout(e.g., exchange_malloc, __rdl_alloc) is also not traced with this option.
As far as I know, __rdl_alloc calls the malloc eventually, but the trace misses it in my environment.

Additionally, I use the following code to test the direct libc call:

use libc::getpid;

fn main() {
    let pid = unsafe{getpid()};
    println!("Hello, world! {}", pid);
}

I ran it with uftrace and --nest-libcall, but the uftrace never found the getpid call.

$ uftrace --nest-libcall ./test-rs
# DURATION     TID     FUNCTION
            [105640] | std::rt::lang_start() {
            [105640] |   std::rt::lang_start::_{{closure}}() {
            [105640] |     std::sys_common::backtrace::__rust_begin_short_backtrace() {
            [105640] |       core::ops::function::FnOnce::call_once() {
            [105640] |         test_rs::main() {
   0.080 us [105640] |           core::fmt::Arguments::new_v1();
  10.520 us [105640] |         } /* test_rs::main */
  10.710 us [105640] |       } /* core::ops::function::FnOnce::call_once */
  10.850 us [105640] |     } /* std::sys_common::backtrace::__rust_begin_short_backtrace */
...

I attach the LLVM IR and asm of getpid example code to help the debug:

LLVM:
; test_rs::main
; Function Attrs: nonlazybind uwtable
define internal void @_ZN7test_rs4main17hd6c31088d106852bE() unnamed_addr #1 !dbg !344 {
start:
  call void @mcount(), !dbg !350
  %f.dbg.spill.i = alloca ptr, align 8
  %x.dbg.spill.i1 = alloca ptr, align 8
  %0 = alloca { ptr, ptr }, align 8
  %x.dbg.spill.i = alloca ptr, align 8
  %_7 = alloca [1 x { ptr, ptr }], align 8
  %_3 = alloca %"core::fmt::Arguments<'_>", align 8
  %pid = alloca i32, align 4
  call void @llvm.dbg.declare(metadata ptr %pid, metadata !348, metadata !DIExpression()), !dbg !351
  %1 = call i32 @getpid(), !dbg !352
  store i32 %1, ptr %pid, align 4, !dbg !352
  store ptr %pid, ptr %x.dbg.spill.i, align 8
  call void @llvm.dbg.declare(metadata ptr %x.dbg.spill.i, metadata !353, metadata !DIExpression()), !dbg !363
  store ptr %pid, ptr %x.dbg.spill.i1, align 8
  call void @llvm.dbg.declare(metadata ptr %x.dbg.spill.i1, metadata !365, metadata !DIExpression()), !dbg !375
  store ptr @"_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$i32$GT$3fmt17hb2743b109594184dE", ptr %f.dbg.spill.i, align 8
  call void @llvm.dbg.declare(metadata ptr %f.dbg.spill.i, metadata !374, metadata !DIExpression()), !dbg !377
  store ptr %pid, ptr %0, align 8, !dbg !378
  %2 = getelementptr inbounds { ptr, ptr }, ptr %0, i32 0, i32 1, !dbg !378
  store ptr @"_ZN4core3fmt3num3imp52_$LT$impl$u20$core..fmt..Display$u20$for$u20$i32$GT$3fmt17hb2743b109594184dE", ptr %2, align 8, !dbg !378
  %3 = load ptr, ptr %0, align 8, !dbg !379, !nonnull !23, !align !380, !noundef !23
  %4 = getelementptr inbounds { ptr, ptr }, ptr %0, i32 0, i32 1, !dbg !379
  %5 = load ptr, ptr %4, align 8, !dbg !379, !nonnull !23, !noundef !23
  %6 = insertvalue { ptr, ptr } poison, ptr %3, 0, !dbg !379
  %7 = insertvalue { ptr, ptr } %6, ptr %5, 1, !dbg !379
  %8 = extractvalue { ptr, ptr } %7, 0, !dbg !381
  %9 = extractvalue { ptr, ptr } %7, 1, !dbg !381
  %_8.0 = extractvalue { ptr, ptr } %7, 0, !dbg !382
  %_8.1 = extractvalue { ptr, ptr } %7, 1, !dbg !382
  %10 = getelementptr inbounds [1 x { ptr, ptr }], ptr %_7, i64 0, i64 0, !dbg !382
  %11 = getelementptr inbounds { ptr, ptr }, ptr %10, i32 0, i32 0, !dbg !382
  store ptr %_8.0, ptr %11, align 8, !dbg !382
  %12 = getelementptr inbounds { ptr, ptr }, ptr %10, i32 0, i32 1, !dbg !382
  store ptr %_8.1, ptr %12, align 8, !dbg !382
; call core::fmt::Arguments::new_v1
  call void @_ZN4core3fmt9Arguments6new_v117h615397d98d173ed3E(ptr sret(%"core::fmt::Arguments<'_>") %_3, ptr align 8 @alloc_fca65c4944bd343a41d02e4b76265d70, i64 2, ptr align 8 %_7, i64 1), !dbg !382
; call std::io::stdio::_print
  call void @_ZN3std2io5stdio6_print17h110350259680142bE(ptr %_3), !dbg !382
  ret void, !dbg !383
}

ASM:

00000000000265b0 <_ZN7test_rs4main17hd6c31088d106852bE>:
   265b0:       55                      push   %rbp
   265b1:       48 89 e5                mov    %rsp,%rbp
   265b4:       48 83 ec 70             sub    $0x70,%rsp
   265b8:       e8 83 fa ff ff          call   26040 <mcount@plt>
   265bd:       48 8b 05 ac 99 1c 00    mov    0x1c99ac(%rip),%rax        # 1eff70 <getpid@GLIBC_2.2.5>
   265c4:       ff d0                   call   *%rax                                              # Our missing getpid call
   265c6:       89 45 94                mov    %eax,-0x6c(%rbp)
...

Are there some missing flags I need to use to track the core libraries, such as libc or libstd in Rust?

@namhyung
Copy link
Owner

Some low-level libraries like libc are not allowed to trace because they caused troubles before. Please see setup_mod_plthook_data() in libmcount/plthook.c file.

@ChoKyuWon
Copy link
Contributor Author

ChoKyuWon commented Aug 24, 2023

Now I understand the reason the internal call stack in libc is not able to be traced. However, why is the libc entry call not also traced?

For example, I wrote these code and test:

// libtest.c
void* malloc_wrappar (int size) {
  return malloc(size);
}

// main.c
int main() {
    malloc_wrappar(1024);
}
$ gcc libtest.c -fPIC -shared -o libtest.so
$ gcc main.c libtest.so -o main
$ uftrace -P . -l ./main
# DURATION     TID     FUNCTION
          [1763009] | main() {
          [1763009] |   malloc_wrappar() {
   0.511 us [1763009] |     malloc();
   2.174 us [1763009] |   } /* malloc_wrappar */
   2.756 us [1763009] | } /* main */

Rust also wraps the libc as a core library, and I'm not sure about the internal structure, but I guess they use almost the same as the above example, but they are not able to trace.

I wonder if it is about the nest-libcall bug that has not been reported yet or a known issue that has not been solved yet.

@ChoKyuWon
Copy link
Contributor Author

ChoKyuWon commented Aug 26, 2023

When I compile the Rust and C code, it's not about the plt hooking or something else, but the real problem here is Rust usually call the libc function as an indirect function call, even if they are not needed.

Here is the assembly code:

Rust:

00000000000072a0 <_ZN13rust_libctest4main17h764c5f32c8353ea0E>:
    72a0:	55                   	push   %rbp
    72a1:	48 89 e5             	mov    %rsp,%rbp
    72a4:	48 83 ec 70          	sub    $0x70,%rsp
    72a8:	e8 83 d5 ff ff       	callq  4830 <mcount@plt>
    72ad:	48 8b 05 d4 2b 25 00 	mov    0x252bd4(%rip),%rax        # 259e88 <getpid@GLIBC_2.2.5>
    72b4:	ff d0                	callq  *%rax
    72b6:	89 45 dc             	mov    %eax,-0x24(%rbp)
    72b9:	48 8d 4d dc          	lea    -0x24(%rbp),%rcx

C:

000000000000077a <main>:
 77a:	55                   	push   %rbp
 77b:	48 89 e5             	mov    %rsp,%rbp
 77e:	ff 15 64 08 20 00    	callq  *0x200864(%rip)        # 200fe8 <mcount@GLIBC_2.2.5>
 784:	b8 00 00 00 00       	mov    $0x0,%eax
 789:	e8 42 fe ff ff       	callq  5d0 <getpid@plt>

You can see 0x72b4 at Rust call the getpid indirectly, call *rax.

If we test a simple C code as below:

#include <stdlib.h>
#include <unistd.h>

int main() {
  int (*getpid_ptr)() = getpid;
  getpid_ptr();
}

Then uftrace also not be able to trace the getpid call.

EDIT:
I have misunderstood on the previous evaluation. It's not the problem of direct/indirect, but the problem of PIE. If you compile the first C code, only call the getpid, but if you compile it with -fno-plt option, the assembly would as follows:

0000000000400600 <main>:
  400600:	55                   	push   %rbp
  400601:	48 89 e5             	mov    %rsp,%rbp
  400604:	48 83 ec 10          	sub    $0x10,%rsp
  400608:	e8 93 fe ff ff       	callq  4004a0 <mcount@plt>
  40060d:	ff 15 d5 09 20 00    	callq  *0x2009d5(%rip)        # 600fe8 <getpid@GLIBC_2.2.5>

And the getpid call is also not able to be traced.

@ChoKyuWon
Copy link
Contributor Author

ChoKyuWon commented Aug 26, 2023

We can trace if we enable the plt on Rust, by -Zplt=true.

$ RUSTFLAGS="-Z instrument-mcount -Cpasses=ee-instrument<post-inline> -Zplt=true" cargo build
$ uftrace ./target/debug/test
Hello, world! 30255
# DURATION     TID     FUNCTION
            [ 30255] | std::rt::lang_start::h4042783a20276ef0() {
            [ 30255] |   std::rt::lang_start::_$u7b$$u7b$closure$u7d$$u7d$::h2b4394c38313fed0() {
            [ 30255] |     std::sys_common::backtrace::__rust_begin_short_backtrace::had8f373937aa6c04() {
            [ 30255] |       core::ops::function::FnOnce::call_once::hb3197c7860991cd0() {
            [ 30255] |         rust_libctest::main::h764c5f32c8353ea0() {
   0.300 us [ 30255] |           getpid();
  22.300 us [ 30255] |         } /* rust_libctest::main::h764c5f32c8353ea0 */
  23.000 us [ 30255] |       } /* core::ops::function::FnOnce::call_once::hb3197c7860991cd0 */

We may need documentation about the build flags to make it easy to trace the Rust project.

EDIT:
I found 50a8cae solved this problem, and my uftrace version was uftrace v0.14-26-gd6f6 ( x86_64 ), but somehow it doesn't work now.

@ChoKyuWon ChoKyuWon changed the title Some calls to core libraries such as libc are missing in Rust trace even with --nest-libcall(-l) -fno-plt disturbs tracing the libc call and default Rust build flag use it. Aug 26, 2023
@honggyukim
Copy link
Collaborator

Hi @ChoKyuWon, thanks for digging into the problem. I also found that -fno-plt handling is broken at some point. We might have to go through git bisect to find when it started to be broken.

I can confirm that -fno-plt is not properly handled as the following tests show.

$ gcc -pg tests/s-abc.c 
$ uftrace -F main a.out 
# DURATION     TID     FUNCTION
            [ 15700] | main() {
            [ 15700] |   a() {
            [ 15700] |     b() {
            [ 15700] |       c() {
   0.619 us [ 15700] |         getpid();
   1.208 us [ 15700] |       } /* c */
   1.397 us [ 15700] |     } /* b */
   1.556 us [ 15700] |   } /* a */
   1.797 us [ 15700] | } /* main */

-fno-plt makes getpid disappears in the tracing result.

$ gcc -pg tests/s-abc.c -fno-plt
$ uftrace -F main a.out 
# DURATION     TID     FUNCTION
            [ 15715] | main() {
            [ 15715] |   a() {
            [ 15715] |     b() {
   0.594 us [ 15715] |       c();
   1.120 us [ 15715] |     } /* b */
   1.287 us [ 15715] |   } /* a */
   1.542 us [ 15715] | } /* main */

@honggyukim
Copy link
Collaborator

honggyukim commented Aug 26, 2023

Hmm.. I see the same problem even in v0.9. And v0.8.3 shows segfault so I couldn't test below this version.

Maybe the segfault in v0.8.3 is related to #738 and c36e56b.

@honggyukim
Copy link
Collaborator

I found 50a8cae solved this problem, and my uftrace version was uftrace v0.14-26-gd6f6 ( x86_64 ), but somehow it doesn't work now.

It's weird that 50a8cae was introduced since v0.9.2, but going back to v0.9.2 doesn't solve the problem. Maybe something has been changed in the compiler side.

@honggyukim
Copy link
Collaborator

@namhyung might have a better idea on this problem

@ChoKyuWon
Copy link
Contributor Author

ChoKyuWon commented Aug 27, 2023

After shallow trial, I found that even -fno-plt binary has pltgot section for __monstartup even with -Wl,-z,now.
So I made a tricky fix bypassing the flag enabling when it found __monstartup, and it seems well, so the root cause is that I guess.

diff --git a/libmcount/plthook.c b/libmcount/plthook.c
index 53dd7a4d..6bbc4b5c 100644
--- a/libmcount/plthook.c
+++ b/libmcount/plthook.c
@@ -204,7 +204,8 @@ static int find_got(struct uftrace_elf_data *elf, struct uftrace_elf_iter *iter,
 			pltgot_addr = (unsigned long)iter->dyn.d_un.d_val + offset;
 			break;
 		case DT_JMPREL:
-			plt_found = true;
+			if(strncmp("__monstartup", elf_get_name(elf, iter, iter->sym.st_name), sizeof("__monstartup")))
+				plt_found = true;
 			break;
 		default:
 			break;
@@ -212,6 +213,7 @@ static int find_got(struct uftrace_elf_data *elf, struct uftrace_elf_iter *iter,
 	}
 
 	if (!plt_found) {
+		fprintf(stderr, "plt_not_found!\n");
 		pd = mcount_arch_hook_no_plt(elf, modname, offset);
 		if (pd == NULL)
 			pr_dbg2("no PLTGOT found.. ignoring...\n");

However, when I applied this fix and tried to run the -fno-plt binary, it made the segfault. (but it poped up the "plt_not_found" message in the fix.)
I'll take more deep dive in this problem.

@namhyung
Copy link
Owner

Yeah I also noticed it didn't work well at some point but never had a chance to take a deeper look. I think compilers changed the pattern how they generate function calls without PLT.

ChoKyuWon added a commit to ChoKyuWon/uftrace that referenced this issue Aug 29, 2023
Issue namhyung#1777 reported that no-plt binary is handled as plt-enable
binary because some functions are still left in plt section.
The patch fixes this bug by comparing the name of self with
a list of known functions.

However, current `mcount_arch_hook_no_plt` make segfault on x86
now, so I comment it out and print the debug message if the binary
was built with `-fno-plt` option.

Signed-off-by: ChoKyuWon <[email protected]>
@ChoKyuWon
Copy link
Contributor Author

In mcount_arch_hook_no_plt(), I found the value sym->addr itself points to the PLT and it's not Elf64_Rela structure in case if the symbols have PLT.

$ gcc -pg -fno-plt tests/s-abc.c
$ gdb uftrace
gdb-peda$ r ./a.out 
gdb-peda$ print sym->addr
$1 = 0x555555554600

gdb-peda$ elfsymbol
Found 3 symbols
__monstartup@plt = 0x555555554600
__cxa_atexit@plt = 0x555555554610
__cxa_finalize@plt = 0x555555554620

gdb-peda$ elfsection
.plt = 0x5555555545f0
.plt.got = 0x555555554620
.got = 0x555555754f98
...

gdb-peda$ x/3i sym->addr
   0x555555554600 <__monstartup@plt>:   jmp    QWORD PTR [rip+0x2009aa]        # 0x555555754fb0
   0x555555554606 <__monstartup@plt+6>: push   0x0
   0x55555555460b <__monstartup@plt+11>:        jmp    0x5555555545f0

Maybe the problem is that the mcount_arch_hook_no_plt threat the entire symbols do not have the PLT, but some cases do not, so SEGFAULT occured IMO.

ChoKyuWon added a commit to ChoKyuWon/uftrace that referenced this issue Sep 1, 2023
Issue namhyung#1777 reported that no-plt binary is handled as plt-enable
binary because some functions are still left in plt section.
The patch fixes this bug by comparing the name of self with
a list of known functions.

Signed-off-by: ChoKyuWon <[email protected]>
ChoKyuWon added a commit to ChoKyuWon/uftrace that referenced this issue Sep 16, 2023
Issue reported that no-plt binary is handled as plt-enable
binary because some functions are still left in plt section.
The patch fixes this bug by comparing the name of self with
a list of known functions.

Fixes: namhyung#1777

Signed-off-by: ChoKyuWon <[email protected]>
@namhyung namhyung added this to the v0.15 milestone Dec 18, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants