From c2a5309efa83f03f2dde3bce9307993ba095ddfc Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Mon, 29 Apr 2024 11:42:20 +0100 Subject: [PATCH] Remediation for verifier regression in kernels ~6.8 and greater Some days ago we put a fix for arm64 kernels as a verifier regression was introduced causing loading errors [commit](https://github.com/javierhonduco/lightswitch/commit/a8094d18d0e8a662d3581380934cd185eb30bf45). Turns out this is not just affecting arm64, as it just happened it's that my arm64 VM was running a more up to date kernel. This regression affects x86 as well. This commit changes the remediation to unrolloing the binary search loop and adds a kernel regression test. I haven't measured the performance implication of doing this vs the previous remediation. This is out of the scope of this PR and it's something I will work on later on. When I have spare cycles I should report this upstream. The error ========= ``` 292: (bf) r6 = r3 ; R3_w=scalar(id=52209,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) R6_w=scalar(id=52209,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) 293: (bf) r3 = r2 ; R2_w=scalar(id=52208,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=0x1869d,var_off=(0x0; 0x1ffff)) R3_w=scalar(id=52208,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=0x1869d,var_off=(0x0; 0x1ffff)) 294: (2d) if r4 > r8 goto pc+1 296: R0_w=scalar() R1=11 R2_w=scalar(id=52208,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=0x1869d,var_off=(0x0; 0x1ffff)) R3_w=scalar(id=52208,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=0x1869d,var_off=(0x0; 0x1ffff)) R4_w=scalar(umin=9,umax=0xfffffffffffffffa) R5_w=scalar() R6_w=scalar(id=52209,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) R7=scalar(id=52027,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) R8=scalar(id=24520,umin=8,umax=0xfffffffffffffff9) R9_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff0000,var_off=(0x0; 0xff0000)) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=map_value(map=unwind_tables,ks=8,vs=1400000) fp-56=6 fp-64=scalar(id=52027,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) fp-72=map_value(map=heap,ks=4,vs=1080) fp-80=map_value(map=heap,ks=4,vs=1080) fp-88=map_value(map=exec_mappings,ks=16,vs=32) fp-96=mmmmmmmm fp-104=ctx() fp-112=mmmmmmmm fp-120=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) fp-128=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-136=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) ; if (table->rows[mid].pc <= pc) { 296: (2d) if r4 > r8 goto pc-83 214: R0_w=scalar() R1=11 R2_w=scalar(id=52208,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=0x1869d,var_off=(0x0; 0x1ffff)) R3_w=scalar(id=52208,smin=umin=smin32=umin32=1,smax=umax=smax32=umax32=0x1869d,var_off=(0x0; 0x1ffff)) R4_w=scalar(umin=9,umax=0xfffffffffffffffa) R5_w=scalar() R6_w=scalar(id=52209,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) R7=scalar(id=52027,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) R8=scalar(id=24520,umin=8,umax=0xfffffffffffffff9) R9_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff0000,var_off=(0x0; 0xff0000)) R10=fp0 fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=map_value(map=unwind_tables,ks=8,vs=1400000) fp-56=6 fp-64=scalar(id=52027,smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x1869e,var_off=(0x0; 0x1ffff)) fp-72=map_value(map=heap,ks=4,vs=1080) fp-80=map_value(map=heap,ks=4,vs=1080) fp-88=map_value(map=exec_mappings,ks=16,vs=32) fp-96=mmmmmmmm fp-104=ctx() fp-112=mmmmmmmm fp-120=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) fp-128=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) fp-136=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) ; LOG("========== left %llu right %llu (shard index %d)", left, right, 214: (18) r9 = 0xbadfadbadfadbad ; R9_w=0xbadfadbadfadbad ; for (int i = 0; i < MAX_BINARY_SEARCH_DEPTH; i++) { 216: (07) r1 += -1 ; R1_w=10 217: (bf) r2 = r1 ; R1_w=10 R2_w=10 218: (67) r2 <<= 32 BPF program is too large. Processed 1000001 insn processed 1000001 insns (limit 1000000) max_states_per_insn 29 total_states 18306 peak_states 2761 mark_read 106 -- END PROG LOAD LOG -- libbpf: prog 'dwarf_unwind': failed to load: -7 libbpf: failed to load object 'profiler_bpf' libbpf: failed to load BPF skeleton 'profiler_bpf': -7 thread 'main' panicked at src/profiler.rs:159:36: load skel: Error: Argument list too long (os error 7) note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace ``` Test Plan ========= Added a regression test. --- flake.nix | 2 ++ src/bpf/profiler.bpf.c | 26 ++++++++------------------ vm.nix | 26 ++++++++++++++++++++++++++ vmtest.toml | 10 ++++++++++ 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/flake.nix b/flake.nix index 5cff985..62c8764 100644 --- a/flake.nix +++ b/flake.nix @@ -58,6 +58,8 @@ (import ./vm.nix { inherit pkgs; }).kernel_6_0 (import ./vm.nix { inherit pkgs; }).kernel_6_2 (import ./vm.nix { inherit pkgs; }).kernel_6_6 + (import ./vm.nix { inherit pkgs; }).kernel_6_8_7 + (import ./vm.nix { inherit pkgs; }).kernel_6_9_rc5 ]; LIBCLANG_PATH = lib.makeLibraryPath [ llvmPackages_16.libclang ]; diff --git a/src/bpf/profiler.bpf.c b/src/bpf/profiler.bpf.c index b570f67..3d08111 100644 --- a/src/bpf/profiler.bpf.c +++ b/src/bpf/profiler.bpf.c @@ -71,29 +71,19 @@ struct { } rate_limits SEC(".maps"); -// On arm64 running 6.8.4-200.fc39.aarch64 the verifier fails with argument list too long. This -// did not use to happen before and it's probably due to a regression in the way the verifier -// accounts for the explored paths. I have tried many other things, such as two mid variables -// but that did not do it. The theory of why this works is that perhaps it's making the verifier -// reset the state it had about the program exploration so the branches its exploring per iteration -// do not grow as much. -#ifdef __TARGET_ARCH_arm64 -static u32 __attribute__((optnone)) verifier_workaround(u32 value) { - return value; -} -#endif -#ifdef __TARGET_ARCH_x86 -static __always_inline u32 verifier_workaround(u32 value) { - return value; -} -#endif - // Binary search the unwind table to find the row index containing the unwind // information for a given program counter (pc) relative to the object file. static __always_inline u64 find_offset_for_pc(stack_unwind_table_t *table, u64 pc, u64 left, u64 right) { u64 found = BINARY_SEARCH_DEFAULT; + + // On kernels ~6.8 and greater the verifier fails with argument list too long. This did not use to + // happen before and it's probably due to a regression in the way the verifier accounts for the explored + // paths. I have tried many other things, such as two mid variables but that did not do it. + // Perhaps unrolling the loop works is that the verifier doesn't have as many states to explore + // per iteration. + #pragma unroll for (int i = 0; i < MAX_BINARY_SEARCH_DEPTH; i++) { // TODO(javierhonduco): ensure that this condition is right as we use // unsigned values... @@ -102,7 +92,7 @@ static __always_inline u64 find_offset_for_pc(stack_unwind_table_t *table, u64 p return found; } - u32 mid = verifier_workaround((left + right) / 2); + u32 mid = (left + right) / 2; // Appease the verifier. if (mid < 0 || mid >= MAX_UNWIND_TABLE_SIZE) { LOG("\t.should never happen, mid: %lu, max: %lu", mid, diff --git a/vm.nix b/vm.nix index 173f4b2..d847ed7 100644 --- a/vm.nix +++ b/vm.nix @@ -52,6 +52,32 @@ ''; }; + kernel_6_8_7 = pkgs.stdenv.mkDerivation { + name = "download-kernel-6.8.7"; + src = pkgs.fetchurl { + url = "https://github.com/javierhonduco/lightswitch-kernels/raw/c0af7a3/bzImage_v6.8.7"; + hash = "sha256-fZwGajRi9+otzokRxoss99aH9PLRuyl2UfJ5Echehdo="; + }; + dontUnpack = true; + installPhase = '' + mkdir -p $out + cp -r $src $out/bzImage + ''; + }; + + kernel_6_9_rc5 = pkgs.stdenv.mkDerivation { + name = "download-kernel-6.9-rc5"; + src = pkgs.fetchurl { + url = "https://github.com/javierhonduco/lightswitch-kernels/raw/c0af7a3/bzImage_v6.9-rc5"; + hash = "sha256-EA+nJ1M0/6QFPVA+fYkvXDhBcsmTnALpGr+tCJZsVyw="; + }; + dontUnpack = true; + installPhase = '' + mkdir -p $out + cp -r $src $out/bzImage + ''; + }; + vmtest = pkgs.rustPlatform.buildRustPackage { name = "vmtest"; src = pkgs.fetchFromGitHub { diff --git a/vmtest.toml b/vmtest.toml index 3a06893..3e9501b 100644 --- a/vmtest.toml +++ b/vmtest.toml @@ -16,4 +16,14 @@ command = "/mnt/vmtest/target/debug/lightswitch --duration 0" [[target]] name = "Fedora 6.6" kernel = "/nix/store/77ixckavs2qidx1pglmlxsj6bfvjqijb-download-kernel-6.6/bzImage" +command = "/mnt/vmtest/target/debug/lightswitch --duration 0" + +[[target]] +name = "Upstream 6.8.7" +kernel = "/nix/store/3ip5l5bmrxpjdf11xbrqpbrh7pqjn9wa-download-kernel-6.8.7/bzImage" +command = "/mnt/vmtest/target/debug/lightswitch --duration 0" + +[[target]] +name = "Upstream v6.9-rc5" +kernel = "/nix/store/zjwmw9s2gr772mjf0y3pfqvknbs0mcdc-download-kernel-6.9-rc5/bzImage" command = "/mnt/vmtest/target/debug/lightswitch --duration 0" \ No newline at end of file