From aef871b3da15cf7d2117775fa1eec25c13e1e91e Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 1 Nov 2023 08:55:31 +0000 Subject: [PATCH 01/17] fuzzgen: Allow restricting generated opcodes with env var (#7433) * fuzzgen: Allow restricting generated opcodes with env var * fuzzgen: Add `FUZZGEN_ALLOWED_OPS` docs --- cranelift/fuzzgen/src/function_generator.rs | 17 +++++++++++++++++ fuzz/README.md | 12 ++++++++++++ 2 files changed, 29 insertions(+) diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 651b784b2b53..38cdc0c05dfa 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -22,6 +22,7 @@ use cranelift::prelude::{ use once_cell::sync::Lazy; use std::collections::HashMap; use std::ops::RangeInclusive; +use std::str::FromStr; use target_lexicon::{Architecture, Triple}; type BlockSignature = Vec; @@ -797,6 +798,17 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { F32X4, F64X2, // SIMD Floats ]; + // When this env variable is passed, we only generate instructions for the opcodes listed in + // the comma-separated list. This is useful for debugging, as it allows us to focus on a few + // specific opcodes. 
+ let allowed_opcodes = std::env::var("FUZZGEN_ALLOWED_OPS").ok().map(|s| { + s.split(',') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| Opcode::from_str(s).expect("Unrecoginzed opcode")) + .collect::>() + }); + Opcode::all() .iter() .filter(|op| { @@ -1048,6 +1060,11 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::FcvtFromSint, &[I32X4], &[F64X2]), ) }) + .filter(|(op, ..)| { + allowed_opcodes + .as_ref() + .map_or(true, |opcodes| opcodes.contains(op)) + }) .collect() }); diff --git a/fuzz/README.md b/fuzz/README.md index f52c2c71b97c..5efabdcf39bd 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -88,3 +88,15 @@ following steps to reproduce it locally: to print the configuration and WebAssembly input used by the test case (see uses of `log_wasm` in the `wasmtime-fuzzing` crate). +## Target specific options + +### `cranelift-fuzzgen` + +Fuzzgen supports passing the `FUZZGEN_ALLOWED_OPS` environment variable, which when available restricts the instructions that it will generate. + +Running `FUZZGEN_ALLOWED_OPS=ineg,ishl cargo fuzz run cranelift-fuzzgen` will run fuzzgen but only generate `ineg` or `ishl` opcodes. + +### `cranelift-icache` + +The icache target also uses the fuzzgen library, thus also supports the `FUZZGEN_ALLOWED_OPS` environment variable as described in the `cranelift-fuzzgen` section above. 
+ From db946cd51fb0ff3f58004425f0924a6aaf0d8684 Mon Sep 17 00:00:00 2001 From: Jeffrey Charles Date: Wed, 1 Nov 2023 09:23:23 -0700 Subject: [PATCH 02/17] Fix Winch bug for funcs with params and locals (#7443) --- winch/codegen/src/frame/mod.rs | 2 +- .../x64/br_if/as_local_set_value.wat | 22 ++--- winch/filetests/filetests/x64/loop/for.wat | 53 +++++------ winch/filetests/filetests/x64/loop/while.wat | 44 ++++----- winch/filetests/filetests/x64/table/fill.wat | 89 ++++++++++--------- 5 files changed, 109 insertions(+), 101 deletions(-) diff --git a/winch/codegen/src/frame/mod.rs b/winch/codegen/src/frame/mod.rs index 9a0329856c8c..3e555a91283d 100644 --- a/winch/codegen/src/frame/mod.rs +++ b/winch/codegen/src/frame/mod.rs @@ -108,7 +108,7 @@ impl Frame { locals_size, vmctx_slot: LocalSlot::i64(vmctx_offset), defined_locals_range: DefinedLocalsRange( - defined_locals_start..defined_locals.stack_size, + defined_locals_start..(defined_locals_start + defined_locals.stack_size), ), }) } diff --git a/winch/filetests/filetests/x64/br_if/as_local_set_value.wat b/winch/filetests/filetests/x64/br_if/as_local_set_value.wat index 054e6a8b38e9..ee5f0ac87d94 100644 --- a/winch/filetests/filetests/x64/br_if/as_local_set_value.wat +++ b/winch/filetests/filetests/x64/br_if/as_local_set_value.wat @@ -12,13 +12,15 @@ ;; 1: 4889e5 mov rbp, rsp ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi -;; c: 4c893424 mov qword ptr [rsp], r14 -;; 10: 8b4c240c mov ecx, dword ptr [rsp + 0xc] -;; 14: b811000000 mov eax, 0x11 -;; 19: 85c9 test ecx, ecx -;; 1b: 0f8509000000 jne 0x2a -;; 21: 8944240c mov dword ptr [rsp + 0xc], eax -;; 25: b8ffffffff mov eax, 0xffffffff -;; 2a: 4883c410 add rsp, 0x10 -;; 2e: 5d pop rbp -;; 2f: c3 ret +;; c: c744240800000000 mov dword ptr [rsp + 8], 0 +;; 14: 4531db xor r11d, r11d +;; 17: 4c893424 mov qword ptr [rsp], r14 +;; 1b: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 1f: b811000000 mov eax, 0x11 +;; 24: 85c9 test ecx, ecx +;; 26: 
0f8509000000 jne 0x35 +;; 2c: 8944240c mov dword ptr [rsp + 0xc], eax +;; 30: b8ffffffff mov eax, 0xffffffff +;; 35: 4883c410 add rsp, 0x10 +;; 39: 5d pop rbp +;; 3a: c3 ret diff --git a/winch/filetests/filetests/x64/loop/for.wat b/winch/filetests/filetests/x64/loop/for.wat index cbeb993b6e9c..2db94dd07ef6 100644 --- a/winch/filetests/filetests/x64/loop/for.wat +++ b/winch/filetests/filetests/x64/loop/for.wat @@ -19,29 +19,30 @@ ;; 1: 4889e5 mov rbp, rsp ;; 4: 4883ec20 sub rsp, 0x20 ;; 8: 48897c2418 mov qword ptr [rsp + 0x18], rdi -;; d: 48c744241000000000 -;; mov qword ptr [rsp + 0x10], 0 -;; 16: 4c893424 mov qword ptr [rsp], r14 -;; 1a: 48c7c001000000 mov rax, 1 -;; 21: 4889442410 mov qword ptr [rsp + 0x10], rax -;; 26: 48c7c002000000 mov rax, 2 -;; 2d: 4889442408 mov qword ptr [rsp + 8], rax -;; 32: 488b442418 mov rax, qword ptr [rsp + 0x18] -;; 37: 488b4c2408 mov rcx, qword ptr [rsp + 8] -;; 3c: 4839c1 cmp rcx, rax -;; 3f: b900000000 mov ecx, 0 -;; 44: 400f97c1 seta cl -;; 48: 85c9 test ecx, ecx -;; 4a: 0f8526000000 jne 0x76 -;; 50: 488b442408 mov rax, qword ptr [rsp + 8] -;; 55: 488b4c2410 mov rcx, qword ptr [rsp + 0x10] -;; 5a: 480fafc8 imul rcx, rax -;; 5e: 48894c2410 mov qword ptr [rsp + 0x10], rcx -;; 63: 488b442408 mov rax, qword ptr [rsp + 8] -;; 68: 4883c001 add rax, 1 -;; 6c: 4889442408 mov qword ptr [rsp + 8], rax -;; 71: e9bcffffff jmp 0x32 -;; 76: 488b442410 mov rax, qword ptr [rsp + 0x10] -;; 7b: 4883c420 add rsp, 0x20 -;; 7f: 5d pop rbp -;; 80: c3 ret +;; d: 4531db xor r11d, r11d +;; 10: 4c895c2410 mov qword ptr [rsp + 0x10], r11 +;; 15: 4c895c2408 mov qword ptr [rsp + 8], r11 +;; 1a: 4c893424 mov qword ptr [rsp], r14 +;; 1e: 48c7c001000000 mov rax, 1 +;; 25: 4889442410 mov qword ptr [rsp + 0x10], rax +;; 2a: 48c7c002000000 mov rax, 2 +;; 31: 4889442408 mov qword ptr [rsp + 8], rax +;; 36: 488b442418 mov rax, qword ptr [rsp + 0x18] +;; 3b: 488b4c2408 mov rcx, qword ptr [rsp + 8] +;; 40: 4839c1 cmp rcx, rax +;; 43: b900000000 mov ecx, 0 +;; 48: 
400f97c1 seta cl +;; 4c: 85c9 test ecx, ecx +;; 4e: 0f8526000000 jne 0x7a +;; 54: 488b442408 mov rax, qword ptr [rsp + 8] +;; 59: 488b4c2410 mov rcx, qword ptr [rsp + 0x10] +;; 5e: 480fafc8 imul rcx, rax +;; 62: 48894c2410 mov qword ptr [rsp + 0x10], rcx +;; 67: 488b442408 mov rax, qword ptr [rsp + 8] +;; 6c: 4883c001 add rax, 1 +;; 70: 4889442408 mov qword ptr [rsp + 8], rax +;; 75: e9bcffffff jmp 0x36 +;; 7a: 488b442410 mov rax, qword ptr [rsp + 0x10] +;; 7f: 4883c420 add rsp, 0x20 +;; 83: 5d pop rbp +;; 84: c3 ret diff --git a/winch/filetests/filetests/x64/loop/while.wat b/winch/filetests/filetests/x64/loop/while.wat index e9773ab92a2e..adef2ad06a96 100644 --- a/winch/filetests/filetests/x64/loop/while.wat +++ b/winch/filetests/filetests/x64/loop/while.wat @@ -18,24 +18,26 @@ ;; 1: 4889e5 mov rbp, rsp ;; 4: 4883ec18 sub rsp, 0x18 ;; 8: 48897c2410 mov qword ptr [rsp + 0x10], rdi -;; d: 4c893424 mov qword ptr [rsp], r14 -;; 11: 48c7c001000000 mov rax, 1 -;; 18: 4889442408 mov qword ptr [rsp + 8], rax -;; 1d: 488b442410 mov rax, qword ptr [rsp + 0x10] -;; 22: 4883f800 cmp rax, 0 -;; 26: b800000000 mov eax, 0 -;; 2b: 400f94c0 sete al -;; 2f: 85c0 test eax, eax -;; 31: 0f8526000000 jne 0x5d -;; 37: 488b442408 mov rax, qword ptr [rsp + 8] -;; 3c: 488b4c2410 mov rcx, qword ptr [rsp + 0x10] -;; 41: 480fafc8 imul rcx, rax -;; 45: 48894c2408 mov qword ptr [rsp + 8], rcx -;; 4a: 488b442410 mov rax, qword ptr [rsp + 0x10] -;; 4f: 4883e801 sub rax, 1 -;; 53: 4889442410 mov qword ptr [rsp + 0x10], rax -;; 58: e9c0ffffff jmp 0x1d -;; 5d: 488b442408 mov rax, qword ptr [rsp + 8] -;; 62: 4883c418 add rsp, 0x18 -;; 66: 5d pop rbp -;; 67: c3 ret +;; d: 48c744240800000000 +;; mov qword ptr [rsp + 8], 0 +;; 16: 4c893424 mov qword ptr [rsp], r14 +;; 1a: 48c7c001000000 mov rax, 1 +;; 21: 4889442408 mov qword ptr [rsp + 8], rax +;; 26: 488b442410 mov rax, qword ptr [rsp + 0x10] +;; 2b: 4883f800 cmp rax, 0 +;; 2f: b800000000 mov eax, 0 +;; 34: 400f94c0 sete al +;; 38: 85c0 test eax, eax 
+;; 3a: 0f8526000000 jne 0x66 +;; 40: 488b442408 mov rax, qword ptr [rsp + 8] +;; 45: 488b4c2410 mov rcx, qword ptr [rsp + 0x10] +;; 4a: 480fafc8 imul rcx, rax +;; 4e: 48894c2408 mov qword ptr [rsp + 8], rcx +;; 53: 488b442410 mov rax, qword ptr [rsp + 0x10] +;; 58: 4883e801 sub rax, 1 +;; 5c: 4889442410 mov qword ptr [rsp + 0x10], rax +;; 61: e9c0ffffff jmp 0x26 +;; 66: 488b442408 mov rax, qword ptr [rsp + 8] +;; 6b: 4883c418 add rsp, 0x18 +;; 6f: 5d pop rbp +;; 70: c3 ret diff --git a/winch/filetests/filetests/x64/table/fill.wat b/winch/filetests/filetests/x64/table/fill.wat index 2a61212958ae..d811a646f236 100644 --- a/winch/filetests/filetests/x64/table/fill.wat +++ b/winch/filetests/filetests/x64/table/fill.wat @@ -49,46 +49,49 @@ ;; 8: 897c241c mov dword ptr [rsp + 0x1c], edi ;; c: 89742418 mov dword ptr [rsp + 0x18], esi ;; 10: 89542414 mov dword ptr [rsp + 0x14], edx -;; 14: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 19: 8b4c2418 mov ecx, dword ptr [rsp + 0x18] -;; 1d: 4c89f2 mov rdx, r14 -;; 20: 8b5a50 mov ebx, dword ptr [rdx + 0x50] -;; 23: 39d9 cmp ecx, ebx -;; 25: 0f8381000000 jae 0xac -;; 2b: 4189cb mov r11d, ecx -;; 2e: 4d6bdb08 imul r11, r11, 8 -;; 32: 488b5248 mov rdx, qword ptr [rdx + 0x48] -;; 36: 4889d6 mov rsi, rdx -;; 39: 4c01da add rdx, r11 -;; 3c: 39d9 cmp ecx, ebx -;; 3e: 480f43d6 cmovae rdx, rsi -;; 42: 488b02 mov rax, qword ptr [rdx] -;; 45: 4885c0 test rax, rax -;; 48: 0f8523000000 jne 0x71 -;; 4e: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 52: 498b5b48 mov rbx, qword ptr [r11 + 0x48] -;; 56: 4156 push r14 -;; 58: 51 push rcx -;; 59: 488b7c2408 mov rdi, qword ptr [rsp + 8] -;; 5e: be00000000 mov esi, 0 -;; 63: 8b1424 mov edx, dword ptr [rsp] -;; 66: ffd3 call rbx -;; 68: 4883c410 add rsp, 0x10 -;; 6c: e904000000 jmp 0x75 -;; 71: 4883e0fe and rax, 0xfffffffffffffffe -;; 75: 488944240c mov qword ptr [rsp + 0xc], rax -;; 7a: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 7e: 498b4368 mov rax, qword ptr [r11 + 0x68] -;; 82: 4156 push r14 -;; 
84: 4883ec08 sub rsp, 8 -;; 88: 488b7c2408 mov rdi, qword ptr [rsp + 8] -;; 8d: be01000000 mov esi, 1 -;; 92: 8b54242c mov edx, dword ptr [rsp + 0x2c] -;; 96: 488b4c241c mov rcx, qword ptr [rsp + 0x1c] -;; 9b: 448b442424 mov r8d, dword ptr [rsp + 0x24] -;; a0: ffd0 call rax -;; a2: 4883c410 add rsp, 0x10 -;; a6: 4883c420 add rsp, 0x20 -;; aa: 5d pop rbp -;; ab: c3 ret -;; ac: 0f0b ud2 +;; 14: c744241000000000 mov dword ptr [rsp + 0x10], 0 +;; 1c: 48c744240800000000 +;; mov qword ptr [rsp + 8], 0 +;; 25: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 2a: 8b4c2418 mov ecx, dword ptr [rsp + 0x18] +;; 2e: 4c89f2 mov rdx, r14 +;; 31: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 34: 39d9 cmp ecx, ebx +;; 36: 0f8381000000 jae 0xbd +;; 3c: 4189cb mov r11d, ecx +;; 3f: 4d6bdb08 imul r11, r11, 8 +;; 43: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 47: 4889d6 mov rsi, rdx +;; 4a: 4c01da add rdx, r11 +;; 4d: 39d9 cmp ecx, ebx +;; 4f: 480f43d6 cmovae rdx, rsi +;; 53: 488b02 mov rax, qword ptr [rdx] +;; 56: 4885c0 test rax, rax +;; 59: 0f8523000000 jne 0x82 +;; 5f: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 63: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 67: 4156 push r14 +;; 69: 51 push rcx +;; 6a: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 6f: be00000000 mov esi, 0 +;; 74: 8b1424 mov edx, dword ptr [rsp] +;; 77: ffd3 call rbx +;; 79: 4883c410 add rsp, 0x10 +;; 7d: e904000000 jmp 0x86 +;; 82: 4883e0fe and rax, 0xfffffffffffffffe +;; 86: 488944240c mov qword ptr [rsp + 0xc], rax +;; 8b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 8f: 498b4368 mov rax, qword ptr [r11 + 0x68] +;; 93: 4156 push r14 +;; 95: 4883ec08 sub rsp, 8 +;; 99: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 9e: be01000000 mov esi, 1 +;; a3: 8b54242c mov edx, dword ptr [rsp + 0x2c] +;; a7: 488b4c241c mov rcx, qword ptr [rsp + 0x1c] +;; ac: 448b442424 mov r8d, dword ptr [rsp + 0x24] +;; b1: ffd0 call rax +;; b3: 4883c410 add rsp, 0x10 +;; b7: 4883c420 add rsp, 0x20 +;; bb: 5d pop rbp +;; bc: c3 ret +;; bd: 0f0b 
ud2 From bc53b71c263756701f4b2d59e7829bfedd1f084a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 1 Nov 2023 11:31:55 -0500 Subject: [PATCH 03/17] Fix flakiness in tcp_bind test (#7438) * Fix flakiness in tcp_bind test This commit adds another case to handle in the TCP bind test which binds a specific port to handle concurrent invocations of the test. Closes #7429 * Add comments --- crates/test-programs/src/bin/preview2_tcp_bind.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/test-programs/src/bin/preview2_tcp_bind.rs b/crates/test-programs/src/bin/preview2_tcp_bind.rs index 0dadd114b5d6..faea346a155f 100644 --- a/crates/test-programs/src/bin/preview2_tcp_bind.rs +++ b/crates/test-programs/src/bin/preview2_tcp_bind.rs @@ -30,7 +30,9 @@ fn test_tcp_bind_specific_port(net: &Network, ip: IpAddress) { assert_eq!(bind_addr.ip(), bound_addr.ip()); assert_eq!(bind_addr.port(), bound_addr.port()); } - Err(ErrorCode::AddressInUse) => {} + // Concurrent invocations of this test can yield `AddressInUse` and that + // same error can show up on Windows as `AccessDenied`. + Err(ErrorCode::AddressInUse | ErrorCode::AccessDenied) => {} Err(e) => panic!("error: {e}"), } } From dcc8c2bc09209e2757b1825235496f81a9e43461 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 1 Nov 2023 09:43:51 -0700 Subject: [PATCH 04/17] Wasmtime: omit ANSI color sequences in logging when not a terminal. (#7436) In #7239 we added a `tracing-log` subscriber that prints logs to stderr if enabled with an environment variable. It included logic to add ANSI color sequences when stderr is a terminal, for legibility. Unfortunately it seems that while this logic *enabled* colors on a terminal, it did not *disable* colors on a non-terminal; so redirects of stderr to a file would result in ANSI color sequences being captured in that file. 
Specifically, the builder seems not to default to no-color; so rather than enable-or-nothing, we should explicitly enable or disable always. Fixes #7435. --- crates/cli-flags/src/lib.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 0a496257c050..03900707ac97 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -362,12 +362,10 @@ impl CommonOptions { } else { use std::io::IsTerminal; use tracing_subscriber::{EnvFilter, FmtSubscriber}; - let mut b = FmtSubscriber::builder() + let b = FmtSubscriber::builder() .with_writer(std::io::stderr) - .with_env_filter(EnvFilter::from_env("WASMTIME_LOG")); - if std::io::stderr().is_terminal() { - b = b.with_ansi(true); - } + .with_env_filter(EnvFilter::from_env("WASMTIME_LOG")) + .with_ansi(std::io::stderr().is_terminal()); b.init(); } #[cfg(not(feature = "logging"))] From efeeaf5135e401db5629004adec1c959584fa84b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 1 Nov 2023 11:45:25 -0500 Subject: [PATCH 05/17] Fix parsing f32/f64 CLI arguments as floats (#7440) Type inference wasn't enough for this situation since floats are stored as `u32` and `u64` while they're at rest to avoid modification. Closes #7401 --- src/commands/run.rs | 4 ++-- tests/all/cli_tests.rs | 19 +++++++++++++++++++ tests/all/cli_tests/simple.wat | 2 ++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/commands/run.rs b/src/commands/run.rs index 89fe35e85246..6ca015dddf67 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -504,8 +504,8 @@ impl RunCommand { // parses base-10 representations. 
ValType::I32 => Val::I32(val.parse()?), ValType::I64 => Val::I64(val.parse()?), - ValType::F32 => Val::F32(val.parse()?), - ValType::F64 => Val::F64(val.parse()?), + ValType::F32 => Val::F32(val.parse::()?.to_bits()), + ValType::F64 => Val::F64(val.parse::()?.to_bits()), t => bail!("unsupported argument type {:?}", t), }); } diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index b4bc8c86c683..a3113e2da005 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -1176,6 +1176,25 @@ warning: this CLI invocation of Wasmtime is going to break in the future -- for Ok(()) } +#[test] +fn float_args() -> Result<()> { + let result = run_wasmtime(&[ + "--invoke", + "echo_f32", + "tests/all/cli_tests/simple.wat", + "1.0", + ])?; + assert_eq!(result, "1\n"); + let result = run_wasmtime(&[ + "--invoke", + "echo_f64", + "tests/all/cli_tests/simple.wat", + "1.1", + ])?; + assert_eq!(result, "1.1\n"); + Ok(()) +} + mod test_programs { use super::{get_wasmtime_command, run_wasmtime}; use anyhow::Result; diff --git a/tests/all/cli_tests/simple.wat b/tests/all/cli_tests/simple.wat index a851dfa00e2b..a62319c5e179 100644 --- a/tests/all/cli_tests/simple.wat +++ b/tests/all/cli_tests/simple.wat @@ -4,4 +4,6 @@ ) (func (export "get_f32") (result f32) f32.const 100) (func (export "get_f64") (result f64) f64.const 100) + (func (export "echo_f32") (param f32) (result f32) local.get 0) + (func (export "echo_f64") (param f64) (result f64) local.get 0) ) From c741d2477b269ed455523ef0ad8cc26fb65e61d3 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Wed, 1 Nov 2023 10:14:44 -0700 Subject: [PATCH 06/17] mpk: temporarily disable to avoid CI failures (#7446) GitHub CI runners are showing some strange behavior: on certain runners (unknown which ones), the CPUID bits claim that MPK is supported, but running any MPK code (e.g., `RDPKRU`) causes a `SIGILL` crash. This change disables MPK until #7445 is resolved. 
--- crates/runtime/src/mpk/pkru.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/runtime/src/mpk/pkru.rs b/crates/runtime/src/mpk/pkru.rs index 99dee74fa0f7..9e36dd35a181 100644 --- a/crates/runtime/src/mpk/pkru.rs +++ b/crates/runtime/src/mpk/pkru.rs @@ -56,6 +56,11 @@ pub fn write(pkru: u32) { /// Check the `ECX.PKU` flag (bit 3) of the `07h` `CPUID` leaf; see the /// Intel Software Development Manual, vol 3a, section 2.7. pub fn has_cpuid_bit_set() -> bool { + // TODO: disable MPK support until the following issue is resolved: + // https://github.com/bytecodealliance/wasmtime/issues/7445 + if true { + return false; + } let result = unsafe { std::arch::x86_64::__cpuid(0x07) }; (result.ecx & 0b100) != 0 } From 78b38341430ca3c29e30b5a37c0894e19da6ab9a Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 1 Nov 2023 10:18:38 -0700 Subject: [PATCH 07/17] Validate the status code in outgoing-response (#7447) --- crates/test-programs/src/bin/api_proxy.rs | 2 +- .../src/bin/api_proxy_streaming.rs | 8 +++---- .../http_outbound_request_response_build.rs | 2 +- crates/wasi-http/src/types.rs | 2 +- crates/wasi-http/src/types_impl.rs | 21 +++++++++++++------ crates/wasi-http/wit/deps/http/types.wit | 10 +++++---- crates/wasi/wit/deps/http/types.wit | 10 +++++---- 7 files changed, 34 insertions(+), 21 deletions(-) diff --git a/crates/test-programs/src/bin/api_proxy.rs b/crates/test-programs/src/bin/api_proxy.rs index 1bcdc783dd41..2170bdbbca36 100644 --- a/crates/test-programs/src/bin/api_proxy.rs +++ b/crates/test-programs/src/bin/api_proxy.rs @@ -17,7 +17,7 @@ struct T; impl bindings::exports::wasi::http::incoming_handler::Guest for T { fn handle(_request: IncomingRequest, outparam: ResponseOutparam) { let hdrs = bindings::wasi::http::types::Headers::new(); - let resp = bindings::wasi::http::types::OutgoingResponse::new(200, hdrs); + let resp = bindings::wasi::http::types::OutgoingResponse::new(hdrs); let body = resp.body().expect("outgoing response"); 
bindings::wasi::http::types::ResponseOutparam::set(outparam, Ok(resp)); diff --git a/crates/test-programs/src/bin/api_proxy_streaming.rs b/crates/test-programs/src/bin/api_proxy_streaming.rs index 5f1c0560c8e5..730bef9383a6 100644 --- a/crates/test-programs/src/bin/api_proxy_streaming.rs +++ b/crates/test-programs/src/bin/api_proxy_streaming.rs @@ -53,7 +53,6 @@ async fn handle_request(request: IncomingRequest, response_out: ResponseOutparam let mut results = stream::iter(results).buffer_unordered(MAX_CONCURRENCY); let response = OutgoingResponse::new( - 200, Fields::from_list(&[("content-type".to_string(), b"text/plain".to_vec())]).unwrap(), ); @@ -79,7 +78,6 @@ async fn handle_request(request: IncomingRequest, response_out: ResponseOutparam // Echo the request body without buffering it. let response = OutgoingResponse::new( - 200, Fields::from_list( &headers .into_iter() @@ -129,7 +127,6 @@ async fn handle_request(request: IncomingRequest, response_out: ResponseOutparam ); let response = OutgoingResponse::new( - 200, Fields::from_list( &headers .into_iter() @@ -242,7 +239,10 @@ fn method_not_allowed(response_out: ResponseOutparam) { } fn respond(status: u16, response_out: ResponseOutparam) { - let response = OutgoingResponse::new(status, Fields::new()); + let response = OutgoingResponse::new(Fields::new()); + response + .set_status_code(status) + .expect("setting status code"); let body = response.body().expect("response should be writable"); diff --git a/crates/test-programs/src/bin/http_outbound_request_response_build.rs b/crates/test-programs/src/bin/http_outbound_request_response_build.rs index 8a50ff6fa693..c2360d299180 100644 --- a/crates/test-programs/src/bin/http_outbound_request_response_build.rs +++ b/crates/test-programs/src/bin/http_outbound_request_response_build.rs @@ -27,7 +27,7 @@ fn main() { "application/text".to_string().into_bytes(), )]) .unwrap(); - let response = http_types::OutgoingResponse::new(200, headers); + let response = 
http_types::OutgoingResponse::new(headers); let outgoing_body = response.body().unwrap(); let response_body = outgoing_body.write().unwrap(); response_body diff --git a/crates/wasi-http/src/types.rs b/crates/wasi-http/src/types.rs index dc41a30c6a77..e962ff47cf27 100644 --- a/crates/wasi-http/src/types.rs +++ b/crates/wasi-http/src/types.rs @@ -289,7 +289,7 @@ pub struct HostIncomingResponse { } pub struct HostOutgoingResponse { - pub status: u16, + pub status: http::StatusCode, pub headers: FieldMap, pub body: Option, } diff --git a/crates/wasi-http/src/types_impl.rs b/crates/wasi-http/src/types_impl.rs index 2f7612af1765..bce0384ab7db 100644 --- a/crates/wasi-http/src/types_impl.rs +++ b/crates/wasi-http/src/types_impl.rs @@ -632,13 +632,12 @@ impl crate::bindings::http::types::HostIncomingBody for T { impl crate::bindings::http::types::HostOutgoingResponse for T { fn new( &mut self, - status: StatusCode, headers: Resource, ) -> wasmtime::Result> { let fields = move_fields(self.table(), headers)?; let id = self.table().push(HostOutgoingResponse { - status, + status: http::StatusCode::OK, headers: fields, body: None, })?; @@ -669,16 +668,26 @@ impl crate::bindings::http::types::HostOutgoingResponse for T { &mut self, id: Resource, ) -> wasmtime::Result { - Ok(self.table().get(&id)?.status) + Ok(self.table().get(&id)?.status.into()) } fn set_status_code( &mut self, id: Resource, status: types::StatusCode, - ) -> wasmtime::Result<()> { - self.table().get_mut(&id)?.status = status; - Ok(()) + ) -> wasmtime::Result> { + let resp = self.table().get_mut(&id)?; + + match http::StatusCode::from_u16(status) { + Ok(status) => resp.status = status, + Err(_) => { + return Ok(Err(Error::UnexpectedError( + "Invalid status code".to_string(), + ))) + } + }; + + Ok(Ok(())) } fn headers( diff --git a/crates/wasi-http/wit/deps/http/types.wit b/crates/wasi-http/wit/deps/http/types.wit index 9a0c8956b540..81166b011f7f 100644 --- a/crates/wasi-http/wit/deps/http/types.wit +++ 
b/crates/wasi-http/wit/deps/http/types.wit @@ -351,16 +351,18 @@ interface types { /// Represents an outgoing HTTP Response. resource outgoing-response { - /// Construct an `outgoing-response`. + /// Construct an `outgoing-response`, with a default `status-code` of `200`. + /// If a different `status-code` is needed, it must be set via the + /// `set-status-code` method. /// - /// * `status-code` is the HTTP Status Code for the Response. /// * `headers` is the HTTP Headers for the Response. - constructor(status-code: status-code, headers: headers); + constructor(headers: headers); /// Get the HTTP Status Code for the Response. status-code: func() -> status-code; + /// Set the HTTP Status Code for the Response. - set-status-code: func(status-code: status-code); + set-status-code: func(status-code: status-code) -> result<_, error>; /// Get the headers associated with the Request. /// diff --git a/crates/wasi/wit/deps/http/types.wit b/crates/wasi/wit/deps/http/types.wit index 9a0c8956b540..81166b011f7f 100644 --- a/crates/wasi/wit/deps/http/types.wit +++ b/crates/wasi/wit/deps/http/types.wit @@ -351,16 +351,18 @@ interface types { /// Represents an outgoing HTTP Response. resource outgoing-response { - /// Construct an `outgoing-response`. + /// Construct an `outgoing-response`, with a default `status-code` of `200`. + /// If a different `status-code` is needed, it must be set via the + /// `set-status-code` method. /// - /// * `status-code` is the HTTP Status Code for the Response. /// * `headers` is the HTTP Headers for the Response. - constructor(status-code: status-code, headers: headers); + constructor(headers: headers); /// Get the HTTP Status Code for the Response. status-code: func() -> status-code; + /// Set the HTTP Status Code for the Response. - set-status-code: func(status-code: status-code); + set-status-code: func(status-code: status-code) -> result<_, error>; /// Get the headers associated with the Request. 
/// From e9d62746ef896432cde5d68a91a12475de1652fe Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 1 Nov 2023 13:09:09 -0500 Subject: [PATCH 08/17] Fix writes being flushed to stdio (#7441) * Fix writes being flushed to stdio This commit fixes an accidental issue with writing to stdout with the `wasi-common` implementation where writes were line-buffered by the Rust standard library rather than being flushed out immediately as WASI semantics required. Closes #7437 * Fix warning in test --- .../src/bin/cli_stdio_write_flushes.rs | 10 +++++ crates/wasi-common/cap-std-sync/src/stdio.rs | 7 +++- tests/all/cli_tests.rs | 38 ++++++++++++++++++- 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 crates/test-programs/src/bin/cli_stdio_write_flushes.rs diff --git a/crates/test-programs/src/bin/cli_stdio_write_flushes.rs b/crates/test-programs/src/bin/cli_stdio_write_flushes.rs new file mode 100644 index 000000000000..64d0ccdc6443 --- /dev/null +++ b/crates/test-programs/src/bin/cli_stdio_write_flushes.rs @@ -0,0 +1,10 @@ +use std::io::Write; + +fn main() { + print!("> "); + std::io::stdout().flush().unwrap(); + + let mut s = String::new(); + std::io::stdin().read_line(&mut s).unwrap(); + assert!(s.is_empty()); +} diff --git a/crates/wasi-common/cap-std-sync/src/stdio.rs b/crates/wasi-common/cap-std-sync/src/stdio.rs index b2723d076c98..af4d52faab28 100644 --- a/crates/wasi-common/cap-std-sync/src/stdio.rs +++ b/crates/wasi-common/cap-std-sync/src/stdio.rs @@ -127,7 +127,12 @@ macro_rules! wasi_file_write_impl { Ok(FdFlags::APPEND) } async fn write_vectored<'a>(&self, bufs: &[io::IoSlice<'a>]) -> Result { - let n = self.0.lock().write_vectored(bufs)?; + let mut io = self.0.lock(); + let n = io.write_vectored(bufs)?; + // On a successful write additionally flush out the bytes to + // handle stdio buffering done by libstd since WASI interfaces + // here aren't buffered. 
+ io.flush()?; Ok(n.try_into().map_err(|_| { Error::range().context("converting write_vectored total length") })?) diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index a3113e2da005..ff336feb8daf 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -1198,7 +1198,7 @@ fn float_args() -> Result<()> { mod test_programs { use super::{get_wasmtime_command, run_wasmtime}; use anyhow::Result; - use std::io::Write; + use std::io::{Read, Write}; use std::process::Stdio; use test_programs_artifacts::*; @@ -1449,4 +1449,40 @@ mod test_programs { assert!(output.status.success()); Ok(()) } + + // Test to ensure that prints in the guest aren't buffered on the host by + // accident. The test here will print something without a newline and then + // wait for input on stdin, and the test here is to ensure that the + // character shows up here even as the guest is waiting on input via stdin. + #[test] + fn cli_stdio_write_flushes() -> Result<()> { + fn run(args: &[&str]) -> Result<()> { + println!("running {args:?}"); + let mut child = get_wasmtime_command()? 
+ .args(args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + let mut stdout = child.stdout.take().unwrap(); + let mut buf = [0; 10]; + match stdout.read(&mut buf) { + Ok(2) => assert_eq!(&buf[..2], b"> "), + e => panic!("unexpected read result {e:?}"), + } + drop(stdout); + drop(child.stdin.take().unwrap()); + let status = child.wait()?; + assert!(status.success()); + Ok(()) + } + + run(&["run", "-Spreview2=n", CLI_STDIO_WRITE_FLUSHES])?; + run(&["run", "-Spreview2=y", CLI_STDIO_WRITE_FLUSHES])?; + run(&[ + "run", + "-Wcomponent-model", + CLI_STDIO_WRITE_FLUSHES_COMPONENT, + ])?; + Ok(()) + } } From 9ab2e0a65f718dab12976ae0f37b8e1dba43e9eb Mon Sep 17 00:00:00 2001 From: Jeffrey Charles Date: Wed, 1 Nov 2023 11:23:01 -0700 Subject: [PATCH 09/17] popcnt should check for sse4.2 support in Winch (#7449) --- winch/codegen/src/isa/x64/asm.rs | 5 ++- winch/codegen/src/isa/x64/masm.rs | 2 +- .../filetests/x64/i32_popcnt/no_sse42.wat | 33 +++++++++++++++ .../filetests/x64/i64_popcnt/no_sse42.wat | 40 +++++++++++++++++++ 4 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 winch/filetests/filetests/x64/i32_popcnt/no_sse42.wat create mode 100644 winch/filetests/filetests/x64/i64_popcnt/no_sse42.wat diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index e44bdc4fc044..98f7723c6cce 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -773,7 +773,10 @@ impl Assembler { } pub fn popcnt(&mut self, src: Reg, size: OperandSize) { - assert!(self.isa_flags.has_popcnt(), "Requires has_popcnt flag"); + assert!( + self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(), + "Requires has_popcnt and has_sse42 flags" + ); self.emit(Inst::UnaryRmR { size: size.into(), op: args::UnaryRmROpcode::Popcnt, diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index f28ab3196173..e534b040a678 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ 
b/winch/codegen/src/isa/x64/masm.rs @@ -799,7 +799,7 @@ impl Masm for MacroAssembler { fn popcnt(&mut self, context: &mut CodeGenContext, size: OperandSize) { let src = context.pop_to_reg(self, None); - if self.flags.has_popcnt() { + if self.flags.has_popcnt() && self.flags.has_sse42() { self.asm.popcnt(src.into(), size); context.stack.push(src.into()); } else { diff --git a/winch/filetests/filetests/x64/i32_popcnt/no_sse42.wat b/winch/filetests/filetests/x64/i32_popcnt/no_sse42.wat new file mode 100644 index 000000000000..dc3567e84786 --- /dev/null +++ b/winch/filetests/filetests/x64/i32_popcnt/no_sse42.wat @@ -0,0 +1,33 @@ +;;! target = "x86_64" +;;! flags = ["has_popcnt"] + +(module + (func (result i32) + i32.const 3 + i32.popcnt + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec08 sub rsp, 8 +;; 8: 4c893424 mov qword ptr [rsp], r14 +;; c: b803000000 mov eax, 3 +;; 11: 89c1 mov ecx, eax +;; 13: c1e801 shr eax, 1 +;; 16: 81e055555555 and eax, 0x55555555 +;; 1c: 29c1 sub ecx, eax +;; 1e: 89c8 mov eax, ecx +;; 20: 41bb33333333 mov r11d, 0x33333333 +;; 26: 4421d8 and eax, r11d +;; 29: c1e902 shr ecx, 2 +;; 2c: 4421d9 and ecx, r11d +;; 2f: 01c1 add ecx, eax +;; 31: 89c8 mov eax, ecx +;; 33: c1e804 shr eax, 4 +;; 36: 01c8 add eax, ecx +;; 38: 81e00f0f0f0f and eax, 0xf0f0f0f +;; 3e: 69c001010101 imul eax, eax, 0x1010101 +;; 44: c1e818 shr eax, 0x18 +;; 47: 4883c408 add rsp, 8 +;; 4b: 5d pop rbp +;; 4c: c3 ret diff --git a/winch/filetests/filetests/x64/i64_popcnt/no_sse42.wat b/winch/filetests/filetests/x64/i64_popcnt/no_sse42.wat new file mode 100644 index 000000000000..7430a14ff68c --- /dev/null +++ b/winch/filetests/filetests/x64/i64_popcnt/no_sse42.wat @@ -0,0 +1,40 @@ +;;! target = "x86_64" +;;! 
flags = ["has_popcnt"] + +(module + (func (result i64) + i64.const 3 + i64.popcnt + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec08 sub rsp, 8 +;; 8: 4c893424 mov qword ptr [rsp], r14 +;; c: 48c7c003000000 mov rax, 3 +;; 13: 4889c1 mov rcx, rax +;; 16: 48c1e801 shr rax, 1 +;; 1a: 49bb5555555555555555 +;; movabs r11, 0x5555555555555555 +;; 24: 4c21d8 and rax, r11 +;; 27: 4829c1 sub rcx, rax +;; 2a: 4889c8 mov rax, rcx +;; 2d: 49bb3333333333333333 +;; movabs r11, 0x3333333333333333 +;; 37: 4c21d8 and rax, r11 +;; 3a: 48c1e902 shr rcx, 2 +;; 3e: 4c21d9 and rcx, r11 +;; 41: 4801c1 add rcx, rax +;; 44: 4889c8 mov rax, rcx +;; 47: 48c1e804 shr rax, 4 +;; 4b: 4801c8 add rax, rcx +;; 4e: 49bb0f0f0f0f0f0f0f0f +;; movabs r11, 0xf0f0f0f0f0f0f0f +;; 58: 4c21d8 and rax, r11 +;; 5b: 49bb0101010101010101 +;; movabs r11, 0x101010101010101 +;; 65: 490fafc3 imul rax, r11 +;; 69: 48c1e838 shr rax, 0x38 +;; 6d: 4883c408 add rsp, 8 +;; 71: 5d pop rbp +;; 72: c3 ret From f2edcc69a4292d62bb9bdd152db3f380cfdc4760 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 1 Nov 2023 21:19:33 +0000 Subject: [PATCH 10/17] egraphs: Remove extends and reduces from the shift amount (#7439) * egraphs: Remove extends and reduces to the shift input in shift instructions We support variations of this instruction with different inputs, so lets take advantage of this where possible. 
* egraphs: Remove `iconcat` as a shift input --- cranelift/codegen/src/opts/shifts.isle | 42 ++++ .../filetests/filetests/egraph/shifts.clif | 209 ++++++++++++++++++ 2 files changed, 251 insertions(+) diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle index 8dcf153d1a98..1b33d89d8c4f 100644 --- a/cranelift/codegen/src/opts/shifts.isle +++ b/cranelift/codegen/src/opts/shifts.isle @@ -102,3 +102,45 @@ (rule (simplify (ineg ty (ushr ty x sconst @ (iconst ty (u64_from_imm64 shift_amt))))) (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) (sshr ty x sconst)) + +;; Shifts and rotates allow a different type for the shift amount, so we +;; can remove any extend/reduce operations on the shift amount. +;; +;; (op x (ireduce y)) == (op x y) +;; (op x (uextend y)) == (op x y) +;; (op x (sextend y)) == (op x y) +;; +;; where `op` is one of ishl, ushr, sshr, rotl, rotr +;; +;; TODO: This rule is restricted to <=64 bits for ireduce since the x86 +;; backend doesn't support SIMD shifts with 128-bit shift amounts. 
+ +(rule (simplify (ishl ty x (ireduce _ y @ (value_type (fits_in_64 _))))) (ishl ty x y)) +(rule (simplify (ishl ty x (uextend _ y))) (ishl ty x y)) +(rule (simplify (ishl ty x (sextend _ y))) (ishl ty x y)) +(rule (simplify (ushr ty x (ireduce _ y @ (value_type (fits_in_64 _))))) (ushr ty x y)) +(rule (simplify (ushr ty x (uextend _ y))) (ushr ty x y)) +(rule (simplify (ushr ty x (sextend _ y))) (ushr ty x y)) +(rule (simplify (sshr ty x (ireduce _ y @ (value_type (fits_in_64 _))))) (sshr ty x y)) +(rule (simplify (sshr ty x (uextend _ y))) (sshr ty x y)) +(rule (simplify (sshr ty x (sextend _ y))) (sshr ty x y)) +(rule (simplify (rotr ty x (ireduce _ y @ (value_type (fits_in_64 _))))) (rotr ty x y)) +(rule (simplify (rotr ty x (uextend _ y))) (rotr ty x y)) +(rule (simplify (rotr ty x (sextend _ y))) (rotr ty x y)) +(rule (simplify (rotl ty x (ireduce _ y @ (value_type (fits_in_64 _))))) (rotl ty x y)) +(rule (simplify (rotl ty x (uextend _ y))) (rotl ty x y)) +(rule (simplify (rotl ty x (sextend _ y))) (rotl ty x y)) + +;; Remove iconcat from the shift amount input. This is correct even if the +;; iconcat is i8 type, since it can represent the largest shift amount +;; for i128 types. 
+;; +;; (op x (iconcat y1 y2)) == (op x y1) +;; +;; where `op` is one of ishl, ushr, sshr, rotl, rotr + +(rule (simplify (ishl ty x (iconcat _ y _))) (ishl ty x y)) +(rule (simplify (ushr ty x (iconcat _ y _))) (ushr ty x y)) +(rule (simplify (sshr ty x (iconcat _ y _))) (sshr ty x y)) +(rule (simplify (rotr ty x (iconcat _ y _))) (rotr ty x y)) +(rule (simplify (rotl ty x (iconcat _ y _))) (rotl ty x y)) diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif index b8028c50e6f0..7852ada12630 100644 --- a/cranelift/filetests/filetests/egraph/shifts.clif +++ b/cranelift/filetests/filetests/egraph/shifts.clif @@ -315,3 +315,212 @@ block0(v0: i64): ; check: v8 = uextend.i64 v7 ; check: return v8 } + +function %ishl_amt_type_ireduce(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ireduce.i8 v1 + v3 = ishl.i8 v0, v2 + return v3 +} + +; check: v4 = ishl v0, v1 +; check: return v4 + +function %ishl_amt_type_sextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sextend.i32 v1 + v3 = ishl.i8 v0, v2 + return v3 +} + +; check: v4 = ishl v0, v1 +; check: return v4 + +function %ishl_amt_type_uextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = uextend.i32 v1 + v3 = ishl.i8 v0, v2 + return v3 +} + +; check: v4 = ishl v0, v1 +; check: return v4 + + +function %ushr_amt_type_ireduce(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ireduce.i8 v1 + v3 = ushr.i8 v0, v2 + return v3 +} + +; check: v4 = ushr v0, v1 +; check: return v4 + +function %ushr_amt_type_sextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sextend.i32 v1 + v3 = ushr.i8 v0, v2 + return v3 +} + +; check: v4 = ushr v0, v1 +; check: return v4 + +function %ushr_amt_type_uextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = uextend.i32 v1 + v3 = ushr.i8 v0, v2 + return v3 +} + +; check: v4 = ushr v0, v1 +; check: return v4 + + +function %sshr_amt_type_ireduce(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ireduce.i8 v1 + v3 = sshr.i8 v0, v2 
+ return v3 +} + +; check: v4 = sshr v0, v1 +; check: return v4 + +function %sshr_amt_type_sextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sextend.i32 v1 + v3 = sshr.i8 v0, v2 + return v3 +} + +; check: v4 = sshr v0, v1 +; check: return v4 + +function %sshr_amt_type_uextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = uextend.i32 v1 + v3 = sshr.i8 v0, v2 + return v3 +} + +; check: v4 = sshr v0, v1 +; check: return v4 + + +function %rotr_amt_type_ireduce(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ireduce.i8 v1 + v3 = rotr.i8 v0, v2 + return v3 +} + +; check: v4 = rotr v0, v1 +; check: return v4 + +function %rotr_amt_type_sextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sextend.i32 v1 + v3 = rotr.i8 v0, v2 + return v3 +} + +; check: v4 = rotr v0, v1 +; check: return v4 + +function %rotr_amt_type_uextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = uextend.i32 v1 + v3 = rotr.i8 v0, v2 + return v3 +} + +; check: v4 = rotr v0, v1 +; check: return v4 + + +function %rotl_amt_type_ireduce(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ireduce.i8 v1 + v3 = rotl.i8 v0, v2 + return v3 +} + +; check: v4 = rotl v0, v1 +; check: return v4 + +function %rotl_amt_type_sextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sextend.i32 v1 + v3 = rotl.i8 v0, v2 + return v3 +} + +; check: v4 = rotl v0, v1 +; check: return v4 + +function %rotl_amt_type_uextend(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = uextend.i32 v1 + v3 = rotl.i8 v0, v2 + return v3 +} + +; check: v4 = rotl v0, v1 +; check: return v4 + + +function %ishl_amt_type_iconcat(i8, i16, i16) -> i8 { +block0(v0: i8, v1: i16, v2: i16): + v3 = iconcat.i16 v1, v2 + v4 = ishl.i8 v0, v3 + return v4 +} + +; check: v5 = ishl v0, v1 +; check: return v5 + + +function %ushr_amt_type_iconcat(i8, i16, i16) -> i8 { +block0(v0: i8, v1: i16, v2: i16): + v3 = iconcat.i16 v1, v2 + v4 = ushr.i8 v0, v3 + return v4 +} + +; check: v5 = ushr v0, v1 +; check: return v5 + + +function 
%sshr_amt_type_iconcat(i8, i16, i16) -> i8 { +block0(v0: i8, v1: i16, v2: i16): + v3 = iconcat.i16 v1, v2 + v4 = sshr.i8 v0, v3 + return v4 +} + +; check: v5 = sshr v0, v1 +; check: return v5 + + +function %rotr_amt_type_iconcat(i8, i16, i16) -> i8 { +block0(v0: i8, v1: i16, v2: i16): + v3 = iconcat.i16 v1, v2 + v4 = rotr.i8 v0, v3 + return v4 +} + +; check: v5 = rotr v0, v1 +; check: return v5 + + +function %rotl_amt_type_iconcat(i8, i16, i16) -> i8 { +block0(v0: i8, v1: i16, v2: i16): + v3 = iconcat.i16 v1, v2 + v4 = rotl.i8 v0, v3 + return v4 +} + +; check: v5 = rotl v0, v1 +; check: return v5 From c56cdb373268a06d196f93f0fb31871ae489b986 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Wed, 1 Nov 2023 16:31:01 -0600 Subject: [PATCH 11/17] add `reset_adapter_state` export to adapter (#7444) This is useful as the last step of component pre-initialization with e.g. [component-init](https://github.com/dicej/component-init), in which case we want the adapter to forget about any open handles it has (and force it to re-request the stdio handles next time they're needed) since it will be talking to a brand new host at runtime. 
Signed-off-by: Joel Dice --- .../src/lib.rs | 68 +++++++++++-------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/crates/wasi-preview1-component-adapter/src/lib.rs b/crates/wasi-preview1-component-adapter/src/lib.rs index 66d56da23589..e9f5c6f0f5fe 100644 --- a/crates/wasi-preview1-component-adapter/src/lib.rs +++ b/crates/wasi-preview1-component-adapter/src/lib.rs @@ -94,6 +94,14 @@ impl TrappingUnwrap for Result { } } +#[no_mangle] +pub unsafe extern "C" fn reset_adapter_state() { + let state = get_state_ptr(); + if !state.is_null() { + State::init(state) + } +} + #[no_mangle] pub unsafe extern "C" fn cabi_import_realloc( old_ptr: *mut u8, @@ -2385,8 +2393,8 @@ enum AllocationState { #[allow(improper_ctypes)] extern "C" { - fn get_state_ptr() -> *const State; - fn set_state_ptr(state: *const State); + fn get_state_ptr() -> *mut State; + fn set_state_ptr(state: *mut State); fn get_allocation_state() -> AllocationState; fn set_allocation_state(state: AllocationState); } @@ -2415,7 +2423,7 @@ impl State { } #[cold] - fn new() -> &'static State { + fn new() -> *mut State { #[link(wasm_import_module = "__main_module__")] extern "C" { fn cabi_realloc( @@ -2445,31 +2453,37 @@ impl State { unsafe { set_allocation_state(AllocationState::StateAllocated) }; unsafe { - ret.write(State { - magic1: MAGIC, - magic2: MAGIC, - import_alloc: ImportAlloc::new(), - descriptors: RefCell::new(None), - path_buf: UnsafeCell::new(MaybeUninit::uninit()), - long_lived_arena: BumpArena::new(), - args: Cell::new(None), - env_vars: Cell::new(None), - dirent_cache: DirentCache { - stream: Cell::new(None), - for_fd: Cell::new(0), - cookie: Cell::new(wasi::DIRCOOKIE_START), - cached_dirent: Cell::new(wasi::Dirent { - d_next: 0, - d_ino: 0, - d_type: FILETYPE_UNKNOWN, - d_namlen: 0, - }), - path_data: UnsafeCell::new(MaybeUninit::uninit()), - }, - dotdot: [UnsafeCell::new(b'.'), UnsafeCell::new(b'.')], - }); - &*ret + Self::init(ret); } + + ret + } + + #[cold] + unsafe fn 
init(state: *mut State) { + state.write(State { + magic1: MAGIC, + magic2: MAGIC, + import_alloc: ImportAlloc::new(), + descriptors: RefCell::new(None), + path_buf: UnsafeCell::new(MaybeUninit::uninit()), + long_lived_arena: BumpArena::new(), + args: Cell::new(None), + env_vars: Cell::new(None), + dirent_cache: DirentCache { + stream: Cell::new(None), + for_fd: Cell::new(0), + cookie: Cell::new(wasi::DIRCOOKIE_START), + cached_dirent: Cell::new(wasi::Dirent { + d_next: 0, + d_ino: 0, + d_type: FILETYPE_UNKNOWN, + d_namlen: 0, + }), + path_data: UnsafeCell::new(MaybeUninit::uninit()), + }, + dotdot: [UnsafeCell::new(b'.'), UnsafeCell::new(b'.')], + }); } /// Accessor for the descriptors member that ensures it is properly initialized From e0bfa7336de20f76048edbdc0157ee637a2c5fea Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 1 Nov 2023 16:05:20 -0700 Subject: [PATCH 12/17] cranelift: Reassociate constants out of nested shifts (#7450) This allows for more constant propagation. Co-authored-by: Trevor Elliott --- cranelift/codegen/src/opts/cprop.isle | 10 +++++ .../reassociate-constants-in-shifts.clif | 41 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 cranelift/filetests/filetests/egraph/reassociate-constants-in-shifts.clif diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index 19f5f2aacdf9..26580a7ea0c8 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -198,3 +198,13 @@ (rule (splat32 n) (splat64 (u64_or n (u64_shl n 32)))) (decl splat64 (u64) Constant) (extern constructor splat64 splat64) + +;; Reassociate nested shifts of constants to put constants together for cprop. 
+;; +;; ((A shift b) shift C) ==> ((A shift C) shift b) +(rule (simplify (ishl ty (ishl ty a@(iconst _ _) b) c@(iconst _ _))) + (ishl ty (ishl ty a c) b)) +(rule (simplify (ushr ty (ushr ty a@(iconst _ _) b) c@(iconst _ _))) + (ushr ty (ushr ty a c) b)) +(rule (simplify (sshr ty (sshr ty a@(iconst _ _) b) c@(iconst _ _))) + (sshr ty (sshr ty a c) b)) diff --git a/cranelift/filetests/filetests/egraph/reassociate-constants-in-shifts.clif b/cranelift/filetests/filetests/egraph/reassociate-constants-in-shifts.clif new file mode 100644 index 000000000000..9d50399d5b1c --- /dev/null +++ b/cranelift/filetests/filetests/egraph/reassociate-constants-in-shifts.clif @@ -0,0 +1,41 @@ +test optimize +set opt_level=speed +target x86_64 + +;; Test egraph rewrite rules that reassociate constants out of nested shifts. + +function %a(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = ishl v1, v0 + v3 = iconst.i32 2 + v4 = ishl v2, v3 + return v4 +; check: v6 = iconst.i32 4 +; nextln: v7 = ishl v6, v0 +; check: return v7 +} + +function %b(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 8 + v2 = ushr v1, v0 + v3 = iconst.i32 2 + v4 = ushr v2, v3 + return v4 +; check: v3 = iconst.i32 2 +; nextln: v6 = ushr v3, v0 +; check: return v6 +} + +function %c(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 8 + v2 = sshr v1, v0 + v3 = iconst.i32 2 + v4 = sshr v2, v3 + return v4 +; check: v3 = iconst.i32 2 +; nextln: v6 = sshr v3, v0 +; check: return v6 +} From 8e1b8ba4423957e8a14b33dced5f1b88abef1123 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 2 Nov 2023 17:26:06 +0000 Subject: [PATCH 13/17] egraphs: Merge consecutive shifts and rotates (#7453) * egraphs: Use `ty_shift_mask` where applicable * cranelift: Move `ty_shift_mask` into prelude * egraphs: Add consecutive shift rules * egraphs: Support overflow rules for i128 * egraphs: Improve rotate unification rules * egraphs: Add oposite rotate rules * egraphs: Allow merging rotates with different type shift amounts * 
egraphs: Reorganize rotate rules * egraphs: Clarify shift identities Co-Authored-By: Nick Fitzgerald --------- Co-authored-by: Nick Fitzgerald --- cranelift/codegen/src/isa/riscv64/inst.isle | 3 - cranelift/codegen/src/isa/riscv64/lower.isle | 14 +- cranelift/codegen/src/opts/bitops.isle | 4 +- cranelift/codegen/src/opts/shifts.isle | 126 +++++++++- cranelift/codegen/src/prelude.isle | 4 + cranelift/codegen/src/prelude_opt.isle | 13 + .../filetests/filetests/egraph/shifts.clif | 234 ++++++++++++++++++ 7 files changed, 385 insertions(+), 13 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 8627f2a293fa..0c4e1e3b5118 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1783,9 +1783,6 @@ -;; Generate a mask for the bit-width of the given type -(decl pure shift_mask (Type) u64) -(rule (shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) ;; Helper for generating a i64 from a pair of Imm20 and Imm12 constants (decl i64_generate_imm (Imm20 Imm12) i64) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 0d3a271f4df6..e3fa6a9f2e5a 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -1193,7 +1193,7 @@ ;; 8/16 bit types need a mask on the shift amount (rule 0 (lower (has_type (ty_int (ty_8_or_16 ty)) (ishl x y))) - (if-let mask (u64_to_imm12 (shift_mask ty))) + (if-let mask (u64_to_imm12 (ty_shift_mask ty))) (rv_sllw x (rv_andi (value_regs_get y 0) mask))) ;; Using the 32bit version of `sll` automatically masks the shift amount. @@ -1206,12 +1206,12 @@ ;; If the shift amount is known. We can mask it and encode it in the instruction. 
(rule 2 (lower (has_type (int_fits_in_32 ty) (ishl x (maybe_uextend (imm12_from_value y))))) - (rv_slliw x (imm12_and y (shift_mask ty)))) + (rv_slliw x (imm12_and y (ty_shift_mask ty)))) ;; We technically don't need to mask the shift amount here. The instruction ;; does the right thing. But it's neater when pretty printing it. (rule 3 (lower (has_type ty @ $I64 (ishl x (maybe_uextend (imm12_from_value y))))) - (rv_slli x (imm12_and y (shift_mask ty)))) + (rv_slli x (imm12_and y (ty_shift_mask ty)))) ;; With `Zba` we have a shift that zero extends the LHS argument. (rule 4 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) @@ -1253,7 +1253,7 @@ ;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be ;; zero extended. (rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x y))) - (if-let mask (u64_to_imm12 (shift_mask ty))) + (if-let mask (u64_to_imm12 (ty_shift_mask ty))) (rv_srlw (zext x) (rv_andi (value_regs_get y 0) mask))) ;; Using the 32bit version of `srl` automatically masks the shift amount. @@ -1266,7 +1266,7 @@ ;; When the RHS is known we can just encode it in the instruction. (rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x (maybe_uextend (imm12_from_value y))))) - (rv_srliw (zext x) (imm12_and y (shift_mask ty)))) + (rv_srliw (zext x) (imm12_and y (ty_shift_mask ty)))) (rule 3 (lower (has_type $I32 (ushr x (maybe_uextend (imm12_from_value y))))) (rv_srliw x y)) @@ -1308,7 +1308,7 @@ ;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be ;; zero extended. (rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x y))) - (if-let mask (u64_to_imm12 (shift_mask ty))) + (if-let mask (u64_to_imm12 (ty_shift_mask ty))) (rv_sraw (sext x) (rv_andi (value_regs_get y 0) mask))) ;; Using the 32bit version of `sra` automatically masks the shift amount. @@ -1321,7 +1321,7 @@ ;; When the RHS is known we can just encode it in the instruction. 
(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x (maybe_uextend (imm12_from_value y))))) - (rv_sraiw (sext x) (imm12_and y (shift_mask ty)))) + (rv_sraiw (sext x) (imm12_and y (ty_shift_mask ty)))) (rule 3 (lower (has_type $I32 (sshr x (maybe_uextend (imm12_from_value y))))) (rv_sraiw x y)) diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle index 018db93b62a1..6c89e0f07d81 100644 --- a/cranelift/codegen/src/opts/bitops.isle +++ b/cranelift/codegen/src/opts/bitops.isle @@ -98,11 +98,11 @@ ;; (x | -x) sets the sign bit to 1 if x is nonzero, and 0 if x is zero. sshr propagates ;; the sign bit to the rest of the value. (rule (simplify (sshr ty (bor ty x (ineg ty x)) (iconst ty (u64_from_imm64 shift_amt)))) - (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (if-let $true (u64_eq shift_amt (ty_shift_mask ty))) (bmask ty x)) (rule (simplify (sshr ty (bor ty (ineg ty x) x) (iconst ty (u64_from_imm64 shift_amt)))) - (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (if-let $true (u64_eq shift_amt (ty_shift_mask ty))) (bmask ty x)) ;; Matches any expressions that preserve "truthiness". diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle index 1b33d89d8c4f..acf53946cbca 100644 --- a/cranelift/codegen/src/opts/shifts.isle +++ b/cranelift/codegen/src/opts/shifts.isle @@ -100,7 +100,7 @@ ;; ineg(ushr(x, k)) == sshr(x, k) when k == ty_bits - 1. 
(rule (simplify (ineg ty (ushr ty x sconst @ (iconst ty (u64_from_imm64 shift_amt))))) - (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (if-let $true (u64_eq shift_amt (ty_shift_mask ty))) (sshr ty x sconst)) ;; Shifts and rotates allow a different type for the shift amount, so we @@ -144,3 +144,127 @@ (rule (simplify (sshr ty x (iconcat _ y _))) (sshr ty x y)) (rule (simplify (rotr ty x (iconcat _ y _))) (rotr ty x y)) (rule (simplify (rotl ty x (iconcat _ y _))) (rotl ty x y)) + +;; Try to combine the shift amount from multiple consecutive shifts. +;; This only works if the shift amount remains smaller than the bit +;; width of the type. +;; +;; (ishl (ishl x k1) k2) == (ishl x (add k1 k2)) if shift_mask(k1) + shift_mask(k2) < ty_bits +;; (ushr (ushr x k1) k2) == (ushr x (add k1 k2)) if shift_mask(k1) + shift_mask(k2) < ty_bits +;; (sshr (sshr x k1) k2) == (sshr x (add k1 k2)) if shift_mask(k1) + shift_mask(k2) < ty_bits +(rule (simplify (ishl ty + (ishl ty x (iconst kty (u64_from_imm64 k1))) + (iconst _ (u64_from_imm64 k2)))) + (if-let shift_amt (u64_add + (u64_and k1 (ty_shift_mask ty)) + (u64_and k2 (ty_shift_mask ty)))) + (if-let $true (u64_lt shift_amt (ty_bits_u64 (lane_type ty)))) + (ishl ty x (iconst_u64 kty shift_amt))) + +(rule (simplify (ushr ty + (ushr ty x (iconst kty (u64_from_imm64 k1))) + (iconst _ (u64_from_imm64 k2)))) + (if-let shift_amt (u64_add + (u64_and k1 (ty_shift_mask ty)) + (u64_and k2 (ty_shift_mask ty)))) + (if-let $true (u64_lt shift_amt (ty_bits_u64 (lane_type ty)))) + (ushr ty x (iconst_u64 kty shift_amt))) + +(rule (simplify (sshr ty + (sshr ty x (iconst kty (u64_from_imm64 k1))) + (iconst _ (u64_from_imm64 k2)))) + (if-let shift_amt (u64_add + (u64_and k1 (ty_shift_mask ty)) + (u64_and k2 (ty_shift_mask ty)))) + (if-let $true (u64_lt shift_amt (ty_bits_u64 (lane_type ty)))) + (sshr ty x (iconst_u64 kty shift_amt))) + +;; Similarly, if the shift amount overflows the type, then we can turn +;; it into a 0 +;; +;; (ishl 
(ishl x k1) k2) == 0 if shift_mask(k1) + shift_mask(k2) >= ty_bits +;; (ushr (ushr x k1) k2) == 0 if shift_mask(k1) + shift_mask(k2) >= ty_bits +(rule (simplify (ishl ty + (ishl ty x (iconst _ (u64_from_imm64 k1))) + (iconst _ (u64_from_imm64 k2)))) + (if-let shift_amt (u64_add + (u64_and k1 (ty_shift_mask ty)) + (u64_and k2 (ty_shift_mask ty)))) + (if-let $true (u64_le (ty_bits_u64 ty) shift_amt)) + (subsume (iconst_u64 ty 0))) + +(rule (simplify (ushr ty + (ushr ty x (iconst _ (u64_from_imm64 k1))) + (iconst _ (u64_from_imm64 k2)))) + (if-let shift_amt (u64_add + (u64_and k1 (ty_shift_mask ty)) + (u64_and k2 (ty_shift_mask ty)))) + (if-let $true (u64_le (ty_bits_u64 ty) shift_amt)) + (subsume (iconst_u64 ty 0))) + +;; (rotl (rotr x y) y) == x +;; (rotr (rotl x y) y) == x +(rule (simplify (rotl ty (rotr ty x y) y)) (subsume x)) +(rule (simplify (rotr ty (rotl ty x y) y)) (subsume x)) + +;; Emits an iadd for two values. If they have different types +;; then the smaller type is zero extended to the larger type. +(decl iadd_uextend (Value Value) Value) +(rule 1 (iadd_uextend x @ (value_type ty) y @ (value_type ty)) + (iadd ty x y)) +(rule 2 (iadd_uextend x @ (value_type x_ty) y @ (value_type y_ty)) + (if-let $true (u64_lt (ty_bits_u64 x_ty) (ty_bits_u64 y_ty))) + (iadd y_ty (uextend y_ty x) y)) +(rule 3 (iadd_uextend x @ (value_type x_ty) y @ (value_type y_ty)) + (if-let $true (u64_lt (ty_bits_u64 y_ty) (ty_bits_u64 x_ty))) + (iadd x_ty x (uextend x_ty y))) + +;; Emits an isub for two values. If they have different types +;; then the smaller type is zero extended to the larger type. 
+(decl isub_uextend (Value Value) Value) +(rule 1 (isub_uextend x @ (value_type ty) y @ (value_type ty)) + (isub ty x y)) +(rule 2 (isub_uextend x @ (value_type x_ty) y @ (value_type y_ty)) + (if-let $true (u64_lt (ty_bits_u64 x_ty) (ty_bits_u64 y_ty))) + (isub y_ty (uextend y_ty x) y)) +(rule 3 (isub_uextend x @ (value_type x_ty) y @ (value_type y_ty)) + (if-let $true (u64_lt (ty_bits_u64 y_ty) (ty_bits_u64 x_ty))) + (isub x_ty x (uextend x_ty y))) + +;; Try to group constants together so that other cprop rules can optimize them. +;; +;; (rotr (rotr x y) z) == (rotr x (iadd y z)) +;; (rotl (rotl x y) z) == (rotl x (iadd y z)) +;; (rotr (rotl x y) z) == (rotl x (isub y z)) +;; (rotl (rotr x y) z) == (rotr x (isub y z)) +;; +;; if y or z are constants +(rule (simplify (rotl ty (rotl ty x y @ (iconst _ _)) z)) (rotl ty x (iadd_uextend y z))) +(rule (simplify (rotl ty (rotl ty x y) z @ (iconst _ _))) (rotl ty x (iadd_uextend y z))) +(rule (simplify (rotr ty (rotr ty x y @ (iconst _ _)) z)) (rotr ty x (iadd_uextend y z))) +(rule (simplify (rotr ty (rotr ty x y) z @ (iconst _ _))) (rotr ty x (iadd_uextend y z))) + +(rule (simplify (rotr ty (rotl ty x y @ (iconst _ _)) z)) (rotl ty x (isub_uextend y z))) +(rule (simplify (rotr ty (rotl ty x y) z @ (iconst _ _))) (rotl ty x (isub_uextend y z))) +(rule (simplify (rotl ty (rotr ty x y @ (iconst _ _)) z)) (rotr ty x (isub_uextend y z))) +(rule (simplify (rotl ty (rotr ty x y) z @ (iconst _ _))) (rotr ty x (isub_uextend y z))) + +;; Similarly to the rules above, if y and z have the same type, we should emit +;; an iadd or isub instead. In some backends this is cheaper than a rotate. +;; +;; If they have different types we end up in a situation where we have to insert +;; an additional extend and that transformation is not universally beneficial. 
+;; +;; (rotr (rotr x y) z) == (rotr x (iadd y z)) +;; (rotl (rotl x y) z) == (rotl x (iadd y z)) +;; (rotr (rotl x y) z) == (rotl x (isub y z)) +;; (rotl (rotr x y) z) == (rotr x (isub y z)) +(rule (simplify (rotr ty (rotr ty x y @ (value_type kty)) z @ (value_type kty))) + (rotr ty x (iadd_uextend y z))) +(rule (simplify (rotl ty (rotl ty x y @ (value_type kty)) z @ (value_type kty))) + (rotl ty x (iadd_uextend y z))) + +(rule (simplify (rotr ty (rotl ty x y @ (value_type kty)) z @ (value_type kty))) + (rotl ty x (isub_uextend y z))) +(rule (simplify (rotl ty (rotr ty x y @ (value_type kty)) z @ (value_type kty))) + (rotr ty x (isub_uextend y z))) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index eb03dd24a7b5..c61876b8d481 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -323,6 +323,10 @@ (decl pure partial ty_half_width (Type) Type) (extern constructor ty_half_width ty_half_width) +;; Generate a mask for the maximum shift amount for a given type. i.e 31 for I32. +(decl pure ty_shift_mask (Type) u64) +(rule (ty_shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) + ;; Compare two types for equality. (decl pure ty_equal (Type Type) bool) (extern constructor ty_equal ty_equal) diff --git a/cranelift/codegen/src/prelude_opt.isle b/cranelift/codegen/src/prelude_opt.isle index 099930f5b3f1..921c7c65b8ea 100644 --- a/cranelift/codegen/src/prelude_opt.isle +++ b/cranelift/codegen/src/prelude_opt.isle @@ -43,3 +43,16 @@ ;; answer". (decl subsume (Value) Value) (extern constructor subsume subsume) + +;;;;; constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl iconst_u64 (Type u64) Value) + +;; Use a single iconst for types that fit in 64 bits. +(rule 0 (iconst_u64 (ty_int_ref_scalar_64 ty) val) + (if-let $true (u64_le val (ty_umax ty))) + (iconst ty (imm64_masked ty val))) + +;; For i128 types use a iconst, but zero extend it to i128. 
+(rule 1 (iconst_u64 $I128 val) + (uextend $I128 (iconst $I64 (imm64_masked $I64 val)))) diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif index 7852ada12630..dbe984c84d0e 100644 --- a/cranelift/filetests/filetests/egraph/shifts.clif +++ b/cranelift/filetests/filetests/egraph/shifts.clif @@ -524,3 +524,237 @@ block0(v0: i8, v1: i16, v2: i16): ; check: v5 = rotl v0, v1 ; check: return v5 + +function %ishl_prop_overflow(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 7 + v2 = iconst.i8 7 + v3 = ishl.i8 v0, v1 + v4 = ishl.i8 v3, v2 + return v4 +} + +; check: v5 = iconst.i8 0 +; check: return v5 ; v5 = 0 + +function %ishl_prop_type_diff(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i16 7 + v2 = iconst.i16 7 + v3 = ishl.i8 v0, v1 + v4 = ishl.i8 v3, v2 + return v4 +} + +; check: v5 = iconst.i8 0 +; check: return v5 ; v5 = 0 + +function %ushr_prop(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iconst.i8 1 + v3 = ushr.i8 v0, v1 + v4 = ushr.i8 v3, v2 + return v4 +} + +; check: v5 = iconst.i8 2 +; check: v6 = ushr v0, v5 ; v5 = 2 +; check: return v6 + +function %ushr_prop_overflow(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 7 + v2 = iconst.i8 7 + v3 = ushr.i8 v0, v1 + v4 = ushr.i8 v3, v2 + return v4 +} + +; check: v5 = iconst.i8 0 +; check: return v5 ; v5 = 0 + + +function %sshr_prop(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iconst.i8 1 + v3 = sshr.i8 v0, v1 + v4 = sshr.i8 v3, v2 + return v4 +} + +; check: v5 = iconst.i8 2 +; check: v6 = sshr v0, v5 ; v5 = 2 +; check: return v6 + +function %i128_ushr_overflow_becomes_0(i128) -> i128 { +block0(v0: i128): + v1 = iconst.i16 200 + v2 = ushr v0, v1 + v3 = ushr v2, v1 + return v3 +} + +; check: v4 = iconst.i64 0 +; nextln: v5 = uextend.i128 v4 ; v4 = 0 +; nextln: return v5 + + +function %i128_ishl_overflow_becomes_0(i128) -> i128 { +block0(v0: i128): + v1 = iconst.i16 200 + v2 = ishl v0, v1 + v3 = ishl v2, v1 + return v3 +} + +; check: v4 = iconst.i64 
0 +; nextln: v5 = uextend.i128 v4 ; v4 = 0 +; nextln: return v5 + +function %simd_shift_does_not_panic(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i64 0 + v2 = ushr v0, v1 + v3 = ushr v2, v1 + return v3 +} + +; check: v1 = iconst.i64 0 +; nextln: v2 = ushr v0, v1 ; v1 = 0 +; check: return v2 + +function %merges_shift_amount_based_on_lane_type(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i16 30 + v2 = iconst.i16 3 + + v3 = sshr v0, v1 + v4 = sshr v3, v2 + v5 = sshr v4, v2 + return v5 +} + +; check: v1 = iconst.i16 30 +; nextln: v3 = sshr v0, v1 ; v1 = 30 +; nextln: v6 = iconst.i16 6 +; nextln: v7 = sshr v3, v6 ; v6 = 6 +; check: return v7 + + +function %rotr_rotr_prop(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iconst.i8 1 + v3 = rotr.i8 v0, v1 + v4 = rotr.i8 v3, v2 + return v4 +} + +; check: v11 = iconst.i8 2 +; check: v13 = rotr v0, v11 ; v11 = 2 +; check: return v13 + +function %rotr_rotr_add(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = rotr.i8 v0, v1 + v4 = rotr.i8 v3, v2 + return v4 +} + +; check: v5 = iadd v1, v2 +; check: v6 = rotr v0, v5 +; check: return v6 + +function %rotl_rotl_prop(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iconst.i8 1 + v3 = rotl.i8 v0, v1 + v4 = rotl.i8 v3, v2 + return v4 +} + +; check: v11 = iconst.i8 2 +; check: v13 = rotl v0, v11 ; v11 = 2 +; check: return v13 + +function %rotl_rotl_add(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = rotl.i8 v0, v1 + v4 = rotl.i8 v3, v2 + return v4 +} + +; check: v5 = iadd v1, v2 +; check: v6 = rotl v0, v5 +; check: return v6 + + +function %rotl_rotr_prop(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = iconst.i8 1 + v3 = rotr.i8 v0, v1 + v4 = rotl.i8 v3, v2 + return v4 +} + +; check: v2 = iconst.i8 1 +; check: v21 = rotr v0, v2 ; v2 = 1 +; check: return v21 + +function %rotl_rotr_add(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = rotr.i8 v0, v1 + v4 = rotl.i8 v3, v2 + return v4 +} + +; check: v5 = isub v1, v2 +; 
check: v6 = rotr v0, v5 +; check: return v6 + +function %rotl_rotr_prop(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = iconst.i8 1 + v3 = rotl.i8 v0, v1 + v4 = rotr.i8 v3, v2 + return v4 +} + +; check: v2 = iconst.i8 1 +; check: v21 = rotl v0, v2 ; v2 = 1 +; check: return v21 + +function %rotl_rotr_add(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = rotl.i8 v0, v1 + v4 = rotr.i8 v3, v2 + return v4 +} + +; check: v5 = isub v1, v2 +; check: v6 = rotl v0, v5 +; check: return v6 + +function %rotl_rotr_subsume(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotl.i8 v0, v1 + v3 = rotr.i8 v2, v1 + return v3 +} + +; check: return v0 + + +function %rotr_rotl_subsume(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotr.i8 v0, v1 + v3 = rotl.i8 v2, v1 + return v3 +} + +; check: return v0 From c65bdb47c6681b886bdbf0a770c1a3b6375c00d0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 Nov 2023 13:18:48 -0500 Subject: [PATCH 14/17] Add some CLI knobs for the pooling allocator (#7458) I was poking around with these and found it helpful to have them in the CLI config to frob. --- crates/cli-flags/src/lib.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 03900707ac97..d785435b6c02 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -65,6 +65,14 @@ wasmtime_option_group! { /// Enable the pooling allocator, in place of the on-demand allocator. pub pooling_allocator: Option, + /// How many bytes to keep resident between instantiations for the + /// pooling allocator in linear memories. + pub pooling_memory_keep_resident: Option, + + /// How many bytes to keep resident between instantiations for the + /// pooling allocator in tables. 
+ pub pooling_table_keep_resident: Option, + /// Configure attempting to initialize linear memory via a /// copy-on-write mapping (default: yes) pub memory_init_cow: Option, @@ -500,7 +508,14 @@ impl CommonOptions { ["pooling-allocator" : self.opts.pooling_allocator] enable => { if enable { - config.allocation_strategy(wasmtime::InstanceAllocationStrategy::pooling()); + let mut cfg = wasmtime::PoolingAllocationConfig::default(); + if let Some(size) = self.opts.pooling_memory_keep_resident { + cfg.linear_memory_keep_resident(size); + } + if let Some(size) = self.opts.pooling_table_keep_resident { + cfg.table_keep_resident(size); + } + config.allocation_strategy(wasmtime::InstanceAllocationStrategy::Pooling(cfg)); } }, true => err, From 03dd951c88ba611e072783938184c57943359474 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 Nov 2023 13:19:02 -0500 Subject: [PATCH 15/17] Add #[inline] to some trivial methods (#7457) I was seeing these show up in profiles and while inlining them didn't help it helped reduce the noise a bit. --- crates/jit/src/code_memory.rs | 8 ++++++++ crates/runtime/src/mmap.rs | 3 +++ crates/runtime/src/mmap/unix.rs | 3 +++ crates/runtime/src/mmap/windows.rs | 3 +++ 4 files changed, 17 insertions(+) diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 5b4909b48117..517b5d1b9768 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -149,22 +149,26 @@ impl CodeMemory { } /// Returns a reference to the underlying `MmapVec` this memory owns. + #[inline] pub fn mmap(&self) -> &MmapVec { &self.mmap } /// Returns the contents of the text section of the ELF executable this /// represents. + #[inline] pub fn text(&self) -> &[u8] { &self.mmap[self.text.clone()] } /// Returns the contents of the `ELF_WASMTIME_DWARF` section. + #[inline] pub fn dwarf(&self) -> &[u8] { &self.mmap[self.dwarf.clone()] } /// Returns the data in the `ELF_NAME_DATA` section. 
+ #[inline] pub fn func_name_data(&self) -> &[u8] { &self.mmap[self.func_name_data.clone()] } @@ -174,24 +178,28 @@ impl CodeMemory { /// /// This is used for initialization of memories and all data ranges stored /// in a `Module` are relative to the slice returned here. + #[inline] pub fn wasm_data(&self) -> &[u8] { &self.mmap[self.wasm_data.clone()] } /// Returns the encoded address map section used to pass to /// `wasmtime_environ::lookup_file_pos`. + #[inline] pub fn address_map_data(&self) -> &[u8] { &self.mmap[self.address_map_data.clone()] } /// Returns the contents of the `ELF_WASMTIME_INFO` section, or an empty /// slice if it wasn't found. + #[inline] pub fn wasmtime_info(&self) -> &[u8] { &self.mmap[self.info_data.clone()] } /// Returns the contents of the `ELF_WASMTIME_TRAPS` section, or an empty /// slice if it wasn't found. + #[inline] pub fn trap_data(&self) -> &[u8] { &self.mmap[self.trap_data.clone()] } diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs index 4f9f4c64d4e5..341cd4deea36 100644 --- a/crates/runtime/src/mmap.rs +++ b/crates/runtime/src/mmap.rs @@ -146,11 +146,13 @@ impl Mmap { } /// Return the allocated memory as a pointer to u8. + #[inline] pub fn as_ptr(&self) -> *const u8 { self.sys.as_ptr() } /// Return the allocated memory as a mutable pointer to u8. + #[inline] pub fn as_mut_ptr(&mut self) -> *mut u8 { self.sys.as_mut_ptr() } @@ -159,6 +161,7 @@ impl Mmap { /// /// This is the byte length of this entire mapping which includes both /// addressible and non-addressible memory. 
+ #[inline] pub fn len(&self) -> usize { self.sys.len() } diff --git a/crates/runtime/src/mmap/unix.rs b/crates/runtime/src/mmap/unix.rs index 3227ec3a025f..97de914dae10 100644 --- a/crates/runtime/src/mmap/unix.rs +++ b/crates/runtime/src/mmap/unix.rs @@ -84,14 +84,17 @@ impl Mmap { Ok(()) } + #[inline] pub fn as_ptr(&self) -> *const u8 { self.memory.as_ptr() as *const u8 } + #[inline] pub fn as_mut_ptr(&mut self) -> *mut u8 { self.memory.as_ptr().cast() } + #[inline] pub fn len(&self) -> usize { unsafe { (*self.memory.as_ptr()).len() } } diff --git a/crates/runtime/src/mmap/windows.rs b/crates/runtime/src/mmap/windows.rs index 5ca208b337ac..4a8396c44275 100644 --- a/crates/runtime/src/mmap/windows.rs +++ b/crates/runtime/src/mmap/windows.rs @@ -147,14 +147,17 @@ impl Mmap { Ok(()) } + #[inline] pub fn as_ptr(&self) -> *const u8 { self.memory.as_ptr() as *const u8 } + #[inline] pub fn as_mut_ptr(&mut self) -> *mut u8 { self.memory.as_ptr().cast() } + #[inline] pub fn len(&self) -> usize { unsafe { (*self.memory.as_ptr()).len() } } From 64af09d1c6b240a003df0c2fb72e4815e5a9e350 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 Nov 2023 19:47:25 -0500 Subject: [PATCH 16/17] aarch64: Add support for `tbz` and `tbnz` (#7452) * aarch64: Add support for `tbz` and `tbnz` I noticed these instructions when glancing at some disassembly for other code and also noticed that Cranelift didn't have support for them. This adds a few new lowerings for conditional branches in the aarch64 backend which are for testing a bit and branching if it's zero or not zero. 
* Review comments on naming --- cranelift/codegen/src/isa/aarch64/inst.isle | 26 +++ .../codegen/src/isa/aarch64/inst/args.rs | 37 +++- .../codegen/src/isa/aarch64/inst/emit.rs | 47 +++++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 39 +++- cranelift/codegen/src/isa/aarch64/lower.isle | 18 ++ .../codegen/src/isa/aarch64/lower/isle.rs | 11 ++ .../filetests/isa/aarch64/condbr.clif | 180 ++++++++++++++++++ 7 files changed, 342 insertions(+), 16 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 90c0911185c5..314d09b15950 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -833,6 +833,15 @@ (not_taken BranchTarget) (kind CondBrKind)) + ;; A conditional branch which tests the `bit` of `rn` and branches + ;; depending on `kind`. + (TestBitAndBranch + (kind TestBitAndBranchKind) + (taken BranchTarget) + (not_taken BranchTarget) + (rn Reg) + (bit u8)) + ;; A conditional trap: execute a `udf` if the condition is true. This is ;; one VCode instruction because it uses embedded control flow; it is ;; logically a single-in, single-out region, but needs to appear as one @@ -1203,6 +1212,8 @@ (enum Size32 Size64)) +(type TestBitAndBranchKind (enum (Z) (NZ))) + ;; Helper for calculating the `OperandSize` corresponding to a type (decl operand_size (Type) OperandSize) (rule 1 (operand_size (fits_in_32 _ty)) (OperandSize.Size32)) @@ -4079,6 +4090,21 @@ (ConsumesFlags.ConsumesFlagsSideEffect (MInst.CondBr taken not_taken kind))) +;; Helper for emitting `MInst.TestBitAndBranch` instructions. +(decl test_branch (TestBitAndBranchKind BranchTarget BranchTarget Reg u8) SideEffectNoResult) +(rule (test_branch kind taken not_taken rn bit) + (SideEffectNoResult.Inst (MInst.TestBitAndBranch kind taken not_taken rn bit))) + +;; Helper for emitting `tbnz` instructions. 
+(decl tbnz (BranchTarget BranchTarget Reg u8) SideEffectNoResult) +(rule (tbnz taken not_taken rn bit) + (test_branch (TestBitAndBranchKind.NZ) taken not_taken rn bit)) + +;; Helper for emitting `tbz` instructions. +(decl tbz (BranchTarget BranchTarget Reg u8) SideEffectNoResult) +(rule (tbz taken not_taken rn bit) + (test_branch (TestBitAndBranchKind.Z) taken not_taken rn bit)) + ;; Helper for emitting `MInst.MovToNZCV` instructions. (decl mov_to_nzcv (Reg) ProducesFlags) (rule (mov_to_nzcv rn) diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index c6394e6596f3..f6d7f5800495 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -340,26 +340,31 @@ impl BranchTarget { } } + /// Return the target's offset, if specified, or zero if label-based. + pub fn as_offset14_or_zero(self) -> u32 { + self.as_offset_bounded(14) + } + /// Return the target's offset, if specified, or zero if label-based. pub fn as_offset19_or_zero(self) -> u32 { - let off = match self { - BranchTarget::ResolvedOffset(off) => off >> 2, - _ => 0, - }; - assert!(off <= 0x3ffff); - assert!(off >= -0x40000); - (off as u32) & 0x7ffff + self.as_offset_bounded(19) } /// Return the target's offset, if specified, or zero if label-based. 
pub fn as_offset26_or_zero(self) -> u32 { + self.as_offset_bounded(26) + } + + fn as_offset_bounded(self, bits: u32) -> u32 { let off = match self { BranchTarget::ResolvedOffset(off) => off >> 2, _ => 0, }; - assert!(off <= 0x1ffffff); - assert!(off >= -0x2000000); - (off as u32) & 0x3ffffff + let hi = (1 << (bits - 1)) - 1; + let lo = -(1 << bits - 1); + assert!(off <= hi); + assert!(off >= lo); + (off as u32) & ((1 << bits) - 1) } } @@ -764,3 +769,15 @@ impl APIKey { 0xd503201f | (crm << 8) | (op2 << 5) } } + +pub use crate::isa::aarch64::lower::isle::generated_code::TestBitAndBranchKind; + +impl TestBitAndBranchKind { + /// Complements this branch condition to act on the opposite result. + pub fn complement(&self) -> TestBitAndBranchKind { + match self { + TestBitAndBranchKind::Z => TestBitAndBranchKind::NZ, + TestBitAndBranchKind::NZ => TestBitAndBranchKind::Z, + } + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 39a49dfdd275..7549ee4a62cd 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -172,6 +172,27 @@ fn enc_conditional_br( } } +fn enc_test_bit_and_branch( + kind: TestBitAndBranchKind, + taken: BranchTarget, + reg: Reg, + bit: u8, +) -> u32 { + assert!(bit < 64); + let op_31 = u32::from(bit >> 5); + let op_23_19 = u32::from(bit & 0b11111); + let op_30_24 = 0b0110110 + | match kind { + TestBitAndBranchKind::Z => 0, + TestBitAndBranchKind::NZ => 1, + }; + (op_31 << 31) + | (op_30_24 << 24) + | (op_23_19 << 19) + | (taken.as_offset14_or_zero() << 5) + | machreg_to_gpr(reg) +} + fn enc_move_wide(op: MoveWideOp, rd: Writable, imm: MoveWideConst, size: OperandSize) -> u32 { assert!(imm.shift <= 0b11); let op = match op { @@ -3224,6 +3245,32 @@ impl MachInstEmit for Inst { } sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero())); } + &Inst::TestBitAndBranch { + taken, + not_taken, + kind, + rn, + bit, + } => { + let 
rn = allocs.next(rn); + // Emit the conditional branch first + let cond_off = sink.cur_offset(); + if let Some(l) = taken.as_label() { + sink.use_label_at_offset(cond_off, l, LabelUse::Branch14); + let inverted = + enc_test_bit_and_branch(kind.complement(), taken, rn, bit).to_le_bytes(); + sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); + } + sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit)); + + // Unconditional part next. + let uncond_off = sink.cur_offset(); + if let Some(l) = not_taken.as_label() { + sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26); + sink.add_uncond_branch(uncond_off, uncond_off + 4, l); + } + sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero())); + } &Inst::TrapIf { kind, trap_code } => { let label = sink.defer_trap(trap_code, state.take_stack_map()); // condbr KIND, LABEL diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index bf3bd50e6f4f..d6027c074d29 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -902,6 +902,9 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan } CondBrKind::Cond(_) => {} }, + &Inst::TestBitAndBranch { rn, .. } => { + collector.reg_use(rn); + } &Inst::IndirectBr { rn, .. } => { collector.reg_use(rn); } @@ -1043,6 +1046,7 @@ impl MachInst for Inst { &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, &Inst::Jump { .. } => MachTerminator::Uncond, &Inst::CondBr { .. } => MachTerminator::Cond, + &Inst::TestBitAndBranch { .. } => MachTerminator::Cond, &Inst::IndirectBr { .. } => MachTerminator::Indirect, &Inst::JTSequence { .. 
} => MachTerminator::Indirect, _ => MachTerminator::None, @@ -2663,6 +2667,22 @@ impl Inst { } } } + &Inst::TestBitAndBranch { + kind, + ref taken, + ref not_taken, + rn, + bit, + } => { + let cond = match kind { + TestBitAndBranchKind::Z => "z", + TestBitAndBranchKind::NZ => "nz", + }; + let taken = taken.pretty_print(0, allocs); + let not_taken = not_taken.pretty_print(0, allocs); + let rn = pretty_print_reg(rn, allocs); + format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}") + } &Inst::IndirectBr { rn, .. } => { let rn = pretty_print_reg(rn, allocs); format!("br {}", rn) @@ -2882,6 +2902,9 @@ impl Inst { /// Different forms of label references for different instruction formats. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum LabelUse { + /// 14-bit branch offset (conditional branches). PC-rel, offset is imm << + /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz. + Branch14, /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19 /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond. Branch19, @@ -2908,8 +2931,10 @@ impl MachInstLabelUse for LabelUse { /// Maximum PC-relative range (positive), inclusive. fn max_pos_range(self) -> CodeOffset { match self { - // 19-bit immediate, left-shifted by 2, for 21 bits of total range. Signed, so +2^20 - // from zero. Likewise for two other shifted cases below. + // N-bit immediate, left-shifted by 2, for (N+2) bits of total + // range. Signed, so +2^(N+1) from zero. Likewise for two other + // shifted cases below. 
+ LabelUse::Branch14 => (1 << 15) - 1, LabelUse::Branch19 => (1 << 20) - 1, LabelUse::Branch26 => (1 << 27) - 1, LabelUse::Ldr19 => (1 << 20) - 1, @@ -2941,6 +2966,7 @@ impl MachInstLabelUse for LabelUse { let pc_rel = pc_rel as u32; let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); let mask = match self { + LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive @@ -2955,6 +2981,7 @@ impl MachInstLabelUse for LabelUse { } }; let pc_rel_inserted = match self { + LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5, LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5, LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff, LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10, @@ -2975,8 +3002,8 @@ impl MachInstLabelUse for LabelUse { /// Is a veneer supported for this label reference type? fn supports_veneer(self) -> bool { match self { - LabelUse::Branch19 => true, // veneer is a Branch26 - LabelUse::Branch26 => true, // veneer is a PCRel32 + LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26 + LabelUse::Branch26 => true, // veneer is a PCRel32 _ => false, } } @@ -2984,7 +3011,7 @@ impl MachInstLabelUse for LabelUse { /// How large is the veneer, if supported? fn veneer_size(self) -> CodeOffset { match self { - LabelUse::Branch19 => 4, + LabelUse::Branch14 | LabelUse::Branch19 => 4, LabelUse::Branch26 => 20, _ => unreachable!(), } @@ -3002,7 +3029,7 @@ impl MachInstLabelUse for LabelUse { veneer_offset: CodeOffset, ) -> (CodeOffset, LabelUse) { match self { - LabelUse::Branch19 => { + LabelUse::Branch14 | LabelUse::Branch19 => { // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't // bother with constructing an Inst. 
let insn_word = 0b000101 << 26; diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 4d8b29fb6fd2..7af210286880 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -2882,6 +2882,24 @@ (with_flags_side_effect flags (cond_br taken not_taken (cond_br_not_zero rt)))))) +;; Special lowerings for `tbnz` - "Test bit and Branch if Nonzero" +(rule 1 (lower_branch (brif (band x @ (value_type ty) (u64_from_iconst n)) _ _) + (two_targets taken not_taken)) + (if-let bit (test_and_compare_bit_const ty n)) + (emit_side_effect (tbnz taken not_taken x bit))) + +;; Special lowering for `tbz` - "Test bit and Branch if Zero" +(rule 1 (lower_branch (brif (icmp (IntCC.Equal) + (band x @ (value_type (fits_in_64 ty)) + (u64_from_iconst n)) + (u64_from_iconst 0)) _ _) + (two_targets taken not_taken)) + (if-let bit (test_and_compare_bit_const ty n)) + (emit_side_effect (tbz taken not_taken x bit))) + +(decl pure partial test_and_compare_bit_const (Type u64) u8) +(extern constructor test_and_compare_bit_const test_and_compare_bit_const) + ;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower_branch (jump _) (single_target label)) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 0c3f151c4818..1208f519c139 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -812,4 +812,15 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { fn uimm12_scaled_from_i64(&mut self, val: i64, ty: Type) -> Option { UImm12Scaled::maybe_from_i64(val, ty) } + + fn test_and_compare_bit_const(&mut self, ty: Type, n: u64) -> Option { + if n.count_ones() != 1 { + return None; + } + let bit = n.trailing_zeros(); + if bit >= ty.bits() { + return None; + } + Some(bit as u8) + } } diff --git 
a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index f0661556de90..13d59de14513 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -733,3 +733,183 @@ block1: ; block1: ; offset 0x1c ; ret +function %tbnz_i8(i8) { +block0(v0: i8): + v1 = band_imm v0, 0x10 + brif v1, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; tbnz x0, #4, label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; tbnz w0, #4, #8 +; block1: ; offset 0x4 +; ret +; block2: ; offset 0x8 +; ret + +function %tbz_i16(i16) { +block0(v0: i16): + v1 = band_imm v0, 0x1000 + v2 = icmp_imm eq v1, 0 + brif v2, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; tbz x0, #12, label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; tbz w0, #0xc, #8 +; block1: ; offset 0x4 +; ret +; block2: ; offset 0x8 +; ret + +function %tbnz_i32(i32) { +block0(v0: i32): + v1 = band_imm v0, 0x10000 + brif v1, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; tbnz x0, #16, label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; tbnz w0, #0x10, #8 +; block1: ; offset 0x4 +; ret +; block2: ; offset 0x8 +; ret + + +function %tbz_i64(i64) { +block0(v0: i64): + v1 = band_imm v0, 0x1_00000000 + v2 = icmp_imm eq v1, 0 + brif v2, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; tbz x0, #32, label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; tbz x0, #0x20, #8 +; block1: ; offset 0x4 +; ret +; block2: ; offset 0x8 +; ret + +function %not_tbz1(i8) { +block0(v0: i8): + v1 = band_imm v0, 0x100 + v2 = icmp_imm eq v1, 0 + brif v2, block1, block2 + +block1: + return +block2: + return +} + +; VCode: 
+; block0: +; movz w4, #0 +; and w4, w0, w4 +; uxtb w4, w4 +; subs wzr, w4, #0 +; b.eq label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; mov w4, #0 +; and w4, w0, w4 +; uxtb w4, w4 +; cmp w4, #0 +; b.eq #0x18 +; block1: ; offset 0x14 +; ret +; block2: ; offset 0x18 +; ret + +function %not_tbz2(i8) { +block0(v0: i8): + v1 = band_imm v0, 0x3 + v2 = icmp_imm eq v1, 0 + brif v2, block1, block2 + +block1: + return +block2: + return +} + +; VCode: +; block0: +; and w3, w0, #3 +; uxtb w3, w3 +; subs wzr, w3, #0 +; b.eq label2 ; b label1 +; block1: +; ret +; block2: +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; and w3, w0, #3 +; uxtb w3, w3 +; cmp w3, #0 +; b.eq #0x14 +; block1: ; offset 0x10 +; ret +; block2: ; offset 0x14 +; ret From 630f8e32d8921a08ead7d27955686342f9f6fa06 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 2 Nov 2023 19:48:17 -0500 Subject: [PATCH 17/17] Fix some text-format related features (#7464) * Enable `wasmtime/wat` when the `wat` feature is enabled on the CLI * Additionally always enable the `wasmtime/wat` feature for the `wasmtime-wast` crate since the text format is used by some tests. 
Closes #7460 --- Cargo.toml | 2 +- crates/wast/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b9f5398735bf..51310581149d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -343,7 +343,7 @@ component-model = [ "wasmtime-wast/component-model", "wasmtime-cli-flags/component-model" ] -wat = ["dep:wat"] +wat = ["dep:wat", "wasmtime/wat"] cache = ["dep:wasmtime-cache", "wasmtime-cli-flags/cache"] parallel-compilation = ["wasmtime-cli-flags/parallel-compilation"] logging = ["wasmtime-cli-flags/logging"] diff --git a/crates/wast/Cargo.toml b/crates/wast/Cargo.toml index 24b690427cc6..b264511394fc 100644 --- a/crates/wast/Cargo.toml +++ b/crates/wast/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true [dependencies] anyhow = { workspace = true } -wasmtime = { workspace = true, features = ['cranelift'] } +wasmtime = { workspace = true, features = ['cranelift', 'wat'] } wast = { workspace = true } log = { workspace = true }