Skip to content

Commit

Permalink
instcountci: testing multiple 80bit stores using SVE
Browse files Browse the repository at this point in the history
In preparation for FEX-Emu#4166 which should improve on these results.
  • Loading branch information
pmatos committed Nov 22, 2024
1 parent 22058c0 commit 7ad9f7a
Showing 1 changed file with 206 additions and 0 deletions.
206 changes: 206 additions & 0 deletions unittests/InstructionCountCI/X87store-SVE.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"SVE128",
"SVE256"
],
"DisabledHostFeatures": [
"AFP",
"FLAGM",
"FLAGM2",
"RPRES"
]
},
"Instructions": {
"fstp tword [rax]": {
"ExpectedInstructionCount": 15,
"Comment": "Single 80-bit store.",
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w22, w22, w20",
"bic w21, w21, w22",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]"
]
},
"2-store 80bit": {
"x86InstructionCount": 2,
"ExpectedInstructionCount": 29,
"x86Insts": [
"fstp tword [rax]",
"fstp tword [rax+10]"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0xa (10)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]"
]
},
"8-store 80bit": {
"x86InstructionCount": 8,
"ExpectedInstructionCount": 113,
"x86Insts": [
"fstp tword [rax]",
"fstp tword [rax+10]",
"fstp tword [rax+20]",
"fstp tword [rax+30]",
"fstp tword [rax+40]",
"fstp tword [rax+50]",
"fstp tword [rax+60]",
"fstp tword [rax+70]"
],
"ExpectedArm64ASM": [
"ldrb w20, [x28, #1019]",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x4]",
"mov x21, v2.d[1]",
"add x22, x4, #0x8 (8)",
"strh w21, [x22]",
"ldrb w21, [x28, #1298]",
"mov w22, #0x1",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0xa (10)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x14 (20)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x1e (30)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x28 (40)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x32 (50)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x3c (60)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w23, w22, w20",
"bic w21, w21, w23",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]",
"add x21, x4, #0x46 (70)",
"add x0, x28, x20, lsl #4",
"ldr q2, [x0, #1040]",
"str d2, [x21]",
"mov x23, v2.d[1]",
"add x21, x21, #0x8 (8)",
"strh w23, [x21]",
"ldrb w21, [x28, #1298]",
"lsl w22, w22, w20",
"bic w21, w21, w22",
"strb w21, [x28, #1298]",
"add w20, w20, #0x1 (1)",
"and w20, w20, #0x7",
"strb w20, [x28, #1019]"
]
}
}
}

0 comments on commit 7ad9f7a

Please sign in to comment.