-
Notifications
You must be signed in to change notification settings - Fork 123
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
instcountci: testing multiple 80bit ldst using SVE
In preparation for #4166 which should improve on these results.
- Loading branch information
Showing
2 changed files
with
354 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,343 @@ | ||
{ | ||
"Features": { | ||
"Bitness": 64, | ||
"EnabledHostFeatures": [ | ||
"SVE128", | ||
"SVE256" | ||
], | ||
"DisabledHostFeatures": [ | ||
"AFP", | ||
"FLAGM", | ||
"FLAGM2", | ||
"RPRES" | ||
] | ||
}, | ||
"Instructions": { | ||
"fstp tword [rax]": { | ||
"ExpectedInstructionCount": 15, | ||
"Comment": "Single 80-bit store.", | ||
"ExpectedArm64ASM": [ | ||
"ldrb w20, [x28, #1019]", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x4]", | ||
"mov x21, v2.d[1]", | ||
"add x22, x4, #0x8 (8)", | ||
"strh w21, [x22]", | ||
"ldrb w21, [x28, #1298]", | ||
"mov w22, #0x1", | ||
"lsl w22, w22, w20", | ||
"bic w21, w21, w22", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]" | ||
] | ||
}, | ||
"2-store 80bit": { | ||
"x86InstructionCount": 2, | ||
"ExpectedInstructionCount": 29, | ||
"x86Insts": [ | ||
"fstp tword [rax]", | ||
"fstp tword [rax+10]" | ||
], | ||
"ExpectedArm64ASM": [ | ||
"ldrb w20, [x28, #1019]", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x4]", | ||
"mov x21, v2.d[1]", | ||
"add x22, x4, #0x8 (8)", | ||
"strh w21, [x22]", | ||
"ldrb w21, [x28, #1298]", | ||
"mov w22, #0x1", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0xa (10)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w22, w22, w20", | ||
"bic w21, w21, w22", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]" | ||
] | ||
}, | ||
"8-store 80bit": { | ||
"x86InstructionCount": 8, | ||
"ExpectedInstructionCount": 113, | ||
"x86Insts": [ | ||
"fstp tword [rax]", | ||
"fstp tword [rax+10]", | ||
"fstp tword [rax+20]", | ||
"fstp tword [rax+30]", | ||
"fstp tword [rax+40]", | ||
"fstp tword [rax+50]", | ||
"fstp tword [rax+60]", | ||
"fstp tword [rax+70]" | ||
], | ||
"ExpectedArm64ASM": [ | ||
"ldrb w20, [x28, #1019]", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x4]", | ||
"mov x21, v2.d[1]", | ||
"add x22, x4, #0x8 (8)", | ||
"strh w21, [x22]", | ||
"ldrb w21, [x28, #1298]", | ||
"mov w22, #0x1", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0xa (10)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0x14 (20)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0x1e (30)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0x28 (40)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0x32 (50)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0x3c (60)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w23, w22, w20", | ||
"bic w21, w21, w23", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x21, x4, #0x46 (70)", | ||
"add x0, x28, x20, lsl #4", | ||
"ldr q2, [x0, #1040]", | ||
"str d2, [x21]", | ||
"mov x23, v2.d[1]", | ||
"add x21, x21, #0x8 (8)", | ||
"strh w23, [x21]", | ||
"ldrb w21, [x28, #1298]", | ||
"lsl w22, w22, w20", | ||
"bic w21, w21, w22", | ||
"strb w21, [x28, #1298]", | ||
"add w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]" | ||
] | ||
}, | ||
"fld tword [rax]": { | ||
"ExpectedInstructionCount": 14, | ||
"Comment": "Single 80-bit load.", | ||
"ExpectedArm64ASM": [ | ||
"ldr d2, [x4]", | ||
"add x20, x4, #0x8 (8)", | ||
"ld1 {v2.h}[4], [x20]", | ||
"ldrb w20, [x28, #1019]", | ||
"mov w21, #0x1", | ||
"sub w20, w20, #0x1 (1)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x0, x28, x20, lsl #4", | ||
"str q2, [x0, #1040]", | ||
"ldrb w22, [x28, #1298]", | ||
"lsl w20, w21, w20", | ||
"orr w20, w22, w20", | ||
"strb w20, [x28, #1298]" | ||
] | ||
}, | ||
"2-load 80bit": { | ||
"x86InstructionCount": 2, | ||
"ExpectedInstructionCount": 24, | ||
"x86Insts": [ | ||
"fld tword [rax]", | ||
"fld tword [rax+10]" | ||
], | ||
"ExpectedArm64ASM": [ | ||
"ldr d2, [x4]", | ||
"add x20, x4, #0x8 (8)", | ||
"ld1 {v2.h}[4], [x20]", | ||
"add x20, x4, #0xa (10)", | ||
"ldr d3, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v3.h}[4], [x20]", | ||
"ldrb w20, [x28, #1019]", | ||
"sub w20, w20, #0x2 (2)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x0, x28, x20, lsl #4", | ||
"str q3, [x0, #1040]", | ||
"add w21, w20, #0x1 (1)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q2, [x0, #1040]", | ||
"mov w21, #0x8", | ||
"sub w20, w21, w20", | ||
"ldrb w21, [x28, #1298]", | ||
"mov w22, #0x303", | ||
"lsr w20, w22, w20", | ||
"orr w20, w21, w20", | ||
"strb w20, [x28, #1298]" | ||
] | ||
}, | ||
"8-load 80bit": { | ||
"x86InstructionCount": 8, | ||
"ExpectedInstructionCount": 67, | ||
"x86Insts": [ | ||
"fld tword [rax]", | ||
"fld tword [rax+10]", | ||
"fld tword [rax+20]", | ||
"fld tword [rax+30]", | ||
"fld tword [rax+40]", | ||
"fld tword [rax+50]", | ||
"fld tword [rax+60]", | ||
"fld tword [rax+70]" | ||
], | ||
"ExpectedArm64ASM": [ | ||
"ldr d2, [x4]", | ||
"add x20, x4, #0x8 (8)", | ||
"ld1 {v2.h}[4], [x20]", | ||
"add x20, x4, #0xa (10)", | ||
"ldr d3, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v3.h}[4], [x20]", | ||
"add x20, x4, #0x14 (20)", | ||
"ldr d4, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v4.h}[4], [x20]", | ||
"add x20, x4, #0x1e (30)", | ||
"ldr d5, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v5.h}[4], [x20]", | ||
"add x20, x4, #0x28 (40)", | ||
"ldr d6, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v6.h}[4], [x20]", | ||
"add x20, x4, #0x32 (50)", | ||
"ldr d7, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v7.h}[4], [x20]", | ||
"add x20, x4, #0x3c (60)", | ||
"ldr d8, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v8.h}[4], [x20]", | ||
"add x20, x4, #0x46 (70)", | ||
"ldr d9, [x20]", | ||
"add x20, x20, #0x8 (8)", | ||
"ld1 {v9.h}[4], [x20]", | ||
"ldrb w20, [x28, #1019]", | ||
"sub w20, w20, #0x8 (8)", | ||
"and w20, w20, #0x7", | ||
"strb w20, [x28, #1019]", | ||
"add x0, x28, x20, lsl #4", | ||
"str q9, [x0, #1040]", | ||
"add w21, w20, #0x1 (1)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q8, [x0, #1040]", | ||
"add w21, w20, #0x2 (2)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q7, [x0, #1040]", | ||
"add w21, w20, #0x3 (3)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q6, [x0, #1040]", | ||
"add w21, w20, #0x4 (4)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q5, [x0, #1040]", | ||
"add w21, w20, #0x5 (5)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q4, [x0, #1040]", | ||
"add w21, w20, #0x6 (6)", | ||
"and w21, w21, #0x7", | ||
"add x0, x28, x21, lsl #4", | ||
"str q3, [x0, #1040]", | ||
"add w20, w20, #0x7 (7)", | ||
"and w20, w20, #0x7", | ||
"add x0, x28, x20, lsl #4", | ||
"str q2, [x0, #1040]", | ||
"mov w20, #0xff", | ||
"strb w20, [x28, #1298]" | ||
] | ||
} | ||
} | ||
} |