From cf6cd1fd67356ca0c2972992928592d2430043d2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 23 Aug 2024 10:32:08 +0100 Subject: [PATCH] [MCA][X86] Add missing 512-bit vpscatterqd/vscatterqps schedule data (REAPPLIED) This doesn't match uops.info yet - but it matches the existing vpscatterdq/vscatterqpd entries like uops.info says it should Reapplied with codegen fix for scatter-schedule.ll Fixes #105675 --- llvm/lib/Target/X86/X86SchedIceLake.td | 2 ++ llvm/lib/Target/X86/X86SchedSkylakeServer.td | 2 ++ llvm/test/CodeGen/X86/scatter-schedule.ll | 4 ++-- .../llvm-mca/X86/IceLakeServer/resources-avx512.s | 10 +++++----- .../llvm-mca/X86/SkylakeServer/resources-avx512.s | 10 +++++----- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index fd372ba4656eba..29b1464e19a32b 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1524,8 +1524,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015 let ReleaseAtCycles = [1,8,8,2]; } def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr, + VPSCATTERQDZmr, VPSCATTERQQZmr, VSCATTERDPDZmr, + VSCATTERQPSZmr, VSCATTERQPDZmr)>; def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> { diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 4fded44085e897..2423602d06c470 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015 let ReleaseAtCycles = [1,8,8,2]; } def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr, + VPSCATTERQDZmr, VPSCATTERQQZmr, VSCATTERDPDZmr, + VSCATTERQPSZmr, VSCATTERQPDZmr)>; def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { diff --git a/llvm/test/CodeGen/X86/scatter-schedule.ll b/llvm/test/CodeGen/X86/scatter-schedule.ll index c841e23eab76b2..762a050247a87e 100644 --- a/llvm/test/CodeGen/X86/scatter-schedule.ll +++ b/llvm/test/CodeGen/X86/scatter-schedule.ll @@ -10,8 +10,8 @@ define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: ; CHECK-NEXT: kxnorw %k0, %k0, %k1 -; CHECK-NEXT: kxnorw %k0, %k0, %k2 -; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k2} +; CHECK-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k1} +; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm0 ; CHECK-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index c4df992f3aebca..c509e766540b15 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -1804,7 +1804,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 36 8 8.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 4.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1 1 0.50 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 19 7 4.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 4.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19 @@ -1871,7 +1871,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 36 7 8.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 4.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1 1 0.50 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 19 7 4.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 4.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19 @@ -2054,7 +2054,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 408.17 102.67 327.50 327.50 41.50 592.17 5.00 41.50 41.50 41.50 +# CHECK-NEXT: - 612.00 411.17 103.67 327.50 327.50 48.50 593.17 6.00 48.50 48.50 48.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2774,7 +2774,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vpshufd $0, %zmm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpshufd $0, (%rax), %zmm19 @@ -2841,7 +2841,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.50 0.50 - - 8.00 1.50 0.50 8.00 8.00 8.00 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: - - - - - - 0.50 - - 0.50 0.50 0.50 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 - - 4.00 0.50 0.50 4.00 4.00 4.00 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index 5eaa0f91fdaaba..9c006d4ebb077d 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -1804,7 +1804,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 36 8 16.00 * vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 8.00 * vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1 1 1.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 19 7 8.00 * vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 8.00 * vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpshufd $0, (%rax), %zmm19 @@ -1871,7 +1871,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 1.00 * vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 36 7 16.00 * vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 8.00 * vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1 1 1.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 19 7 8.00 * vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 19 7 8.00 * vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: 1 3 1.00 vshuff32x4 $0, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vshuff32x4 $0, (%rax), %zmm17, %zmm19 @@ -2052,7 +2052,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 349.67 102.67 355.17 355.17 83.00 650.67 5.00 27.67 +# CHECK-NEXT: - 612.00 352.67 103.67 359.83 359.83 97.00 651.67 6.00 32.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2772,7 +2772,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vpshufd $0, %zmm16, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpshufd $0, (%rax), %zmm19 @@ -2839,7 +2839,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.50 0.50 5.33 5.33 16.00 1.50 0.50 5.33 vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - 1.50 0.50 2.67 2.67 8.00 0.50 0.50 2.67 vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - - - 1.00 - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19