From 35b89dda2b9734917824b1457f149192669b314c Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 29 Apr 2024 08:40:26 +0800 Subject: [PATCH] [X86][EVEX512] Check hasEVEX512 for canExtendTo512DQ (#90390) Fixes #90356 --- llvm/lib/Target/X86/X86Subtarget.h | 3 +- llvm/test/CodeGen/X86/avx512bwvl-arith.ll | 35 +++++++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index a458b5f9ec8fbb..4d55a084b730e4 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -244,7 +244,8 @@ class X86Subtarget final : public X86GenSubtargetInfo { // TODO: Currently we're always allowing widening on CPUs without VLX, // because for many cases we don't have a better option. bool canExtendTo512DQ() const { - return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512); + return hasAVX512() && hasEVEX512() && + (!hasVLX() || getPreferVectorWidth() >= 512); } bool canExtendTo512BW() const { return hasBWI() && canExtendTo512DQ(); diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll index 4988fc35b10eef..fdc25f44b156a7 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,EVEX256 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s --check-prefixes=CHECK,EVEX512 ; 256-bit @@ -236,3 +236,34 @@ define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) { ret <8 x i16> %x } +define i16 @PR90356(<16 x i1> %a) { +; 
EVEX256-LABEL: PR90356: +; EVEX256: # %bb.0: +; EVEX256-NEXT: vpsllw $7, %xmm0, %xmm0 +; EVEX256-NEXT: vpmovb2m %xmm0, %k1 +; EVEX256-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; EVEX256-NEXT: movb $63, %al +; EVEX256-NEXT: kmovd %eax, %k1 +; EVEX256-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} +; EVEX256-NEXT: vptestmd %zmm0, %zmm0, %k0 +; EVEX256-NEXT: kmovd %k0, %eax +; EVEX256-NEXT: # kill: def $ax killed $ax killed $eax +; EVEX256-NEXT: vzeroupper +; EVEX256-NEXT: retq +; +; EVEX512-LABEL: PR90356: +; EVEX512: # %bb.0: +; EVEX512-NEXT: vpsllw $7, %xmm0, %xmm0 +; EVEX512-NEXT: vpmovb2m %xmm0, %k0 +; EVEX512-NEXT: vpmovm2w %k0, %ymm0 +; EVEX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; EVEX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; EVEX512-NEXT: vpmovw2m %ymm0, %k0 +; EVEX512-NEXT: kmovd %k0, %eax +; EVEX512-NEXT: # kill: def $ax killed $ax killed $eax +; EVEX512-NEXT: vzeroupper +; EVEX512-NEXT: retq + %1 = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31> + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +}