From 9520773c46777adbc1d489f831d6c93b8287ca0e Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 29 Jan 2024 16:46:22 +0000 Subject: [PATCH] [AArch64] Don't generate neon integer complex numbers with +sve2. NFC (#79829) The condition for allowing integer complex number support could also allow neon fixed length complex numbers if +sve2 was specified. This tightens the condition to only allow integer complex number support for scalable vectors. We could generalize this in the future to generate SVE intrinsics for fixed-length vectors, but for the moment this opts for the simpler fix. --- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +-- .../AArch64/complex-deinterleaving-f16-add.ll | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2cb7770ee21e4f..5bb8aabea80dbb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26949,7 +26949,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported( return false; // If the vector is scalable, SVE is enabled, implying support for complex - // numbers. Otherwirse, we need to ensure complex number support is avaialble + // numbers. 
Otherwise, we need to ensure complex number support is available if (!VTy->isScalableTy() && !Subtarget->hasComplxNum()) return false; @@ -26965,7 +26965,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported( !llvm::isPowerOf2_32(VTyWidth)) return false; - if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) { + if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) { unsigned ScalarWidth = ScalarTy->getScalarSizeInBits(); return 8 <= ScalarWidth && ScalarWidth <= 64; } diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll index 93497f38063d28..7b8448de2331b4 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s +; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s target triple = "aarch64" @@ -158,6 +159,32 @@ entry: ret <16 x half> %interleaved.vec } + +; Expected not to transform as it is integer +define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: complex_add_v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h +; CHECK-NEXT: add v1.8h, v1.8h, v5.8h +; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h +; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %b.real = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x
i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %b.imag = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %0 = sub <8 x i16> %b.real, %a.imag + %1 = add <8 x i16> %b.imag, %a.real + %interleaved.vec = shufflevector <8 x i16> %0, <8 x i16> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <16 x i16> %interleaved.vec +} + + declare { <2 x half>, <2 x half> } @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>) declare <4 x half> @llvm.experimental.vector.interleave2.v4f16(<2 x half>, <2 x half>)