diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 9a58968e5d20eb..b06c4a64ba5289 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1919,14 +1919,7 @@ int64_t getRelocationAddend(const ELFObjectFile *Obj, int64_t getRelocationAddend(const ELFObjectFileBase *Obj, const RelocationRef &Rel) { - if (auto *ELF32LE = dyn_cast(Obj)) - return getRelocationAddend(ELF32LE, Rel); - if (auto *ELF64LE = dyn_cast(Obj)) - return getRelocationAddend(ELF64LE, Rel); - if (auto *ELF32BE = dyn_cast(Obj)) - return getRelocationAddend(ELF32BE, Rel); - auto *ELF64BE = cast(Obj); - return getRelocationAddend(ELF64BE, Rel); + return getRelocationAddend(cast(Obj), Rel); } template @@ -1953,14 +1946,7 @@ uint32_t getRelocationSymbol(const ELFObjectFile *Obj, uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj, const RelocationRef &Rel) { - if (auto *ELF32LE = dyn_cast(Obj)) - return getRelocationSymbol(ELF32LE, Rel); - if (auto *ELF64LE = dyn_cast(Obj)) - return getRelocationSymbol(ELF64LE, Rel); - if (auto *ELF32BE = dyn_cast(Obj)) - return getRelocationSymbol(ELF32BE, Rel); - auto *ELF64BE = cast(Obj); - return getRelocationSymbol(ELF64BE, Rel); + return getRelocationSymbol(cast(Obj), Rel); } } // anonymous namespace diff --git a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp index 428bfb13064eea..cdd1984ab50cf7 100644 --- a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp @@ -93,12 +93,11 @@ using MacroList = SmallVector; enum class IncludeGuard { None, FileChanged, IfGuard, DefineGuard }; struct FileState { - FileState() - : ConditionScopes(0), LastLine(0), GuardScanner(IncludeGuard::None) {} + FileState() = default; - int ConditionScopes; - unsigned int LastLine; - IncludeGuard GuardScanner; + int ConditionScopes = 0; + unsigned int LastLine = 0; + IncludeGuard GuardScanner = IncludeGuard::None; SourceLocation LastMacroLocation; }; diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index d9ed17b04facc6..153670cab4ab04 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -16,7 +16,7 @@ /* Indicate that this is LLVM compiled from the amd-gfx branch. */ #define LLVM_HAVE_BRANCH_AMD_GFX -#define LLVM_MAIN_REVISION 458748 +#define LLVM_MAIN_REVISION 458752 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 355128786c5b9c..49a4025cae38cf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5048,7 +5048,7 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { EVT VT = Op.getValueType(); - if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only()) return SDValue(); if (!VT.isSimple()) return SDValue(); diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 7e4d7a9b841a7f..d9cc38e9e8cb45 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -348,7 +348,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool useSjLjEH() const { return UseSjLjEH; } bool hasBaseDSP() const { if (isThumb()) - return hasDSP(); + return hasThumb2() && hasDSP(); else return hasV5TEOps(); } diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll index fc9cd2d5ef5b57..1632c4e86c7629 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefixes=CHECK-T1,CHECK-T16 ; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP ; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP ; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODPS ; RUN: llc < %s -mtriple=armv5te-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP ; RUN: llc < %s -mtriple=armv5te-none-eabi -mattr=+dsp | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP +; RUN: llc < %s -mtriple=armv5te-none-eabi -mattr=+dsp,thumb-mode | FileCheck %s --check-prefixes=CHECK-T1,CHECK-T15TE +; RUN: llc < %s -mtriple=armv6-none-eabi -mattr=+dsp,thumb-mode | FileCheck %s --check-prefixes=CHECK-T1,CHECK-T16 ; RUN: llc < %s -mtriple=armv6-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMDSP declare i4 @llvm.sadd.sat.i4(i4, i4) @@ -60,33 +62,33 @@ define i32 @func(i32 %x, i32 %y) nounwind { } define i64 @func2(i64 %x, i64 %y) nounwind { -; CHECK-T1-LABEL: func2: -; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: .save {r4, lr} -; CHECK-T1-NEXT: push {r4, lr} -; CHECK-T1-NEXT: mov r4, r1 -; CHECK-T1-NEXT: eors r1, r3 -; CHECK-T1-NEXT: adds r2, r0, r2 -; CHECK-T1-NEXT: adcs r3, r4 -; CHECK-T1-NEXT: eors r4, r3 -; CHECK-T1-NEXT: bics r4, r1 -; CHECK-T1-NEXT: asrs r1, r3, #31 -; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: bmi .LBB1_2 -; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: .LBB1_2: -; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: bmi .LBB1_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r1, r3 -; CHECK-T1-NEXT: pop {r4, pc} -; CHECK-T1-NEXT: .LBB1_4: -; CHECK-T1-NEXT: movs r2, #1 -; CHECK-T1-NEXT: lsls r2, r2, #31 -; CHECK-T1-NEXT: eors r1, r2 -; CHECK-T1-NEXT: pop {r4, pc} +; CHECK-T16-LABEL: func2: +; CHECK-T16: @ %bb.0: +; CHECK-T16-NEXT: .save {r4, lr} +; CHECK-T16-NEXT: push {r4, lr} +; CHECK-T16-NEXT: mov r4, r1 +; CHECK-T16-NEXT: eors r1, r3 +; CHECK-T16-NEXT: adds r2, r0, r2 +; CHECK-T16-NEXT: adcs r3, r4 +; CHECK-T16-NEXT: eors r4, r3 +; CHECK-T16-NEXT: bics r4, r1 +; CHECK-T16-NEXT: asrs r1, r3, #31 +; CHECK-T16-NEXT: cmp r4, #0 +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: bmi .LBB1_2 +; CHECK-T16-NEXT: @ %bb.1: +; CHECK-T16-NEXT: mov r0, r2 +; CHECK-T16-NEXT: .LBB1_2: +; CHECK-T16-NEXT: cmp r4, #0 +; CHECK-T16-NEXT: bmi .LBB1_4 +; CHECK-T16-NEXT: @ %bb.3: +; CHECK-T16-NEXT: mov r1, r3 +; CHECK-T16-NEXT: pop {r4, pc} +; CHECK-T16-NEXT: .LBB1_4: +; CHECK-T16-NEXT: movs r2, #1 +; CHECK-T16-NEXT: lsls r2, r2, #31 +; CHECK-T16-NEXT: eors r1, r2 +; CHECK-T16-NEXT: pop {r4, pc} ; ; CHECK-T2-LABEL: func2: ; CHECK-T2: @ %bb.0: @@ -115,6 +117,35 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31 ; CHECK-ARM-NEXT: mov r1, r2 ; CHECK-ARM-NEXT: bx lr +; +; CHECK-T15TE-LABEL: func2: +; CHECK-T15TE: @ %bb.0: +; CHECK-T15TE-NEXT: .save {r4, lr} +; CHECK-T15TE-NEXT: push {r4, lr} +; CHECK-T15TE-NEXT: movs r4, r1 +; CHECK-T15TE-NEXT: eors r1, r3 +; CHECK-T15TE-NEXT: adds r2, r0, r2 +; CHECK-T15TE-NEXT: adcs r3, r4 +; CHECK-T15TE-NEXT: eors r4, r3 +; CHECK-T15TE-NEXT: bics r4, r1 +; CHECK-T15TE-NEXT: asrs r1, r3, #31 +; CHECK-T15TE-NEXT: cmp r4, #0 +; CHECK-T15TE-NEXT: push {r1} +; CHECK-T15TE-NEXT: pop {r0} +; CHECK-T15TE-NEXT: bmi .LBB1_2 +; CHECK-T15TE-NEXT: @ %bb.1: +; CHECK-T15TE-NEXT: movs r0, r2 +; CHECK-T15TE-NEXT: .LBB1_2: +; CHECK-T15TE-NEXT: cmp r4, #0 +; CHECK-T15TE-NEXT: bmi .LBB1_4 +; CHECK-T15TE-NEXT: @ %bb.3: +; CHECK-T15TE-NEXT: movs r1, r3 +; CHECK-T15TE-NEXT: pop {r4, pc} +; CHECK-T15TE-NEXT: .LBB1_4: +; CHECK-T15TE-NEXT: movs r2, #1 +; CHECK-T15TE-NEXT: lsls r2, r2, #31 +; CHECK-T15TE-NEXT: eors r1, r2 +; CHECK-T15TE-NEXT: pop {r4, pc} %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) ret i64 %tmp } @@ -127,13 +158,13 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { ; CHECK-T1-NEXT: cmp r0, r1 ; CHECK-T1-NEXT: blt .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r1 +; CHECK-T1-NEXT: {{movs|mov}} r0, r1 ; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: ldr r1, .LCPI2_1 ; CHECK-T1-NEXT: cmp r0, r1 ; CHECK-T1-NEXT: bgt .LBB2_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r0, r1 +; CHECK-T1-NEXT: {{movs|mov}} r0, r1 ; CHECK-T1-NEXT: .LBB2_4: ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 @@ -196,13 +227,13 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { ; CHECK-T1-NEXT: cmp r0, #127 ; CHECK-T1-NEXT: blt .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r1 +; CHECK-T1-NEXT: {{movs|mov}} r0, r1 ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 ; CHECK-T1-NEXT: bgt .LBB3_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r0, r1 +; CHECK-T1-NEXT: {{movs|mov}} r0, r1 ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; @@ -252,13 +283,13 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; CHECK-T1-NEXT: cmp r0, #7 ; CHECK-T1-NEXT: blt .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r1 +; CHECK-T1-NEXT: {{movs|mov}} r0, r1 ; CHECK-T1-NEXT: .LBB4_2: ; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 ; CHECK-T1-NEXT: bgt .LBB4_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r0, r1 +; CHECK-T1-NEXT: {{movs|mov}} r0, r1 ; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index f152767f84788b..4ed55541b86b1f 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1534,3 +1534,53 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { %g = extractelement <32 x i1> %f, i32 0 ret i1 %g } + +; PR59867 +define i1 @select_v2i8(ptr %s0, ptr %s1) { +; SSE2-LABEL: select_v2i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movzwl (%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzwl (%rsi), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: movmskpd %xmm0, %eax +; SSE2-NEXT: cmpl $3, %eax +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: select_v2i8: +; SSE42: # %bb.0: +; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; SSE42-NEXT: pxor %xmm0, %xmm1 +; SSE42-NEXT: ptest %xmm1, %xmm1 +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq +; +; AVX1OR2-LABEL: select_v2i8: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX1OR2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vptest %xmm0, %xmm0 +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: select_v2i8: +; AVX512: # %bb.0: +; AVX512-NEXT: movzwl (%rdi), %eax +; AVX512-NEXT: cmpw (%rsi), %ax +; AVX512-NEXT: sete %al +; AVX512-NEXT: retq + %v0 = load <2 x i8>, ptr %s0, align 1 + %v1 = load <2 x i8>, ptr %s1, align 1 + %cmp = icmp eq <2 x i8> %v0, %v1 + %cmp0 = extractelement <2 x i1> %cmp, i32 0 + %cmp1 = extractelement <2 x i1> %cmp, i32 1 + %res = select i1 %cmp0, i1 %cmp1, i1 false + ret i1 %res +} diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index 9dd4302f9a7402..ec9c59029b01b0 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -1417,3 +1417,59 @@ define {i32, i1} @test_v16i8_muti_uses(<16 x i8> %x, <16 x i8>%y, <16 x i8> %z) %r2 = insertvalue {i32, i1} %r1, i1 %c, 1 ret {i32, i1} %r2 } + +; PR59867 +define i1 @select_v2i8(ptr %s0, ptr %s1) { +; SSE2-LABEL: select_v2i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movzwl (%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzwl (%rsi), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: movmskpd %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: select_v2i8: +; SSE42: # %bb.0: +; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; SSE42-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE42-NEXT: movmskpd %xmm1, %eax +; SSE42-NEXT: testl %eax, %eax +; SSE42-NEXT: setne %al +; SSE42-NEXT: retq +; +; AVX1OR2-LABEL: select_v2i8: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0 +; AVX1OR2-NEXT: setne %al +; AVX1OR2-NEXT: retq +; +; AVX512-LABEL: select_v2i8: +; AVX512: # %bb.0: +; AVX512-NEXT: movzwl (%rdi), %eax +; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: movzwl (%rsi), %eax +; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: testb $3, %al +; AVX512-NEXT: setne %al +; AVX512-NEXT: retq + %v0 = load <2 x i8>, ptr %s0, align 1 + %v1 = load <2 x i8>, ptr %s1, align 1 + %cmp = icmp eq <2 x i8> %v0, %v1 + %cmp0 = extractelement <2 x i1> %cmp, i32 0 + %cmp1 = extractelement <2 x i1> %cmp, i32 1 + %res = select i1 %cmp0, i1 true, i1 %cmp1 + ret i1 %res +}