Skip to content

Commit

Permalink
Merged main:1746c7838ee0 into amd-gfx:6822a819f13a
Browse files Browse the repository at this point in the history
Local branch amd-gfx 6822a81 Merged main:8ac8c922fb3f into amd-gfx:83d58f188c87
Remote branch main 1746c78 [X86] Add DAG test coverage for Issue llvm#59867 patterns
  • Loading branch information
SC llvm team authored and SC llvm team committed Apr 23, 2023
2 parents 6822a81 + 1746c78 commit b75cb1b
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 58 deletions.
18 changes: 2 additions & 16 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1919,14 +1919,7 @@ int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,

int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
const RelocationRef &Rel) {
if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
return getRelocationAddend(ELF32LE, Rel);
if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
return getRelocationAddend(ELF64LE, Rel);
if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
return getRelocationAddend(ELF32BE, Rel);
auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
return getRelocationAddend(ELF64BE, Rel);
return getRelocationAddend(cast<ELF64LEObjectFile>(Obj), Rel);
}

template <typename ELFT>
Expand All @@ -1953,14 +1946,7 @@ uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,

uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
const RelocationRef &Rel) {
if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
return getRelocationSymbol(ELF32LE, Rel);
if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
return getRelocationSymbol(ELF64LE, Rel);
if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
return getRelocationSymbol(ELF32BE, Rel);
auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
return getRelocationSymbol(ELF64BE, Rel);
return getRelocationSymbol(cast<ELF64LEObjectFile>(Obj), Rel);
}
} // anonymous namespace

Expand Down
9 changes: 4 additions & 5 deletions clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,11 @@ using MacroList = SmallVector<EnumMacro>;
enum class IncludeGuard { None, FileChanged, IfGuard, DefineGuard };

struct FileState {
FileState()
: ConditionScopes(0), LastLine(0), GuardScanner(IncludeGuard::None) {}
FileState() = default;

int ConditionScopes;
unsigned int LastLine;
IncludeGuard GuardScanner;
int ConditionScopes = 0;
unsigned int LastLine = 0;
IncludeGuard GuardScanner = IncludeGuard::None;
SourceLocation LastMacroLocation;
};

Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Config/llvm-config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

/* Indicate that this is LLVM compiled from the amd-gfx branch. */
#define LLVM_HAVE_BRANCH_AMD_GFX
#define LLVM_MAIN_REVISION 458748
#define LLVM_MAIN_REVISION 458752

/* Define if LLVM_ENABLE_DUMP is enabled */
#cmakedefine LLVM_ENABLE_DUMP
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5048,7 +5048,7 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
EVT VT = Op.getValueType();
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
return SDValue();
if (!VT.isSimple())
return SDValue();
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool useSjLjEH() const { return UseSjLjEH; }
bool hasBaseDSP() const {
if (isThumb())
return hasDSP();
return hasThumb2() && hasDSP();
else
return hasV5TEOps();
}
Expand Down
99 changes: 65 additions & 34 deletions llvm/test/CodeGen/ARM/sadd_sat.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefixes=CHECK-T1,CHECK-T16
; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP
; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP
; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODPS
; RUN: llc < %s -mtriple=armv5te-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP
; RUN: llc < %s -mtriple=armv5te-none-eabi -mattr=+dsp | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP
; RUN: llc < %s -mtriple=armv5te-none-eabi -mattr=+dsp,thumb-mode | FileCheck %s --check-prefixes=CHECK-T1,CHECK-T15TE
; RUN: llc < %s -mtriple=armv6-none-eabi -mattr=+dsp,thumb-mode | FileCheck %s --check-prefixes=CHECK-T1,CHECK-T16
; RUN: llc < %s -mtriple=armv6-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMDSP

declare i4 @llvm.sadd.sat.i4(i4, i4)
Expand Down Expand Up @@ -60,33 +62,33 @@ define i32 @func(i32 %x, i32 %y) nounwind {
}

define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T1-LABEL: func2:
; CHECK-T1: @ %bb.0:
; CHECK-T1-NEXT: .save {r4, lr}
; CHECK-T1-NEXT: push {r4, lr}
; CHECK-T1-NEXT: mov r4, r1
; CHECK-T1-NEXT: eors r1, r3
; CHECK-T1-NEXT: adds r2, r0, r2
; CHECK-T1-NEXT: adcs r3, r4
; CHECK-T1-NEXT: eors r4, r3
; CHECK-T1-NEXT: bics r4, r1
; CHECK-T1-NEXT: asrs r1, r3, #31
; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: bmi .LBB1_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: mov r0, r2
; CHECK-T1-NEXT: .LBB1_2:
; CHECK-T1-NEXT: cmp r4, #0
; CHECK-T1-NEXT: bmi .LBB1_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: mov r1, r3
; CHECK-T1-NEXT: pop {r4, pc}
; CHECK-T1-NEXT: .LBB1_4:
; CHECK-T1-NEXT: movs r2, #1
; CHECK-T1-NEXT: lsls r2, r2, #31
; CHECK-T1-NEXT: eors r1, r2
; CHECK-T1-NEXT: pop {r4, pc}
; CHECK-T16-LABEL: func2:
; CHECK-T16: @ %bb.0:
; CHECK-T16-NEXT: .save {r4, lr}
; CHECK-T16-NEXT: push {r4, lr}
; CHECK-T16-NEXT: mov r4, r1
; CHECK-T16-NEXT: eors r1, r3
; CHECK-T16-NEXT: adds r2, r0, r2
; CHECK-T16-NEXT: adcs r3, r4
; CHECK-T16-NEXT: eors r4, r3
; CHECK-T16-NEXT: bics r4, r1
; CHECK-T16-NEXT: asrs r1, r3, #31
; CHECK-T16-NEXT: cmp r4, #0
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: bmi .LBB1_2
; CHECK-T16-NEXT: @ %bb.1:
; CHECK-T16-NEXT: mov r0, r2
; CHECK-T16-NEXT: .LBB1_2:
; CHECK-T16-NEXT: cmp r4, #0
; CHECK-T16-NEXT: bmi .LBB1_4
; CHECK-T16-NEXT: @ %bb.3:
; CHECK-T16-NEXT: mov r1, r3
; CHECK-T16-NEXT: pop {r4, pc}
; CHECK-T16-NEXT: .LBB1_4:
; CHECK-T16-NEXT: movs r2, #1
; CHECK-T16-NEXT: lsls r2, r2, #31
; CHECK-T16-NEXT: eors r1, r2
; CHECK-T16-NEXT: pop {r4, pc}
;
; CHECK-T2-LABEL: func2:
; CHECK-T2: @ %bb.0:
Expand Down Expand Up @@ -115,6 +117,35 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31
; CHECK-ARM-NEXT: mov r1, r2
; CHECK-ARM-NEXT: bx lr
;
; CHECK-T15TE-LABEL: func2:
; CHECK-T15TE: @ %bb.0:
; CHECK-T15TE-NEXT: .save {r4, lr}
; CHECK-T15TE-NEXT: push {r4, lr}
; CHECK-T15TE-NEXT: movs r4, r1
; CHECK-T15TE-NEXT: eors r1, r3
; CHECK-T15TE-NEXT: adds r2, r0, r2
; CHECK-T15TE-NEXT: adcs r3, r4
; CHECK-T15TE-NEXT: eors r4, r3
; CHECK-T15TE-NEXT: bics r4, r1
; CHECK-T15TE-NEXT: asrs r1, r3, #31
; CHECK-T15TE-NEXT: cmp r4, #0
; CHECK-T15TE-NEXT: push {r1}
; CHECK-T15TE-NEXT: pop {r0}
; CHECK-T15TE-NEXT: bmi .LBB1_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r2
; CHECK-T15TE-NEXT: .LBB1_2:
; CHECK-T15TE-NEXT: cmp r4, #0
; CHECK-T15TE-NEXT: bmi .LBB1_4
; CHECK-T15TE-NEXT: @ %bb.3:
; CHECK-T15TE-NEXT: movs r1, r3
; CHECK-T15TE-NEXT: pop {r4, pc}
; CHECK-T15TE-NEXT: .LBB1_4:
; CHECK-T15TE-NEXT: movs r2, #1
; CHECK-T15TE-NEXT: lsls r2, r2, #31
; CHECK-T15TE-NEXT: eors r1, r2
; CHECK-T15TE-NEXT: pop {r4, pc}
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
ret i64 %tmp
}
Expand All @@ -127,13 +158,13 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: blt .LBB2_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB2_2:
; CHECK-T1-NEXT: ldr r1, .LCPI2_1
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: bgt .LBB2_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB2_4:
; CHECK-T1-NEXT: bx lr
; CHECK-T1-NEXT: .p2align 2
Expand Down Expand Up @@ -196,13 +227,13 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-T1-NEXT: cmp r0, #127
; CHECK-T1-NEXT: blt .LBB3_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB3_2:
; CHECK-T1-NEXT: mvns r1, r1
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: bgt .LBB3_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB3_4:
; CHECK-T1-NEXT: bx lr
;
Expand Down Expand Up @@ -252,13 +283,13 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-T1-NEXT: cmp r0, #7
; CHECK-T1-NEXT: blt .LBB4_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB4_2:
; CHECK-T1-NEXT: mvns r1, r1
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: bgt .LBB4_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB4_4:
; CHECK-T1-NEXT: bx lr
;
Expand Down
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/X86/vector-compare-all_of.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1534,3 +1534,53 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
%g = extractelement <32 x i1> %f, i32 0
ret i1 %g
}

; PR59867
; Loads two <2 x i8> vectors (align 1), compares them lane-wise for equality,
; and combines the two lane results with a logical AND written as a select.
; The function returns true only when both lanes compare equal.
define i1 @select_v2i8(ptr %s0, ptr %s1) {
; SSE2-LABEL: select_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movzwl (%rsi), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: cmpl $3, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE42-LABEL: select_v2i8:
; SSE42: # %bb.0:
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE42-NEXT: pxor %xmm0, %xmm1
; SSE42-NEXT: ptest %xmm1, %xmm1
; SSE42-NEXT: sete %al
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: select_v2i8:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1OR2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1OR2-NEXT: vptest %xmm0, %xmm0
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: select_v2i8:
; AVX512: # %bb.0:
; AVX512-NEXT: movzwl (%rdi), %eax
; AVX512-NEXT: cmpw (%rsi), %ax
; AVX512-NEXT: sete %al
; AVX512-NEXT: retq
%v0 = load <2 x i8>, ptr %s0, align 1
%v1 = load <2 x i8>, ptr %s1, align 1
%cmp = icmp eq <2 x i8> %v0, %v1
%cmp0 = extractelement <2 x i1> %cmp, i32 0
%cmp1 = extractelement <2 x i1> %cmp, i32 1
; Logical AND in select form: the result is %cmp1 when %cmp0 is true, else false.
%res = select i1 %cmp0, i1 %cmp1, i1 false
ret i1 %res
}
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/X86/vector-compare-any_of.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1417,3 +1417,59 @@ define {i32, i1} @test_v16i8_muti_uses(<16 x i8> %x, <16 x i8>%y, <16 x i8> %z)
%r2 = insertvalue {i32, i1} %r1, i1 %c, 1
ret {i32, i1} %r2
}

; PR59867
; Loads two <2 x i8> vectors (align 1), compares them lane-wise for equality,
; and combines the two lane results with a logical OR written as a select.
; The function returns true when at least one lane compares equal.
define i1 @select_v2i8(ptr %s0, ptr %s1) {
; SSE2-LABEL: select_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movzwl (%rsi), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE42-LABEL: select_v2i8:
; SSE42: # %bb.0:
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE42-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE42-NEXT: pcmpeqq %xmm0, %xmm1
; SSE42-NEXT: movmskpd %xmm1, %eax
; SSE42-NEXT: testl %eax, %eax
; SSE42-NEXT: setne %al
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: select_v2i8:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1OR2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
; AVX1OR2-NEXT: setne %al
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: select_v2i8:
; AVX512: # %bb.0:
; AVX512-NEXT: movzwl (%rdi), %eax
; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: movzwl (%rsi), %eax
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $3, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: retq
%v0 = load <2 x i8>, ptr %s0, align 1
%v1 = load <2 x i8>, ptr %s1, align 1
%cmp = icmp eq <2 x i8> %v0, %v1
%cmp0 = extractelement <2 x i1> %cmp, i32 0
%cmp1 = extractelement <2 x i1> %cmp, i32 1
; Logical OR in select form: the result is true when %cmp0 is true, else %cmp1.
%res = select i1 %cmp0, i1 true, i1 %cmp1
ret i1 %res
}

0 comments on commit b75cb1b

Please sign in to comment.