Skip to content

Commit

Permalink
Merged main:85f3d5ca4994ff70a72f6ad81948bf4721e15ef1 into amd-gfx:7a2…
Browse files Browse the repository at this point in the history
…1ca85ec98

Local branch amd-gfx 7a21ca8 Merged main:03948882d3bac33cf71a47df1c7ee0f87aad9fc2 into amd-gfx:d2b54d8b4b44
Remote branch main 85f3d5c [AArch64] Add assembly/disassembly for SVE COMPACT (b/h) and EXPAND (llvm#114053)
  • Loading branch information
SC llvm team authored and SC llvm team committed Oct 30, 2024
2 parents 7a21ca8 + 85f3d5c commit f7d1f64
Show file tree
Hide file tree
Showing 56 changed files with 1,248 additions and 452 deletions.
2 changes: 0 additions & 2 deletions clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,8 +765,6 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
.Case("i8mm", HasMatMul)
.Case("bf16", HasBFloat16)
.Case("sve", FPU & SveMode)
.Case("sve-bf16", FPU & SveMode && HasBFloat16)
.Case("sve-i8mm", FPU & SveMode && HasMatMul)
.Case("sve-b16b16", HasSVEB16B16)
.Case("f32mm", FPU & SveMode && HasMatmulFP32)
.Case("f64mm", FPU & SveMode && HasMatmulFP64)
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/aarch64-cpu-supports-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ int check_all_feature() {
return 3;
else if (__builtin_cpu_supports("fcma+rcpc+rcpc2+rcpc3+frintts+dgh"))
return 4;
else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve+sve-bf16"))
else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve"))
return 5;
else if (__builtin_cpu_supports("sve-ebf16+sve-i8mm+f32mm+f64mm"))
else if (__builtin_cpu_supports("sve+ebf16+i8mm+f32mm+f64mm"))
return 6;
else if (__builtin_cpu_supports("sve2+sve2-aes+sve2-pmull128"))
return 7;
Expand Down
11 changes: 0 additions & 11 deletions clang/test/CodeGen/aarch64-fmv-dependencies.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,6 @@ __attribute__((target_version("ssbs"))) int fmv(void) { return 0; }
// CHECK: define dso_local i32 @fmv._Msve() #[[sve:[0-9]+]] {
__attribute__((target_version("sve"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Msve-bf16() #[[sve_bf16_ebf16:[0-9]+]] {
__attribute__((target_version("sve-bf16"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Msve-ebf16() #[[sve_bf16_ebf16:[0-9]+]] {
__attribute__((target_version("sve-ebf16"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Msve-i8mm() #[[sve_i8mm:[0-9]+]] {
__attribute__((target_version("sve-i8mm"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Msve2() #[[sve2:[0-9]+]] {
__attribute__((target_version("sve2"))) int fmv(void) { return 0; }

Expand Down Expand Up @@ -209,8 +200,6 @@ int caller() {
// CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a"
// CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a"
// CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
// CHECK: attributes #[[sve_bf16_ebf16]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
// CHECK: attributes #[[sve_i8mm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+i8mm,+neon,+outline-atomics,+sve,+v8a"
// CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a"
// CHECK: attributes #[[sve2_aes]] = { {{.*}} "target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-aes,+v8a"
// CHECK: attributes #[[sve2_bitperm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-bitperm,+v8a"
Expand Down
20 changes: 10 additions & 10 deletions clang/test/CodeGen/attr-target-version.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ int foo() {
inline int __attribute__((target_version("sha2+aes+f64mm"))) fmv_inline(void) { return 1; }
inline int __attribute__((target_version("fp16+fcma+rdma+sme+ fp16 "))) fmv_inline(void) { return 2; }
inline int __attribute__((target_version("sha3+i8mm+f32mm"))) fmv_inline(void) { return 12; }
inline int __attribute__((target_version("dit+sve-ebf16"))) fmv_inline(void) { return 8; }
inline int __attribute__((target_version("dit+ebf16"))) fmv_inline(void) { return 8; }
inline int __attribute__((target_version("dpb+rcpc2 "))) fmv_inline(void) { return 6; }
inline int __attribute__((target_version(" dpb2 + jscvt"))) fmv_inline(void) { return 7; }
inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { return 3; }
inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; }
inline int __attribute__((target_version("sve+bf16"))) fmv_inline(void) { return 4; }
inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
inline int __attribute__((target_version("sve2+sve2-aes+sve2-bitperm"))) fmv_inline(void) { return 9; }
inline int __attribute__((target_version("sve2-sm4+memtag"))) fmv_inline(void) { return 10; }
Expand Down Expand Up @@ -680,7 +680,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMsve-ebf16
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMebf16
// CHECK-SAME: () #[[ATTR28:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 8
Expand Down Expand Up @@ -708,7 +708,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MsveMsve-bf16
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mbf16Msve
// CHECK-SAME: () #[[ATTR32:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 4
Expand Down Expand Up @@ -837,20 +837,20 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
// CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3
// CHECK: resolver_else12:
// CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 4295098368
// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 4295098368
// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1207959552
// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1207959552
// CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]]
// CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]]
// CHECK: resolver_return13:
// CHECK-NEXT: ret ptr @fmv_inline._MditMsve-ebf16
// CHECK-NEXT: ret ptr @fmv_inline._Mbf16Msve
// CHECK: resolver_else14:
// CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 3221225472
// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 3221225472
// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 268566528
// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 268566528
// CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]]
// CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]]
// CHECK: resolver_return15:
// CHECK-NEXT: ret ptr @fmv_inline._MsveMsve-bf16
// CHECK-NEXT: ret ptr @fmv_inline._MditMebf16
// CHECK: resolver_else16:
// CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 20971520
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Sema/attr-target-clones-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ void __attribute__((target_clones("default+sha3"))) warn1(void);

// expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}}
// expected-note@+1 {{conflicting attribute is here}}
void __attribute__((target_version("sve-bf16"), target_clones("sme+memtag"))) not_compat(void);
void __attribute__((target_version("sve"), target_clones("sme+memtag"))) not_compat(void);

int redecl(void);
int __attribute__((target_clones("frintts", "simd+fp", "default"))) redecl(void) { return 1; }
Expand Down Expand Up @@ -78,4 +78,4 @@ int useage(void) {
// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; }
// expected-error@+1 {{'main' cannot be a multiversioned function}}
int __attribute__((target_clones("sve-i8mm"))) main() { return 1; }
int __attribute__((target_clones("i8mm"))) main() { return 1; }
4 changes: 2 additions & 2 deletions clang/test/SemaCXX/attr-target-version.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ double __attribute__((target_version("rcpc"))) diff_type1(void);

auto __attribute__((target_version("rcpc2"))) diff_type2(void) -> int { return 1; }
//expected-error@+1 {{multiversioned function declaration has a different return type}}
auto __attribute__((target_version("sve-bf16"))) diff_type2(void) -> long { return (long)1; }
auto __attribute__((target_version("bf16"))) diff_type2(void) -> long { return (long)1; }

int __attribute__((target_version("fp16fml"))) diff_type3(void) noexcept(false) { return 1; }
//expected-error@+2 {{exception specification in declaration does not match previous declaration}}
Expand All @@ -75,7 +75,7 @@ auto __attribute__((target_version("dpb2"))) ret3(void) -> int { return 1; }
class Cls {
__attribute__((target_version("rng"))) Cls();
// expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support constructors}}
__attribute__((target_version("sve-i8mm"))) ~Cls();
__attribute__((target_version("i8mm"))) ~Cls();
// expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support destructors}}

Cls &__attribute__((target_version("f32mm"))) operator=(const Cls &) = default;
Expand Down
6 changes: 3 additions & 3 deletions compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ enum CPUFeatures {
FEAT_EBF16,
FEAT_RPRES,
FEAT_SVE,
FEAT_SVE_BF16,
FEAT_SVE_EBF16,
FEAT_SVE_I8MM,
RESERVED_FEAT_SVE_BF16, // previously used and now ABI legacy
RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy
RESERVED_FEAT_SVE_I8MM, // previously used and now ABI legacy
FEAT_SVE_F32MM,
FEAT_SVE_F64MM,
FEAT_SVE2,
Expand Down
6 changes: 0 additions & 6 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,10 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_I8MM);
if (hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_EBF16);
if (hwcap2 & HWCAP2_SVE_EBF16)
setCPUFeature(FEAT_SVE_EBF16);
if (hwcap2 & HWCAP2_DGH)
setCPUFeature(FEAT_DGH);
if (hwcap2 & HWCAP2_FRINT)
setCPUFeature(FEAT_FRINTTS);
if (hwcap2 & HWCAP2_SVEI8MM)
setCPUFeature(FEAT_SVE_I8MM);
if (hwcap2 & HWCAP2_SVEF32MM)
setCPUFeature(FEAT_SVE_F32MM);
if (hwcap2 & HWCAP2_SVEF64MM)
Expand Down Expand Up @@ -119,8 +115,6 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_RCPC3);
if (hwcap2 & HWCAP2_BF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE);
if (hwcap2 & HWCAP2_SVE2)
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Semantics/openmp-directive-sets.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ static const OmpDirectiveSet blockConstructSet{
Directive::OMPD_ordered,
Directive::OMPD_parallel,
Directive::OMPD_parallel_masked,
Directive::OMPD_parallel_master,
Directive::OMPD_parallel_workshare,
Directive::OMPD_scope,
Directive::OMPD_single,
Expand Down
21 changes: 16 additions & 5 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ struct EntryBlockArgs {
reduction.isValid() && taskReduction.isValid() &&
useDeviceAddr.isValid() && useDevicePtr.isValid();
}

/// Returns one range that walks the `syms` lists of every entry block
/// argument category back to back, in this fixed order: inReduction, map,
/// priv, reduction, taskReduction, useDeviceAddr, useDevicePtr.
/// This is the same category order used by getVars().
/// NOTE(review): llvm::concat presumably yields a view over the member lists
/// rather than a copy -- confirm callers materialize it (e.g. with
/// llvm::to_vector) before this object goes away.
auto getSyms() const {
return llvm::concat<const semantics::Symbol *const>(
inReduction.syms, map.syms, priv.syms, reduction.syms,
taskReduction.syms, useDeviceAddr.syms, useDevicePtr.syms);
}

/// Returns one range that walks the `vars` lists of every entry block
/// argument category back to back, in this fixed order: inReduction, map,
/// priv, reduction, taskReduction, useDeviceAddr, useDevicePtr.
/// This is the same category order used by getSyms().
/// NOTE(review): llvm::concat presumably yields a view over the member lists
/// rather than a copy -- confirm the result is consumed while this object is
/// still alive.
auto getVars() const {
return llvm::concat<const mlir::Value>(
inReduction.vars, map.vars, priv.vars, reduction.vars,
taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars);
}
};
} // namespace

Expand Down Expand Up @@ -1506,8 +1518,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
genEntryBlock(converter, args, op->getRegion(0));
bindEntryBlockArgs(
converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
return llvm::to_vector(llvm::concat<const semantics::Symbol *const>(
args.priv.syms, args.reduction.syms));
return llvm::to_vector(args.getSyms());
};

assert((!enableDelayedPrivatization || dsp) &&
Expand Down Expand Up @@ -1581,11 +1592,11 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
mlir::Operation *terminator =
lower::genOpenMPTerminator(builder, sectionsOp, loc);

auto reductionCallback = [&](mlir::Operation *op) {
auto genRegionEntryCB = [&](mlir::Operation *op) {
genEntryBlock(converter, args, op->getRegion(0));
bindEntryBlockArgs(
converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
return reductionSyms;
return llvm::to_vector(args.getSyms());
};

// Generate nested SECTION constructs.
Expand All @@ -1611,7 +1622,7 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
llvm::omp::Directive::OMPD_section)
.setClauses(&sectionQueue.begin()->clauses)
.setGenRegionEntryCb(reductionCallback),
.setGenRegionEntryCb(genRegionEntryCB),
sectionQueue, sectionQueue.begin());
}

Expand Down
8 changes: 8 additions & 0 deletions flang/lib/Parser/openmp-parsers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,12 +583,19 @@ TYPE_PARSER(sourced(construct<OmpLoopDirective>(first(
"MASKED TASKLOOP SIMD" >>
pure(llvm::omp::Directive::OMPD_masked_taskloop_simd),
"MASKED TASKLOOP" >> pure(llvm::omp::Directive::OMPD_masked_taskloop),
"MASTER TASKLOOP SIMD" >>
pure(llvm::omp::Directive::OMPD_master_taskloop_simd),
"MASTER TASKLOOP" >> pure(llvm::omp::Directive::OMPD_master_taskloop),
"PARALLEL DO SIMD" >> pure(llvm::omp::Directive::OMPD_parallel_do_simd),
"PARALLEL DO" >> pure(llvm::omp::Directive::OMPD_parallel_do),
"PARALLEL MASKED TASKLOOP SIMD" >>
pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd),
"PARALLEL MASKED TASKLOOP" >>
pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop),
"PARALLEL MASTER TASKLOOP SIMD" >>
pure(llvm::omp::Directive::OMPD_parallel_master_taskloop_simd),
"PARALLEL MASTER TASKLOOP" >>
pure(llvm::omp::Directive::OMPD_parallel_master_taskloop),
"SIMD" >> pure(llvm::omp::Directive::OMPD_simd),
"TARGET LOOP" >> pure(llvm::omp::Directive::OMPD_target_loop),
"TARGET PARALLEL DO SIMD" >>
Expand Down Expand Up @@ -706,6 +713,7 @@ TYPE_PARSER(construct<OmpBlockDirective>(first(
"MASTER" >> pure(llvm::omp::Directive::OMPD_master),
"ORDERED" >> pure(llvm::omp::Directive::OMPD_ordered),
"PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked),
"PARALLEL MASTER" >> pure(llvm::omp::Directive::OMPD_parallel_master),
"PARALLEL WORKSHARE" >> pure(llvm::omp::Directive::OMPD_parallel_workshare),
"PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel),
"SCOPE" >> pure(llvm::omp::Directive::OMPD_scope),
Expand Down
15 changes: 15 additions & 0 deletions flang/lib/Parser/unparse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,12 @@ class UnparseVisitor {
case llvm::omp::Directive::OMPD_masked_taskloop:
Word("MASKED TASKLOOP");
break;
case llvm::omp::Directive::OMPD_master_taskloop_simd:
Word("MASTER TASKLOOP SIMD");
break;
case llvm::omp::Directive::OMPD_master_taskloop:
Word("MASTER TASKLOOP");
break;
case llvm::omp::Directive::OMPD_parallel_do:
Word("PARALLEL DO ");
break;
Expand All @@ -2286,6 +2292,12 @@ class UnparseVisitor {
case llvm::omp::Directive::OMPD_parallel_masked_taskloop:
Word("PARALLEL MASKED TASKLOOP");
break;
case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd:
Word("PARALLEL MASTER TASKLOOP SIMD");
break;
case llvm::omp::Directive::OMPD_parallel_master_taskloop:
Word("PARALLEL MASTER TASKLOOP");
break;
case llvm::omp::Directive::OMPD_simd:
Word("SIMD ");
break;
Expand Down Expand Up @@ -2390,6 +2402,9 @@ class UnparseVisitor {
case llvm::omp::Directive::OMPD_parallel_masked:
Word("PARALLEL MASKED");
break;
case llvm::omp::Directive::OMPD_parallel_master:
Word("PARALLEL MASTER");
break;
case llvm::omp::Directive::OMPD_parallel_workshare:
Word("PARALLEL WORKSHARE ");
break;
Expand Down
Loading

0 comments on commit f7d1f64

Please sign in to comment.