-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Fix feature flags dependecies #90612
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang Author: None (Lukacma) ChangesThis patch removes FEAT_FPMR from list of available of architecture features, instead enabling FMPR register by default. Patch is 25.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90612.diff 8 Files Affected:
diff --git a/clang/test/Driver/aarch64-v95a.c b/clang/test/Driver/aarch64-v95a.c
index 1037da65c8cb5c..538ff0876090b2 100644
--- a/clang/test/Driver/aarch64-v95a.c
+++ b/clang/test/Driver/aarch64-v95a.c
@@ -22,6 +22,14 @@
// RUN: %clang -target aarch64 -march=armv9.5-a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-CPA %s
// V95A-CPA: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.5a"{{.*}} "-target-feature" "+cpa"
+// RUN: %clang -target aarch64 -march=armv9.5a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-LUT %s
+// RUN: %clang -target aarch64 -march=armv9.5-a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-LUT %s
+// V95A-LUT: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.5a"{{.*}} "-target-feature" "+lut"
+
+// RUN: %clang -target aarch64 -march=armv9.5a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-FAMINMAX %s
+// RUN: %clang -target aarch64 -march=armv9.5-a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-FAMINMAX %s
+// V95A-FAMINMAX: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.5a"{{.*}} "-target-feature" "+faminmax"
+
// RUN: %clang -target aarch64 -march=armv9.5a+pauth-lr -### -c %s 2>&1 | FileCheck -check-prefix=V95A-PAUTHLR %s
// RUN: %clang -target aarch64 -march=armv9.5-a+pauth-lr -### -c %s 2>&1 | FileCheck -check-prefix=V95A-PAUTHLR %s
// V95A-PAUTHLR: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.5a"{{.*}} "-target-feature" "+pauth-lr"
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 805b963a7a13c7..b0df70f5343a57 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -164,7 +164,6 @@ enum ArchExtKind : unsigned {
AEK_RASv2 = 55, // FEAT_RASv2
AEK_ITE = 56, // FEAT_ITE
AEK_GCS = 57, // FEAT_GCS
- AEK_FPMR = 58, // FEAT_FPMR
AEK_FP8 = 59, // FEAT_FP8
AEK_FAMINMAX = 60, // FEAT_FAMINMAX
AEK_FP8FMA = 61, // FEAT_FP8FMA
@@ -291,18 +290,17 @@ inline constexpr ExtensionInfo Extensions[] = {
{"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_INIT, "", 0},
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
- {"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
- {"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+fpmr", 0},
+ {"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+faminmax, +lut, +bf16", 0},
{"faminmax", AArch64::AEK_FAMINMAX, "+faminmax", "-faminmax", FEAT_INIT, "", 0},
- {"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fpmr", 0},
- {"ssve-fp8fma", AArch64::AEK_SSVE_FP8FMA, "+ssve-fp8fma", "-ssve-fp8fma", FEAT_INIT, "+sme2", 0},
- {"fp8dot2", AArch64::AEK_FP8DOT2, "+fp8dot2", "-fp8dot2", FEAT_INIT, "", 0},
- {"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+sme2", 0},
- {"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "", 0},
- {"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0},
+ {"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fp8", 0},
+ {"ssve-fp8fma", AArch64::AEK_SSVE_FP8FMA, "+ssve-fp8fma", "-ssve-fp8fma", FEAT_INIT, "+sme2,+fp8", 0},
+ {"fp8dot2", AArch64::AEK_FP8DOT2, "+fp8dot2", "-fp8dot2", FEAT_INIT, "+fp8dot4", 0},
+ {"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+ssve-fp8dot4", 0},
+ {"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "+fp8fma", 0},
+ {"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+ssve-fp8fma", 0},
{"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0},
{"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0},
- {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme2,+fp8", 0},
+ {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme-f8f32", 0},
{"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0},
{"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_INIT, "", 0},
{"cpa", AArch64::AEK_CPA, "+cpa", "-cpa", FEAT_INIT, "", 0},
@@ -401,8 +399,6 @@ inline constexpr ExtensionDependency ExtensionDependencies[] = {
{AEK_SME, AEK_SMEFA64},
{AEK_SME2, AEK_SME2p1},
{AEK_SME2, AEK_SSVE_FP8FMA},
- {AEK_SME2, AEK_SSVE_FP8DOT2},
- {AEK_SME2, AEK_SSVE_FP8DOT4},
{AEK_SME2, AEK_SMEF8F16},
{AEK_SME2, AEK_SMEF8F32},
{AEK_FP8, AEK_SMEF8F16},
@@ -411,6 +407,16 @@ inline constexpr ExtensionDependency ExtensionDependencies[] = {
{AEK_PREDRES, AEK_SPECRES2},
{AEK_RAS, AEK_RASv2},
{AEK_RCPC, AEK_RCPC3},
+ {AEK_FAMINMAX, AEK_FP8},
+ {AEK_LUT, AEK_FP8},
+ {AEK_BF16, AEK_FP8},
+ {AEK_FP8, AEK_FP8FMA},
+ {AEK_FP8, AEK_SSVE_FP8FMA},
+ {AEK_FP8DOT4, AEK_FP8DOT2},
+ {AEK_SSVE_FP8DOT4, AEK_SSVE_FP8DOT2},
+ {AEK_FP8FMA, AEK_FP8DOT4},
+ {AEK_SSVE_FP8FMA, AEK_SSVE_FP8DOT4},
+ {AEK_SMEF8F32, AEK_SMEF8F16},
};
// clang-format on
@@ -499,7 +505,7 @@ inline constexpr ArchInfo ARMV9_3A = { VersionTuple{9, 3}, AProfile, "armv9.3-a
inline constexpr ArchInfo ARMV9_4A = { VersionTuple{9, 4}, AProfile, "armv9.4-a", "+v9.4a", (ARMV9_3A.DefaultExts |
AArch64::ExtensionBitset({AArch64::AEK_SPECRES2, AArch64::AEK_CSSC, AArch64::AEK_RASv2}))};
inline constexpr ArchInfo ARMV9_5A = { VersionTuple{9, 5}, AProfile, "armv9.5-a", "+v9.5a", (ARMV9_4A.DefaultExts |
- AArch64::ExtensionBitset({AArch64::AEK_CPA}))};
+ AArch64::ExtensionBitset({AArch64::AEK_CPA, AArch64::AEK_LUT, AArch64::AEK_FAMINMAX}))};
// For v8-R, we do not enable crypto and align with GCC that enables a more minimal set of optional architecture extensions.
inline constexpr ArchInfo ARMV8R = { VersionTuple{8, 0}, RProfile, "armv8-r", "+v8r", (ARMV8_5A.DefaultExts |
AArch64::ExtensionBitset({AArch64::AEK_SSBS,
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index efda45a72ef424..3a6cfef5b5ac5b 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -118,12 +118,6 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;
-def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true",
- "Enable FPMR Register (FEAT_FPMR)">;
-
-def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
- "Enable FP8 instructions (FEAT_FP8)">;
-
// This flag is currently still labeled as Experimental, but when fully
// implemented this should tell the compiler to use the zeroing pseudos to
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
@@ -520,34 +514,38 @@ def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",
"Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
-def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
- "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;
+def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
+ "Enable Lookup Table instructions (FEAT_LUT)">;
-def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
- "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;
+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
+ "Enable FP8 instructions (FEAT_FP8)", [FeatureFAMINMAX, FeatureLUT, FeatureBF16]>;
-def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true",
- "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">;
+def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
+ "Enable fp8 multiply-add instructions (FEAT_FP8FMA)", [FeatureFP8]>;
-def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true",
- "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>;
+def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
+ "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2, FeatureFP8]>;
def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",
- "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">;
+ "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)", [FeatureFP8FMA]>;
+
+def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true",
+ "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)", [FeatureFP8DOT4]>;
def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",
- "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;
-def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
- "Enable Lookup Table instructions (FEAT_LUT)">;
+ "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSSVE_FP8FMA]>;
+
+def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true",
+ "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSSVE_FP8DOT4]>;
def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
"Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;
-def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
- "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;
-
def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",
"Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>;
+
+def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
+ "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSMEF8F32]>;
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7abb58064a535..b6b40ff5873b0d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -171,8 +171,6 @@ def HasSME2 : Predicate<"Subtarget->hasSME2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
-def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
- AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
def HasFP8 : Predicate<"Subtarget->hasFP8()">,
AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 0564741c497000..65800ab093422a 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -1942,12 +1942,10 @@ def : RWSysReg<"PM", 0b11, 0b000, 0b0100, 0b0011, 0b001>;
// 2023 ISA Extension
// AArch64 Floating-point Mode Register controls behaviors of the FP8
-// instructions (FEAT_FPMR)
-let Requires = [{ {AArch64::FeatureFPMR} }] in {
+// instructions
// Op0 Op1 CRn CRm Op2
def : ROSysReg<"ID_AA64FPFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b111>;
def : RWSysReg<"FPMR", 0b11, 0b011, 0b0100, 0b0100, 0b010>;
-}
// v9.5a Software Stepping Enhancements (FEAT_STEP2)
// Op0 Op1 CRn CRm Op2
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index a3b966aa61550c..db5e59c8187579 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3718,7 +3718,6 @@ static const struct Extension {
{"sb", {AArch64::FeatureSB}},
{"ssbs", {AArch64::FeatureSSBS}},
{"tme", {AArch64::FeatureTME}},
- {"fpmr", {AArch64::FeatureFPMR}},
{"fp8", {AArch64::FeatureFP8}},
{"faminmax", {AArch64::FeatureFAMINMAX}},
{"fp8fma", {AArch64::FeatureFP8FMA}},
@@ -3731,7 +3730,7 @@ static const struct Extension {
{"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
{"sme-f8f16", {AArch64::FeatureSMEF8F16}},
{"sme-f8f32", {AArch64::FeatureSMEF8F32}},
- {"sme-fa64", {AArch64::FeatureSMEFA64}},
+ {"sme-fa64", {AArch64::FeatureSMEFA64}},
{"cpa", {AArch64::FeatureCPA}},
{"tlbiw", {AArch64::FeatureTLBIW}},
};
diff --git a/llvm/test/MC/AArch64/FP8/system-regs.s b/llvm/test/MC/AArch64/FP8/system-regs.s
index 4a396d4dff82bd..b93950d6eaf2f7 100644
--- a/llvm/test/MC/AArch64/FP8/system-regs.s
+++ b/llvm/test/MC/AArch64/FP8/system-regs.s
@@ -1,11 +1,9 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fpmr < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fpmr < %s \
-// RUN: | llvm-objdump -d --mattr=+fpmr - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fpmr < %s \
-// RUN: | llvm-objdump --mattr=-fpmr -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -filetype=obj < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj < %s \
+// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// --------------------------------------------------------------------------//
// read
@@ -13,14 +11,12 @@
mrs x3, FPMR
// CHECK-INST: mrs x3, FPMR
// CHECK-ENCODING: [0x43,0x44,0x3b,0xd5]
-// CHECK-ERROR: expected readable system register
-// CHECK-UNKNOWN: d53b4443 mrs x3, S3_3_C4_C4_2
+// CHECK-UNKNOWN: d53b4443 mrs x3, FPMR
mrs x3, ID_AA64FPFR0_EL1
// CHECK-INST: mrs x3, ID_AA64FPFR0_EL1
// CHECK-ENCODING: [0xe3,0x04,0x38,0xd5]
-// CHECK-ERROR: expected readable system register
-// CHECK-UNKNOWN: d53804e3 mrs x3, S3_0_C0_C4_7
+// CHECK-UNKNOWN: d53804e3 mrs x3, ID_AA64FPFR0_EL1
// --------------------------------------------------------------------------//
// write
@@ -28,5 +24,4 @@ mrs x3, ID_AA64FPFR0_EL1
msr FPMR, x3
// CHECK-INST: msr FPMR, x3
// CHECK-ENCODING: [0x43,0x44,0x1b,0xd5]
-// CHECK-ERROR: expected writable system register or pstate
-// CHECK-UNKNOWN: d51b4443 msr S3_3_C4_C4_2, x3
+// CHECK-UNKNOWN: d51b4443 msr FPMR, x3
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 2c72a7229b5274..1a5241b0182d70 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1899,44 +1899,43 @@ TEST(TargetParserTest, testAArch64Extension) {
TEST(TargetParserTest, AArch64ExtensionFeatures) {
std::vector<uint64_t> Extensions = {
- AArch64::AEK_CRC, AArch64::AEK_LSE,
- AArch64::AEK_RDM, AArch64::AEK_CRYPTO,
- AArch64::AEK_SM4, AArch64::AEK_SHA3,
- AArch64::AEK_SHA2, AArch64::AEK_AES,
- AArch64::AEK_DOTPROD, AArch64::AEK_FP,
- AArch64::AEK_SIMD, AArch64::AEK_FP16,
- AArch64::AEK_FP16FML, AArch64::AEK_PROFILE,
- AArch64::AEK_RAS, AArch64::AEK_SVE,
- AArch64::AEK_SVE2, AArch64::AEK_SVE2AES,
- AArch64::AEK_SVE2SM4, AArch64::AEK_SVE2SHA3,
- AArch64::AEK_SVE2BITPERM, AArch64::AEK_RCPC,
- AArch64::AEK_RAND, AArch64::AEK_MTE,
- AArch64::AEK_SSBS, AArch64::AEK_SB,
- AArch64::AEK_PREDRES, AArch64::AEK_BF16,
- AArch64::AEK_I8MM, AArch64::AEK_F32MM,
- AArch64::AEK_F64MM, AArch64::AEK_TME,
- AArch64::AEK_LS64, AArch64::AEK_BRBE,
- AArch64::AEK_PAUTH, AArch64::AEK_FLAGM,
- AArch64::AEK_SME, AArch64::AEK_SMEF64F64,
- AArch64::AEK_SMEI16I64, AArch64::AEK_SME2,
- AArch64::AEK_HBC, AArch64::AEK_MOPS,
- AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1,
- AArch64::AEK_SME2p1, AArch64::AEK_B16B16,
- AArch64::AEK_SMEF16F16, AArch64::AEK_CSSC,
- AArch64::AEK_RCPC3, AArch64::AEK_THE,
- AArch64::AEK_D128, AArch64::AEK_LSE128,
- AArch64::AEK_SPECRES2, AArch64::AEK_RASv2,
- AArch64::AEK_ITE, AArch64::AEK_GCS,
- AArch64::AEK_FPMR, AArch64::AEK_FP8,
- AArch64::AEK_FAMINMAX, AArch64::AEK_FP8FMA,
- AArch64::AEK_SSVE_FP8FMA, AArch64::AEK_FP8DOT2,
- AArch64::AEK_SSVE_FP8DOT2, AArch64::AEK_FP8DOT4,
- AArch64::AEK_SSVE_FP8DOT4, AArch64::AEK_LUT,
- AArch64::AEK_SME_LUTv2, AArch64::AEK_SMEF8F16,
- AArch64::AEK_SMEF8F32, AArch64::AEK_SMEFA64,
- AArch64::AEK_CPA, AArch64::AEK_PAUTHLR,
- AArch64::AEK_TLBIW, AArch64::AEK_JSCVT,
- AArch64::AEK_FCMA,
+ AArch64::AEK_CRC, AArch64::AEK_LSE,
+ AArch64::AEK_RDM, AArch64::AEK_CRYPTO,
+ AArch64::AEK_SM4, AArch64::AEK_SHA3,
+ AArch64::AEK_SHA2, AArch64::AEK_AES,
+ AArch64::AEK_DOTPROD, AArch64::AEK_FP,
+ AArch64::AEK_SIMD, AArch64::AEK_FP16,
+ AArch64::AEK_FP16FML, AArch64::AEK_PROFILE,
+ AArch64::AEK_RAS, AArch64::AEK_SVE,
+ AArch64::AEK_SVE2, AArch64::AEK_SVE2AES,
+ AArch64::AEK_SVE2SM4, AArch64::AEK_SVE2SHA3,
+ AArch64::AEK_SVE2BITPERM, AArch64::AEK_RCPC,
+ AArch64::AEK_RAND, AArch64::AEK_MTE,
+ AArch64::AEK_SSBS, AArch64::AEK_SB,
+ AArch64::AEK_PREDRES, AArch64::AEK_BF16,
+ AArch64::AEK_I8MM, AArch64::AEK_F32MM,
+ AArch64::AEK_F64MM, AArch64::AEK_TME,
+ AArch64::AEK_LS64, AArch64::AEK_BRBE,
+ AArch64::AEK_PAUTH, AArch64::AEK_FLAGM,
+ AArch64::AEK_SME, AArch64::AEK_SMEF64F64,
+ AArch64::AEK_SMEI16I64, AArch64::AEK_SME2,
+ AArch64::AEK_HBC, AArch64::AEK_MOPS,
+ AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1,
+ AArch64::AEK_SME2p1, AArch64::AEK_B16B16,
+ AArch64::AEK_SMEF16F16, AArch64::AEK_CSSC,
+ AArch64::AEK_RCPC3, AArch64::AEK_THE,
+ AArch64::AEK_D128, AArch64::AEK_LSE128,
+ AArch64::AEK_SPECRES2, AArch64::AEK_RASv2,
+ AArch64::AEK_ITE, AArch64::AEK_GCS,
+ AArch64::AEK_FP8, AArch64::AEK_FAMINMAX,
+ AArch64::AEK_FP8FMA, AArch64::AEK_SSVE_FP8FMA,
+ AArch64::AEK_FP8DOT2, AArch64::AEK_SSVE_FP8DOT2,
+ AArch64::AEK_FP8DOT4, AArch64::AEK_SSVE_FP8DOT4,
+ AArch64::AEK_LUT, AArch64::AEK_FCMA,
+ AArch64::AEK_SME_LUTv2, AArch64::AEK_SMEF8F16,
+ AArch64::AEK_SMEF8F32, AArch64::AEK_SMEFA64,
+ AArch64::AEK_CPA, AArch64::AEK_PAUTHLR,
+ AArch64::AEK_TLBIW, AArch64::AEK_JSCVT,
};
std::vector<StringRef> Features;
@@ -2009,7 +2008,6 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+specres2"));
EXPECT_TRUE(llvm::is_contained(Features, "+ite"));
EXPECT_TRUE(llvm::is_contained(Features, "+gcs"));
- EXPECT_TRUE(llvm::is_contained(Features, "+fpmr"));
EXPECT_TRUE(llvm::is_contained(Features, "+fp8"));
EXPECT_TRUE(llvm::is_contained(Features, "+faminmax"));
EXPECT_TRUE(llvm::is_contained(Features, "+fp8fma"));
@@ -2155,7 +2153,6 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"predres2", "nopredres2...
[truncated]
|
{"fp8dot2", AArch64::AEK_FP8DOT2, "+fp8dot2", "-fp8dot2", FEAT_INIT, "+fp8dot4", 0}, | ||
{"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+ssve-fp8dot4", 0}, | ||
{"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "+fp8fma", 0}, | ||
{"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+ssve-fp8fma", 0}, | ||
{"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0}, | ||
{"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0}, | ||
{"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+fp8,+sme2", 0}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should also update :
{"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme-f8f32", 0},
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
{"ssve-fp8dot2", AArch64::AEK_SSVE_FP8DOT2, "+ssve-fp8dot2", "-ssve-fp8dot2", FEAT_INIT, "+sme2", 0}, | ||
{"fp8dot4", AArch64::AEK_FP8DOT4, "+fp8dot4", "-fp8dot4", FEAT_INIT, "", 0}, | ||
{"ssve-fp8dot4", AArch64::AEK_SSVE_FP8DOT4, "+ssve-fp8dot4", "-ssve-fp8dot4", FEAT_INIT, "+sme2", 0}, | ||
{"fp8fma", AArch64::AEK_FP8FMA, "+fp8fma", "-fp8fma", FEAT_INIT, "+fp8", 0}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just in case, @tmatheson-arm left a comment in this patch:
#88860
About FEAT_INIT and function multi-version.
If we have FEAT_INIT we should leave the list of dependencies empty, unless we want it to work with FMV
Maybe we should remove the dependencies from this file.
and only have
FEAT_INIT, ""
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might need reworked since I landed #90987. Ping me internally if I can help.
clang/test/Driver/aarch64-v95a.c
Outdated
// RUN: %clang -target aarch64 -march=armv9.5a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-FAMINMAX %s | ||
// RUN: %clang -target aarch64 -march=armv9.5-a+cpa -### -c %s 2>&1 | FileCheck -check-prefix=V95A-FAMINMAX %s | ||
// V95A-FAMINMAX: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.5a"{{.*}} "-target-feature" "+faminmax" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not combine these with the CPA test, rather than increasing the number of RUN lines (which will make the tests slower)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just followed the pattern for other flags here, you can see that pauth-lr and tlbiw have separate run lines as well. But I aggree that maybe we should collapse it into one run line
d07a0c7
to
2f8d7a5
Compare
Can you expand a little bit on the reasoning? It doesn't seem all that problematic but is still eyebrow-raising. |
The overall idea is that system registers ought be available everywhere without the need to explicitly enable them with a command line option. Since The FP8 instructions themselves are still guarded by a target feature. |
Interesting, that's kind of what I'm getting at: longer term, are you saying you folks are considering removing the feature predicates on all system registers (i.e., As for sysregs in general, I think we'd still want the ability to have predicates, in part because some of them can have conflicting encodings, in part to catch obvious mistakes when writing asm; the specific subtarget features would matter. But yeah, on most of them, mainly the architected ones, it's probably fine to do away with feature requirements. |
@momchil-velikov's commentary applies globally and is not specific to FPMR. Which is to say, Arm switched a while back from "all system register need to be protected by their feature flag" to "only protect system registers where there is a need". The rational is that we see it as being unnecessarily burdensome to asm writers to force them to use a feature flag in order to use the pretty printed version of an instruction they can already emit (this is especially true when dynamic feature detection is used, rather than wanting to explicitly say the feature must be present). We've no direct plans to revisit all previously implemented system registers unless there's a specific need. |
This patch removes FEAT_FPMR from list of available of architecture features, instead enabling FMPR register by default.
Additionally dependencies between architectural features are added and fixed.