Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update AArch64 features to Linux 6.10.6 #359

Merged
merged 1 commit into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions include/cpuinfo_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,31 @@ typedef struct {
int smef16f16 : 1; // FP16 to FP16 outer product.
int mops : 1; // Standardized memory operations.
int hbc : 1; // Hinted conditional branches.
int sveb16b16 : 1; // Non-widening BFloat16 to BFloat16 arithmetic for SVE2
// and SME2.
int lrcpc3 : 1; // Load-Acquire RCpc instructions version 3.
int lse128 : 1; // 128-bit Atomics.
int fpmr : 1; // Floating-point Mode Register.
int lut : 1; // Lookup table instructions with 2-bit and 4-bit indices.
int faminmax : 1; // Maximum and minimum absolute value instructions.
int f8cvt : 1; // FP scaling instructions and FP8 convert instructions.
int f8fma : 1; // FP8 to single-precision and half-precision
// multiply-accumulate instructions.
int f8dp4 : 1; // FP8 to single-precision 4-way dot product FDOT (4-way)
// instructions.
int f8dp2 : 1; // FP8 to half-precision 2-way dot product FDOT (2-way)
// instructions.
int f8e4m3 : 1; // Arm FP8 E4M3 format.
int f8e5m2 : 1; // Arm FP8 E5M2 format.
int smelutv2 : 1; // SME2 lookup table LUTI4 and MOVT instructions.
int smef8f16 : 1; // SME2 F8F16 instructions.
int smef8f32 : 1; // SME2 F8F32 instructions.
int smesf8fma : 1; // SVE2 FP8 to single-precision and half-precision
// multiply-accumulate instructions in Streaming SVE mode.
int smesf8dp4 : 1; // SVE2 FP8 to single-precision 4-way dot product FDOT
// (4-way) instructions in Streaming SVE mode.
int smesf8dp2 : 1; // SVE2 FP8 to half-precision 2-way dot product FDOT
// (2-way) instructions in Streaming SVE mode.

// Make sure to update Aarch64FeaturesEnum below if you add a field here.
} Aarch64Features;
Expand Down Expand Up @@ -288,6 +313,24 @@ typedef enum {
AARCH64_SME_F16F16,
AARCH64_MOPS,
AARCH64_HBC,
AARCH64_SVE_B16B16,
AARCH64_LRCPC3,
AARCH64_LSE128,
AARCH64_FPMR,
AARCH64_LUT,
AARCH64_FAMINMAX,
AARCH64_F8CVT,
AARCH64_F8FMA,
AARCH64_F8DP4,
AARCH64_F8DP2,
AARCH64_F8E4M3,
AARCH64_F8E5M2,
AARCH64_SME_LUTV2,
AARCH64_SME_F8F16,
AARCH64_SME_F8F32,
AARCH64_SME_SF8FMA,
AARCH64_SME_SF8DP4,
AARCH64_SME_SF8DP2,
AARCH64_LAST_,
} Aarch64FeaturesEnum;

Expand Down
18 changes: 18 additions & 0 deletions include/internal/hwcaps.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,24 @@ CPU_FEATURES_START_CPP_NAMESPACE
#define AARCH64_HWCAP2_SME_F16F16 (1UL << 42)
#define AARCH64_HWCAP2_MOPS (1UL << 43)
#define AARCH64_HWCAP2_HBC (1UL << 44)
#define AARCH64_HWCAP2_SVE_B16B16 (1UL << 45)
#define AARCH64_HWCAP2_LRCPC3 (1UL << 46)
#define AARCH64_HWCAP2_LSE128 (1UL << 47)
#define AARCH64_HWCAP2_FPMR (1UL << 48)
#define AARCH64_HWCAP2_LUT (1UL << 49)
#define AARCH64_HWCAP2_FAMINMAX (1UL << 50)
#define AARCH64_HWCAP2_F8CVT (1UL << 51)
#define AARCH64_HWCAP2_F8FMA (1UL << 52)
#define AARCH64_HWCAP2_F8DP4 (1UL << 53)
#define AARCH64_HWCAP2_F8DP2 (1UL << 54)
#define AARCH64_HWCAP2_F8E4M3 (1UL << 55)
#define AARCH64_HWCAP2_F8E5M2 (1UL << 56)
#define AARCH64_HWCAP2_SME_LUTV2 (1UL << 57)
#define AARCH64_HWCAP2_SME_F8F16 (1UL << 58)
#define AARCH64_HWCAP2_SME_F8F32 (1UL << 59)
#define AARCH64_HWCAP2_SME_SF8FMA (1UL << 60)
#define AARCH64_HWCAP2_SME_SF8DP4 (1UL << 61)
#define AARCH64_HWCAP2_SME_SF8DP2 (1UL << 62)

// https://elixir.bootlin.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
#define ARM_HWCAP_SWP (1UL << 0)
Expand Down
24 changes: 23 additions & 1 deletion src/impl_aarch64__base_implementation.inl
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,29 @@
LINE(AARCH64_SME_F16F16, smef16f16, "smef16f16", 0, \
AARCH64_HWCAP2_SME_F16F16) \
LINE(AARCH64_MOPS, mops, "mops", 0, AARCH64_HWCAP2_MOPS) \
LINE(AARCH64_HBC, hbc, "hbc", 0, AARCH64_HWCAP2_HBC)
LINE(AARCH64_HBC, hbc, "hbc", 0, AARCH64_HWCAP2_HBC) \
LINE(AARCH64_SVE_B16B16, sveb16b16, "sveb16b16", 0, \
AARCH64_HWCAP2_SVE_B16B16) \
LINE(AARCH64_LRCPC3, lrcpc3, "lrcpc3", 0, AARCH64_HWCAP2_LRCPC3) \
LINE(AARCH64_LSE128, lse128, "lse128", 0, AARCH64_HWCAP2_LSE128) \
LINE(AARCH64_FPMR, fpmr, "fpmr", 0, AARCH64_HWCAP2_FPMR) \
LINE(AARCH64_LUT, lut, "lut", 0, AARCH64_HWCAP2_LUT) \
LINE(AARCH64_FAMINMAX, faminmax, "faminmax", 0, AARCH64_HWCAP2_FAMINMAX) \
LINE(AARCH64_F8CVT, f8cvt, "f8cvt", 0, AARCH64_HWCAP2_F8CVT) \
LINE(AARCH64_F8FMA, f8fma, "f8fma", 0, AARCH64_HWCAP2_F8FMA) \
LINE(AARCH64_F8DP4, f8dp4, "f8dp4", 0, AARCH64_HWCAP2_F8DP4) \
LINE(AARCH64_F8DP2, f8dp2, "f8dp2", 0, AARCH64_HWCAP2_F8DP2) \
LINE(AARCH64_F8E4M3, f8e4m3, "f8e4m3", 0, AARCH64_HWCAP2_F8E4M3) \
LINE(AARCH64_F8E5M2, f8e5m2, "f8e5m2", 0, AARCH64_HWCAP2_F8E5M2) \
LINE(AARCH64_SME_LUTV2, smelutv2, "smelutv2", 0, AARCH64_HWCAP2_SME_LUTV2) \
LINE(AARCH64_SME_F8F16, smef8f16, "smef8f16", 0, AARCH64_HWCAP2_SME_F8F16) \
LINE(AARCH64_SME_F8F32, smef8f32, "smef8f32", 0, AARCH64_HWCAP2_SME_F8F32) \
LINE(AARCH64_SME_SF8FMA, smesf8fma, "smesf8fma", 0, \
AARCH64_HWCAP2_SME_SF8FMA) \
LINE(AARCH64_SME_SF8DP4, smesf8dp4, "smesf8dp4", 0, \
AARCH64_HWCAP2_SME_SF8DP4) \
LINE(AARCH64_SME_SF8DP2, smesf8dp2, "smesf8dp2", 0, AARCH64_HWCAP2_SME_SF8DP2)

#define INTROSPECTION_PREFIX Aarch64
#define INTROSPECTION_ENUM_PREFIX AARCH64
#include "define_introspection_and_hwcaps.inl"
18 changes: 18 additions & 0 deletions test/cpuinfo_aarch64_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,24 @@ CPU revision : 3)");
EXPECT_FALSE(info.features.smef16f16);
EXPECT_FALSE(info.features.mops);
EXPECT_FALSE(info.features.hbc);
EXPECT_FALSE(info.features.sveb16b16);
EXPECT_FALSE(info.features.lrcpc3);
EXPECT_FALSE(info.features.lse128);
EXPECT_FALSE(info.features.fpmr);
EXPECT_FALSE(info.features.lut);
EXPECT_FALSE(info.features.faminmax);
EXPECT_FALSE(info.features.f8cvt);
EXPECT_FALSE(info.features.f8fma);
EXPECT_FALSE(info.features.f8dp4);
EXPECT_FALSE(info.features.f8dp2);
EXPECT_FALSE(info.features.f8e4m3);
EXPECT_FALSE(info.features.f8e5m2);
EXPECT_FALSE(info.features.smelutv2);
EXPECT_FALSE(info.features.smef8f16);
EXPECT_FALSE(info.features.smef8f32);
EXPECT_FALSE(info.features.smesf8fma);
EXPECT_FALSE(info.features.smesf8dp4);
EXPECT_FALSE(info.features.smesf8dp2);
}
#elif defined(CPU_FEATURES_OS_MACOS)
TEST_F(CpuidAarch64Test, FromDarwinSysctlFromName) {
Expand Down
Loading