From ce2180803b05a9914127389b3b4cc56e44a1850e Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Tue, 26 Oct 2021 12:34:57 +0000 Subject: [PATCH] New code layout --- CMakeLists.txt | 7 +- include/cpu_features_macros.h | 21 +- include/cpuinfo_x86.h | 1 + src/copy.h | 19 + src/cpuinfo_aarch64.c | 151 -- src/define_introspection.inl | 84 + src/define_introspection_and_hwcaps.inl | 26 + src/define_tables.h | 67 - src/equals.h | 23 + src/impl_aarch64_linux_or_android.c | 150 ++ ...info_arm.c => impl_arm_linux_or_android.c} | 96 +- ...fo_mips.c => impl_mips_linux_or_android.c} | 46 +- src/{cpuinfo_ppc.c => impl_ppc_linux.c} | 133 +- ...86.c => impl_x86__base_implementation.inl} | 1401 ++++++++--------- src/impl_x86_freebsd.c | 66 + src/impl_x86_linux_or_android.c | 56 + src/impl_x86_macos.c | 52 + src/impl_x86_windows.c | 48 + src/string_view.c | 18 +- test/CMakeLists.txt | 16 +- 20 files changed, 1327 insertions(+), 1154 deletions(-) create mode 100644 src/copy.h delete mode 100644 src/cpuinfo_aarch64.c create mode 100644 src/define_introspection.inl create mode 100644 src/define_introspection_and_hwcaps.inl delete mode 100644 src/define_tables.h create mode 100644 src/equals.h create mode 100644 src/impl_aarch64_linux_or_android.c rename src/{cpuinfo_arm.c => impl_arm_linux_or_android.c} (70%) rename src/{cpuinfo_mips.c => impl_mips_linux_or_android.c} (72%) rename src/{cpuinfo_ppc.c => impl_ppc_linux.c} (50%) rename src/{cpuinfo_x86.c => impl_x86__base_implementation.inl} (82%) create mode 100644 src/impl_x86_freebsd.c create mode 100644 src/impl_x86_linux_or_android.c create mode 100644 src/impl_x86_macos.c create mode 100644 src/impl_x86_windows.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ed72eb7..f5d5fc7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,22 +65,19 @@ endif() macro(add_cpu_features_headers_and_sources HDRS_LIST_NAME SRCS_LIST_NAME) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpu_features_macros.h) list(APPEND 
${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpu_features_cache_info.h) + file(GLOB IMPL_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/src/impl_*.c") + list(APPEND ${SRCS_LIST_NAME} ${IMPL_SOURCES}) if(PROCESSOR_IS_MIPS) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_mips.h) - list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/src/cpuinfo_mips.c) elseif(PROCESSOR_IS_ARM) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_arm.h) - list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/src/cpuinfo_arm.c) elseif(PROCESSOR_IS_AARCH64) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_aarch64.h) - list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/src/cpuinfo_aarch64.c) elseif(PROCESSOR_IS_X86) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_x86.h) list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/internal/cpuid_x86.h) - list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/src/cpuinfo_x86.c) elseif(PROCESSOR_IS_POWER) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_ppc.h) - list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/src/cpuinfo_ppc.c) else() message(FATAL_ERROR "Unsupported architectures ${CMAKE_SYSTEM_PROCESSOR}") endif() diff --git a/include/cpu_features_macros.h b/include/cpu_features_macros.h index 96acf6d7..8f5c38ca 100644 --- a/include/cpu_features_macros.h +++ b/include/cpu_features_macros.h @@ -67,24 +67,33 @@ // Os //////////////////////////////////////////////////////////////////////////////// -#if defined(__linux__) -#define CPU_FEATURES_OS_LINUX_OR_ANDROID +#if (defined(__freebsd__) || defined(__FreeBSD__)) +#define CPU_FEATURES_OS_FREEBSD #endif #if defined(__ANDROID__) #define CPU_FEATURES_OS_ANDROID #endif +#if defined(__linux__) && !defined(CPU_FEATURES_OS_FREEBSD) && \ + !defined(CPU_FEATURES_OS_ANDROID) +#define CPU_FEATURES_OS_LINUX +#endif + #if (defined(_WIN64) || defined(_WIN32)) #define CPU_FEATURES_OS_WINDOWS #endif #if 
(defined(__apple__) || defined(__APPLE__) || defined(__MACH__)) -#define CPU_FEATURES_OS_DARWIN +// From https://stackoverflow.com/a/49560690 +#include "TargetConditionals.h" +#if defined(TARGET_OS_OSX) +#define CPU_FEATURES_OS_MACOS +#endif +#if defined(TARGET_OS_IPHONE) +// This is set for any non-Mac Apple products (IOS, TV, WATCH) +#define CPU_FEATURES_OS_IPHONE #endif - -#if (defined(__freebsd__) || defined(__FreeBSD__)) -#define CPU_FEATURES_OS_FREEBSD #endif //////////////////////////////////////////////////////////////////////////////// diff --git a/include/cpuinfo_x86.h b/include/cpuinfo_x86.h index 77a6988d..6285391d 100644 --- a/include/cpuinfo_x86.h +++ b/include/cpuinfo_x86.h @@ -152,6 +152,7 @@ typedef enum { AMD_ZEN_PLUS, // K17 ZEN+ AMD_ZEN2, // K17 ZEN 2 AMD_ZEN3, // K19 ZEN 3 + X86_MICROARCHITECTURE_LAST_, } X86Microarchitecture; // Returns the underlying microarchitecture by looking at X86Info's vendor, diff --git a/src/copy.h b/src/copy.h new file mode 100644 index 00000000..f819b8eb --- /dev/null +++ b/src/copy.h @@ -0,0 +1,19 @@ +#include <stddef.h> +#include <stdint.h> + +static void copy(char *__restrict dst, const char *src, size_t count) { + size_t offset = 0; + +#define CHUNK_COPY(TYPE) \ + while (count - offset >= sizeof(TYPE)) { \ + *(TYPE *)(dst + offset) = *(const TYPE *)(src + offset); \ + offset += sizeof(TYPE); \ + } + + CHUNK_COPY(uint64_t) + CHUNK_COPY(uint32_t) + CHUNK_COPY(uint16_t) + CHUNK_COPY(uint8_t) + +#undef CHUNK_COPY +} diff --git a/src/cpuinfo_aarch64.c b/src/cpuinfo_aarch64.c deleted file mode 100644 index 0b15759c..00000000 --- a/src/cpuinfo_aarch64.c +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "cpuinfo_aarch64.h" - -#include -#include - -#include "internal/filesystem.h" -#include "internal/hwcaps.h" -#include "internal/stack_line_reader.h" -#include "internal/string_view.h" - -// Generation of feature's getters/setters functions and kGetters, kSetters, -// kCpuInfoFlags and kHardwareCapabilities global tables. -#define DEFINE_TABLE_FEATURES \ - FEATURE(AARCH64_FP, fp, "fp", AARCH64_HWCAP_FP, 0) \ - FEATURE(AARCH64_ASIMD, asimd, "asimd", AARCH64_HWCAP_ASIMD, 0) \ - FEATURE(AARCH64_EVTSTRM, evtstrm, "evtstrm", AARCH64_HWCAP_EVTSTRM, 0) \ - FEATURE(AARCH64_AES, aes, "aes", AARCH64_HWCAP_AES, 0) \ - FEATURE(AARCH64_PMULL, pmull, "pmull", AARCH64_HWCAP_PMULL, 0) \ - FEATURE(AARCH64_SHA1, sha1, "sha1", AARCH64_HWCAP_SHA1, 0) \ - FEATURE(AARCH64_SHA2, sha2, "sha2", AARCH64_HWCAP_SHA2, 0) \ - FEATURE(AARCH64_CRC32, crc32, "crc32", AARCH64_HWCAP_CRC32, 0) \ - FEATURE(AARCH64_ATOMICS, atomics, "atomics", AARCH64_HWCAP_ATOMICS, 0) \ - FEATURE(AARCH64_FPHP, fphp, "fphp", AARCH64_HWCAP_FPHP, 0) \ - FEATURE(AARCH64_ASIMDHP, asimdhp, "asimdhp", AARCH64_HWCAP_ASIMDHP, 0) \ - FEATURE(AARCH64_CPUID, cpuid, "cpuid", AARCH64_HWCAP_CPUID, 0) \ - FEATURE(AARCH64_ASIMDRDM, asimdrdm, "asimdrdm", AARCH64_HWCAP_ASIMDRDM, 0) \ - FEATURE(AARCH64_JSCVT, jscvt, "jscvt", AARCH64_HWCAP_JSCVT, 0) \ - FEATURE(AARCH64_FCMA, fcma, "fcma", AARCH64_HWCAP_FCMA, 0) \ - FEATURE(AARCH64_LRCPC, lrcpc, "lrcpc", AARCH64_HWCAP_LRCPC, 0) \ - FEATURE(AARCH64_DCPOP, dcpop, "dcpop", AARCH64_HWCAP_DCPOP, 0) \ - FEATURE(AARCH64_SHA3, sha3, "sha3", AARCH64_HWCAP_SHA3, 0) \ - 
FEATURE(AARCH64_SM3, sm3, "sm3", AARCH64_HWCAP_SM3, 0) \ - FEATURE(AARCH64_SM4, sm4, "sm4", AARCH64_HWCAP_SM4, 0) \ - FEATURE(AARCH64_ASIMDDP, asimddp, "asimddp", AARCH64_HWCAP_ASIMDDP, 0) \ - FEATURE(AARCH64_SHA512, sha512, "sha512", AARCH64_HWCAP_SHA512, 0) \ - FEATURE(AARCH64_SVE, sve, "sve", AARCH64_HWCAP_SVE, 0) \ - FEATURE(AARCH64_ASIMDFHM, asimdfhm, "asimdfhm", AARCH64_HWCAP_ASIMDFHM, 0) \ - FEATURE(AARCH64_DIT, dit, "dit", AARCH64_HWCAP_DIT, 0) \ - FEATURE(AARCH64_USCAT, uscat, "uscat", AARCH64_HWCAP_USCAT, 0) \ - FEATURE(AARCH64_ILRCPC, ilrcpc, "ilrcpc", AARCH64_HWCAP_ILRCPC, 0) \ - FEATURE(AARCH64_FLAGM, flagm, "flagm", AARCH64_HWCAP_FLAGM, 0) \ - FEATURE(AARCH64_SSBS, ssbs, "ssbs", AARCH64_HWCAP_SSBS, 0) \ - FEATURE(AARCH64_SB, sb, "sb", AARCH64_HWCAP_SB, 0) \ - FEATURE(AARCH64_PACA, paca, "paca", AARCH64_HWCAP_PACA, 0) \ - FEATURE(AARCH64_PACG, pacg, "pacg", AARCH64_HWCAP_PACG, 0) \ - FEATURE(AARCH64_DCPODP, dcpodp, "dcpodp", 0, AARCH64_HWCAP2_DCPODP) \ - FEATURE(AARCH64_SVE2, sve2, "sve2", 0, AARCH64_HWCAP2_SVE2) \ - FEATURE(AARCH64_SVEAES, sveaes, "sveaes", 0, AARCH64_HWCAP2_SVEAES) \ - FEATURE(AARCH64_SVEPMULL, svepmull, "svepmull", 0, AARCH64_HWCAP2_SVEPMULL) \ - FEATURE(AARCH64_SVEBITPERM, svebitperm, "svebitperm", 0, \ - AARCH64_HWCAP2_SVEBITPERM) \ - FEATURE(AARCH64_SVESHA3, svesha3, "svesha3", 0, AARCH64_HWCAP2_SVESHA3) \ - FEATURE(AARCH64_SVESM4, svesm4, "svesm4", 0, AARCH64_HWCAP2_SVESM4) \ - FEATURE(AARCH64_FLAGM2, flagm2, "flagm2", 0, AARCH64_HWCAP2_FLAGM2) \ - FEATURE(AARCH64_FRINT, frint, "frint", 0, AARCH64_HWCAP2_FRINT) \ - FEATURE(AARCH64_SVEI8MM, svei8mm, "svei8mm", 0, AARCH64_HWCAP2_SVEI8MM) \ - FEATURE(AARCH64_SVEF32MM, svef32mm, "svef32mm", 0, AARCH64_HWCAP2_SVEF32MM) \ - FEATURE(AARCH64_SVEF64MM, svef64mm, "svef64mm", 0, AARCH64_HWCAP2_SVEF64MM) \ - FEATURE(AARCH64_SVEBF16, svebf16, "svebf16", 0, AARCH64_HWCAP2_SVEBF16) \ - FEATURE(AARCH64_I8MM, i8mm, "i8mm", 0, AARCH64_HWCAP2_I8MM) \ - FEATURE(AARCH64_BF16, bf16, "bf16", 0, 
AARCH64_HWCAP2_BF16) \ - FEATURE(AARCH64_DGH, dgh, "dgh", 0, AARCH64_HWCAP2_DGH) \ - FEATURE(AARCH64_RNG, rng, "rng", 0, AARCH64_HWCAP2_RNG) \ - FEATURE(AARCH64_BTI, bti, "bti", 0, AARCH64_HWCAP2_BTI) \ - FEATURE(AARCH64_MTE, mte, "mte", 0, AARCH64_HWCAP2_MTE) -#define DEFINE_TABLE_FEATURE_TYPE Aarch64Features -#include "define_tables.h" - -static bool HandleAarch64Line(const LineResult result, - Aarch64Info* const info) { - StringView line = result.line; - StringView key, value; - if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { - if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { - for (size_t i = 0; i < AARCH64_LAST_; ++i) { - kSetters[i](&info->features, CpuFeatures_StringView_HasWord( - value, kCpuInfoFlags[i], ' ')); - } - } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { - info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); - } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { - info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); - } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { - info->part = CpuFeatures_StringView_ParsePositiveNumber(value); - } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { - info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); - } - } - return !result.eof; -} - -static void FillProcCpuInfoData(Aarch64Info* const info) { - const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); - if (fd >= 0) { - StackLineReader reader; - StackLineReader_Initialize(&reader, fd); - for (;;) { - if (!HandleAarch64Line(StackLineReader_NextLine(&reader), info)) { - break; - } - } - CpuFeatures_CloseFile(fd); - } -} - -static const Aarch64Info kEmptyAarch64Info; - -Aarch64Info GetAarch64Info(void) { - // capabilities are fetched from both getauxval and /proc/cpuinfo so we can - // have some information if the executable is sandboxed (aka no access to - // /proc/cpuinfo). 
- Aarch64Info info = kEmptyAarch64Info; - - FillProcCpuInfoData(&info); - const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities(); - for (size_t i = 0; i < AARCH64_LAST_; ++i) { - if (CpuFeatures_IsHwCapsSet(kHardwareCapabilities[i], hwcaps)) { - kSetters[i](&info.features, true); - } - } - - return info; -} - -//////////////////////////////////////////////////////////////////////////////// -// Introspection functions - -int GetAarch64FeaturesEnumValue(const Aarch64Features* features, - Aarch64FeaturesEnum value) { - if (value >= AARCH64_LAST_) return false; - return kGetters[value](features); -} - -const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum value) { - if (value >= AARCH64_LAST_) return "unknown feature"; - return kCpuInfoFlags[value]; -} diff --git a/src/define_introspection.inl b/src/define_introspection.inl new file mode 100644 index 00000000..101a63bc --- /dev/null +++ b/src/define_introspection.inl @@ -0,0 +1,84 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef INTROSPECTION_PREFIX +#error "missing INTROSPECTION_PREFIX" +#endif +#ifndef INTROSPECTION_ENUM_PREFIX +#error "missing INTROSPECTION_ENUM_PREFIX" +#endif +#ifndef INTROSPECTION_TABLE +#error "missing INTROSPECTION_TABLE" +#endif + +#include <stdbool.h> + +#define STRINGIZE_(s) #s +#define STRINGIZE(s) STRINGIZE_(s) + +#define FEAT_TYPE_NAME__(X) X##Features +#define FEAT_TYPE_NAME_(X) FEAT_TYPE_NAME__(X) +#define FEAT_TYPE_NAME FEAT_TYPE_NAME_(INTROSPECTION_PREFIX) + +#define FEAT_ENUM_NAME__(X) X##FeaturesEnum +#define FEAT_ENUM_NAME_(X) FEAT_ENUM_NAME__(X) +#define FEAT_ENUM_NAME FEAT_ENUM_NAME_(INTROSPECTION_PREFIX) + +#define GET_FEAT_ENUM_VALUE__(X) Get##X##FeaturesEnumValue +#define GET_FEAT_ENUM_VALUE_(X) GET_FEAT_ENUM_VALUE__(X) +#define GET_FEAT_ENUM_VALUE GET_FEAT_ENUM_VALUE_(INTROSPECTION_PREFIX) + +#define GET_FEAT_ENUM_NAME__(X) Get##X##FeaturesEnumName +#define GET_FEAT_ENUM_NAME_(X) GET_FEAT_ENUM_NAME__(X) +#define GET_FEAT_ENUM_NAME GET_FEAT_ENUM_NAME_(INTROSPECTION_PREFIX) + +#define FEAT_ENUM_LAST__(X) X##_LAST_ +#define FEAT_ENUM_LAST_(X) FEAT_ENUM_LAST__(X) +#define FEAT_ENUM_LAST FEAT_ENUM_LAST_(INTROSPECTION_ENUM_PREFIX) + +// Generate individual getters and setters. +#define LINE(ENUM, NAME, A, B, C) \ + void set_##ENUM(FEAT_TYPE_NAME* features, bool value) { \ + features->NAME = value; \ + } \ + int get_##ENUM(const FEAT_TYPE_NAME* features) { return features->NAME; } +INTROSPECTION_TABLE +#undef LINE + +// Generate getters table +#define LINE(ENUM, NAME, A, B, C) [ENUM] = get_##ENUM, +static int (*const kGetters[])(const FEAT_TYPE_NAME*) = {INTROSPECTION_TABLE}; +#undef LINE + +// Generate setters table +#define LINE(ENUM, NAME, A, B, C) [ENUM] = set_##ENUM, +static void (*const kSetters[])(FEAT_TYPE_NAME*, bool) = {INTROSPECTION_TABLE}; +#undef LINE + +// Implements the `GetXXXFeaturesEnumValue` API.
+int GET_FEAT_ENUM_VALUE(const FEAT_TYPE_NAME* features, FEAT_ENUM_NAME value) { + if (value >= FEAT_ENUM_LAST) return false; + return kGetters[value](features); +} + +// Generate feature name table. +#define LINE(ENUM, NAME, A, B, C) [ENUM] = STRINGIZE(NAME), +static const char* kFeatureNames[] = {INTROSPECTION_TABLE}; +#undef LINE + +// Implements the `GetXXXFeaturesEnumName` API. +const char* GET_FEAT_ENUM_NAME(FEAT_ENUM_NAME value) { + if (value >= FEAT_ENUM_LAST) return "unknown_feature"; + return kFeatureNames[value]; +} diff --git a/src/define_introspection_and_hwcaps.inl b/src/define_introspection_and_hwcaps.inl new file mode 100644 index 00000000..c31b60d9 --- /dev/null +++ b/src/define_introspection_and_hwcaps.inl @@ -0,0 +1,26 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "define_introspection.inl" +#include "internal/hwcaps.h" + +#define LINE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) \ + [ENUM] = (HardwareCapabilities){HWCAP, HWCAP2}, +static const HardwareCapabilities kHardwareCapabilities[] = { + INTROSPECTION_TABLE}; +#undef LINE + +#define LINE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) [ENUM] = CPUINFO_FLAG, +static const char* kCpuInfoFlags[] = {INTROSPECTION_TABLE}; +#undef LINE diff --git a/src/define_tables.h b/src/define_tables.h deleted file mode 100644 index dc1485c4..00000000 --- a/src/define_tables.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// The following preprocessor constants must be defined before including this -// file: -// - DEFINE_TABLE_FEATURE_TYPE, the underlying type (e.g. X86Features) -// - DEFINE_TABLE_FEATURES, the list of FEATURE macros to be inserted. - -// This file is to be included once per `cpuinfo_XXX.c` in order to construct -// feature getters and setters functions as well as several enum indexed tables -// from the db file. -// - `kGetters` a table of getters function pointers from feature enum to -// retrieve a feature, -// - `kSetters` a table of setters function pointers from feature enum to set a -// feature, -// - `kCpuInfoFlags` a table of strings from feature enum to /proc/cpuinfo -// flags, -// - `kHardwareCapabilities` a table of HardwareCapabilities structs indexed by -// their feature enum. 
- -#ifndef SRC_DEFINE_TABLES_H_ -#define SRC_DEFINE_TABLES_H_ - -#define FEATURE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) [ENUM] = CPUINFO_FLAG, -static const char* kCpuInfoFlags[] = {DEFINE_TABLE_FEATURES}; -#undef FEATURE - -#ifndef DEFINE_TABLE_DONT_GENERATE_HWCAPS -#define FEATURE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) \ - [ENUM] = (HardwareCapabilities){HWCAP, HWCAP2}, -static const HardwareCapabilities kHardwareCapabilities[] = { - DEFINE_TABLE_FEATURES}; -#undef FEATURE -#endif // DEFINE_TABLE_DONT_GENERATE_HWCAPS - -#define FEATURE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) \ - static void set_##ENUM(DEFINE_TABLE_FEATURE_TYPE* features, bool value) { \ - features->NAME = value; \ - } \ - static int get_##ENUM(const DEFINE_TABLE_FEATURE_TYPE* features) { \ - return features->NAME; \ - } -DEFINE_TABLE_FEATURES -#undef FEATURE - -#define FEATURE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) [ENUM] = set_##ENUM, -static void (*const kSetters[])(DEFINE_TABLE_FEATURE_TYPE*, - bool) = {DEFINE_TABLE_FEATURES}; -#undef FEATURE - -#define FEATURE(ENUM, NAME, CPUINFO_FLAG, HWCAP, HWCAP2) [ENUM] = get_##ENUM, -static int (*const kGetters[])(const DEFINE_TABLE_FEATURE_TYPE*) = { - DEFINE_TABLE_FEATURES}; -#undef FEATURE - -#endif // SRC_DEFINE_TABLES_H_ diff --git a/src/equals.h b/src/equals.h new file mode 100644 index 00000000..0df4ccf0 --- /dev/null +++ b/src/equals.h @@ -0,0 +1,23 @@ +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +static bool equals(const char *lhs, const char *rhs, size_t count) { + size_t offset = 0; + +#define CHUNK_EQUALS(TYPE) \ + while (count - offset >= sizeof(TYPE)) { \ + TYPE l = *(const TYPE *)(lhs + offset); \ + TYPE r = *(const TYPE *)(rhs + offset); \ + if (l != r) return false; \ + offset += sizeof(TYPE); \ + } + + CHUNK_EQUALS(uint64_t) + CHUNK_EQUALS(uint32_t) + CHUNK_EQUALS(uint16_t) + CHUNK_EQUALS(uint8_t) +#undef CHUNK_EQUALS + + return true; +} diff --git a/src/impl_aarch64_linux_or_android.c b/src/impl_aarch64_linux_or_android.c new file mode
100644 index 00000000..745beb9c --- /dev/null +++ b/src/impl_aarch64_linux_or_android.c @@ -0,0 +1,150 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_AARCH64 +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + +#include "cpuinfo_aarch64.h" + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(AARCH64_FP, fp, "fp", AARCH64_HWCAP_FP, 0) \ + LINE(AARCH64_ASIMD, asimd, "asimd", AARCH64_HWCAP_ASIMD, 0) \ + LINE(AARCH64_EVTSTRM, evtstrm, "evtstrm", AARCH64_HWCAP_EVTSTRM, 0) \ + LINE(AARCH64_AES, aes, "aes", AARCH64_HWCAP_AES, 0) \ + LINE(AARCH64_PMULL, pmull, "pmull", AARCH64_HWCAP_PMULL, 0) \ + LINE(AARCH64_SHA1, sha1, "sha1", AARCH64_HWCAP_SHA1, 0) \ + LINE(AARCH64_SHA2, sha2, "sha2", AARCH64_HWCAP_SHA2, 0) \ + LINE(AARCH64_CRC32, crc32, "crc32", AARCH64_HWCAP_CRC32, 0) \ + LINE(AARCH64_ATOMICS, atomics, "atomics", AARCH64_HWCAP_ATOMICS, 0) \ + LINE(AARCH64_FPHP, fphp, "fphp", AARCH64_HWCAP_FPHP, 0) \ + LINE(AARCH64_ASIMDHP, asimdhp, "asimdhp", AARCH64_HWCAP_ASIMDHP, 0) \ + LINE(AARCH64_CPUID, cpuid, "cpuid", AARCH64_HWCAP_CPUID, 0) \ + LINE(AARCH64_ASIMDRDM, asimdrdm, "asimdrdm", AARCH64_HWCAP_ASIMDRDM, 0) \ + LINE(AARCH64_JSCVT, jscvt, "jscvt", 
AARCH64_HWCAP_JSCVT, 0) \ + LINE(AARCH64_FCMA, fcma, "fcma", AARCH64_HWCAP_FCMA, 0) \ + LINE(AARCH64_LRCPC, lrcpc, "lrcpc", AARCH64_HWCAP_LRCPC, 0) \ + LINE(AARCH64_DCPOP, dcpop, "dcpop", AARCH64_HWCAP_DCPOP, 0) \ + LINE(AARCH64_SHA3, sha3, "sha3", AARCH64_HWCAP_SHA3, 0) \ + LINE(AARCH64_SM3, sm3, "sm3", AARCH64_HWCAP_SM3, 0) \ + LINE(AARCH64_SM4, sm4, "sm4", AARCH64_HWCAP_SM4, 0) \ + LINE(AARCH64_ASIMDDP, asimddp, "asimddp", AARCH64_HWCAP_ASIMDDP, 0) \ + LINE(AARCH64_SHA512, sha512, "sha512", AARCH64_HWCAP_SHA512, 0) \ + LINE(AARCH64_SVE, sve, "sve", AARCH64_HWCAP_SVE, 0) \ + LINE(AARCH64_ASIMDFHM, asimdfhm, "asimdfhm", AARCH64_HWCAP_ASIMDFHM, 0) \ + LINE(AARCH64_DIT, dit, "dit", AARCH64_HWCAP_DIT, 0) \ + LINE(AARCH64_USCAT, uscat, "uscat", AARCH64_HWCAP_USCAT, 0) \ + LINE(AARCH64_ILRCPC, ilrcpc, "ilrcpc", AARCH64_HWCAP_ILRCPC, 0) \ + LINE(AARCH64_FLAGM, flagm, "flagm", AARCH64_HWCAP_FLAGM, 0) \ + LINE(AARCH64_SSBS, ssbs, "ssbs", AARCH64_HWCAP_SSBS, 0) \ + LINE(AARCH64_SB, sb, "sb", AARCH64_HWCAP_SB, 0) \ + LINE(AARCH64_PACA, paca, "paca", AARCH64_HWCAP_PACA, 0) \ + LINE(AARCH64_PACG, pacg, "pacg", AARCH64_HWCAP_PACG, 0) \ + LINE(AARCH64_DCPODP, dcpodp, "dcpodp", 0, AARCH64_HWCAP2_DCPODP) \ + LINE(AARCH64_SVE2, sve2, "sve2", 0, AARCH64_HWCAP2_SVE2) \ + LINE(AARCH64_SVEAES, sveaes, "sveaes", 0, AARCH64_HWCAP2_SVEAES) \ + LINE(AARCH64_SVEPMULL, svepmull, "svepmull", 0, AARCH64_HWCAP2_SVEPMULL) \ + LINE(AARCH64_SVEBITPERM, svebitperm, "svebitperm", 0, \ + AARCH64_HWCAP2_SVEBITPERM) \ + LINE(AARCH64_SVESHA3, svesha3, "svesha3", 0, AARCH64_HWCAP2_SVESHA3) \ + LINE(AARCH64_SVESM4, svesm4, "svesm4", 0, AARCH64_HWCAP2_SVESM4) \ + LINE(AARCH64_FLAGM2, flagm2, "flagm2", 0, AARCH64_HWCAP2_FLAGM2) \ + LINE(AARCH64_FRINT, frint, "frint", 0, AARCH64_HWCAP2_FRINT) \ + LINE(AARCH64_SVEI8MM, svei8mm, "svei8mm", 0, AARCH64_HWCAP2_SVEI8MM) \ + LINE(AARCH64_SVEF32MM, svef32mm, "svef32mm", 0, AARCH64_HWCAP2_SVEF32MM) \ + LINE(AARCH64_SVEF64MM, svef64mm, "svef64mm", 0, 
AARCH64_HWCAP2_SVEF64MM) \ + LINE(AARCH64_SVEBF16, svebf16, "svebf16", 0, AARCH64_HWCAP2_SVEBF16) \ + LINE(AARCH64_I8MM, i8mm, "i8mm", 0, AARCH64_HWCAP2_I8MM) \ + LINE(AARCH64_BF16, bf16, "bf16", 0, AARCH64_HWCAP2_BF16) \ + LINE(AARCH64_DGH, dgh, "dgh", 0, AARCH64_HWCAP2_DGH) \ + LINE(AARCH64_RNG, rng, "rng", 0, AARCH64_HWCAP2_RNG) \ + LINE(AARCH64_BTI, bti, "bti", 0, AARCH64_HWCAP2_BTI) \ + LINE(AARCH64_MTE, mte, "mte", 0, AARCH64_HWCAP2_MTE) +#define INTROSPECTION_PREFIX Aarch64 +#define INTROSPECTION_ENUM_PREFIX AARCH64 +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + +#include + +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +static bool HandleAarch64Line(const LineResult result, + Aarch64Info* const info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { + for (size_t i = 0; i < AARCH64_LAST_; ++i) { + kSetters[i](&info->features, CpuFeatures_StringView_HasWord( + value, kCpuInfoFlags[i], ' ')); + } + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { + info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { + info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { + info->part = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { + info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); + } + } + return !result.eof; +} + +static void 
FillProcCpuInfoData(Aarch64Info* const info) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleAarch64Line(StackLineReader_NextLine(&reader), info)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const Aarch64Info kEmptyAarch64Info; + +Aarch64Info GetAarch64Info(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + Aarch64Info info = kEmptyAarch64Info; + + FillProcCpuInfoData(&info); + const HardwareCapabilities hwcaps = CpuFeatures_GetHardwareCapabilities(); + for (size_t i = 0; i < AARCH64_LAST_; ++i) { + if (CpuFeatures_IsHwCapsSet(kHardwareCapabilities[i], hwcaps)) { + kSetters[i](&info.features, true); + } + } + + return info; +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_AARCH64 diff --git a/src/cpuinfo_arm.c b/src/impl_arm_linux_or_android.c similarity index 70% rename from src/cpuinfo_arm.c rename to src/impl_arm_linux_or_android.c index 05ee3997..d4e97d3d 100644 --- a/src/cpuinfo_arm.c +++ b/src/impl_arm_linux_or_android.c @@ -12,50 +12,59 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_ARM +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + #include "cpuinfo_arm.h" -#include -#include +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. 
+//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(ARM_SWP, swp, "swp", ARM_HWCAP_SWP, 0) \ + LINE(ARM_HALF, half, "half", ARM_HWCAP_HALF, 0) \ + LINE(ARM_THUMB, thumb, "thumb", ARM_HWCAP_THUMB, 0) \ + LINE(ARM_26BIT, _26bit, "26bit", ARM_HWCAP_26BIT, 0) \ + LINE(ARM_FASTMULT, fastmult, "fastmult", ARM_HWCAP_FAST_MULT, 0) \ + LINE(ARM_FPA, fpa, "fpa", ARM_HWCAP_FPA, 0) \ + LINE(ARM_VFP, vfp, "vfp", ARM_HWCAP_VFP, 0) \ + LINE(ARM_EDSP, edsp, "edsp", ARM_HWCAP_EDSP, 0) \ + LINE(ARM_JAVA, java, "java", ARM_HWCAP_JAVA, 0) \ + LINE(ARM_IWMMXT, iwmmxt, "iwmmxt", ARM_HWCAP_IWMMXT, 0) \ + LINE(ARM_CRUNCH, crunch, "crunch", ARM_HWCAP_CRUNCH, 0) \ + LINE(ARM_THUMBEE, thumbee, "thumbee", ARM_HWCAP_THUMBEE, 0) \ + LINE(ARM_NEON, neon, "neon", ARM_HWCAP_NEON, 0) \ + LINE(ARM_VFPV3, vfpv3, "vfpv3", ARM_HWCAP_VFPV3, 0) \ + LINE(ARM_VFPV3D16, vfpv3d16, "vfpv3d16", ARM_HWCAP_VFPV3D16, 0) \ + LINE(ARM_TLS, tls, "tls", ARM_HWCAP_TLS, 0) \ + LINE(ARM_VFPV4, vfpv4, "vfpv4", ARM_HWCAP_VFPV4, 0) \ + LINE(ARM_IDIVA, idiva, "idiva", ARM_HWCAP_IDIVA, 0) \ + LINE(ARM_IDIVT, idivt, "idivt", ARM_HWCAP_IDIVT, 0) \ + LINE(ARM_VFPD32, vfpd32, "vfpd32", ARM_HWCAP_VFPD32, 0) \ + LINE(ARM_LPAE, lpae, "lpae", ARM_HWCAP_LPAE, 0) \ + LINE(ARM_EVTSTRM, evtstrm, "evtstrm", ARM_HWCAP_EVTSTRM, 0) \ + LINE(ARM_AES, aes, "aes", 0, ARM_HWCAP2_AES) \ + LINE(ARM_PMULL, pmull, "pmull", 0, ARM_HWCAP2_PMULL) \ + LINE(ARM_SHA1, sha1, "sha1", 0, ARM_HWCAP2_SHA1) \ + LINE(ARM_SHA2, sha2, "sha2", 0, ARM_HWCAP2_SHA2) \ + LINE(ARM_CRC32, crc32, "crc32", 0, ARM_HWCAP2_CRC32) +#define INTROSPECTION_PREFIX Arm +#define INTROSPECTION_ENUM_PREFIX ARM +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. 
+//////////////////////////////////////////////////////////////////////////////// + +#include #include "internal/bit_utils.h" #include "internal/filesystem.h" -#include "internal/hwcaps.h" #include "internal/stack_line_reader.h" #include "internal/string_view.h" -// Generation of feature's getters/setters functions and kGetters, kSetters, -// kCpuInfoFlags and kHardwareCapabilities global tables. -#define DEFINE_TABLE_FEATURES \ - FEATURE(ARM_SWP, swp, "swp", ARM_HWCAP_SWP, 0) \ - FEATURE(ARM_HALF, half, "half", ARM_HWCAP_HALF, 0) \ - FEATURE(ARM_THUMB, thumb, "thumb", ARM_HWCAP_THUMB, 0) \ - FEATURE(ARM_26BIT, _26bit, "26bit", ARM_HWCAP_26BIT, 0) \ - FEATURE(ARM_FASTMULT, fastmult, "fastmult", ARM_HWCAP_FAST_MULT, 0) \ - FEATURE(ARM_FPA, fpa, "fpa", ARM_HWCAP_FPA, 0) \ - FEATURE(ARM_VFP, vfp, "vfp", ARM_HWCAP_VFP, 0) \ - FEATURE(ARM_EDSP, edsp, "edsp", ARM_HWCAP_EDSP, 0) \ - FEATURE(ARM_JAVA, java, "java", ARM_HWCAP_JAVA, 0) \ - FEATURE(ARM_IWMMXT, iwmmxt, "iwmmxt", ARM_HWCAP_IWMMXT, 0) \ - FEATURE(ARM_CRUNCH, crunch, "crunch", ARM_HWCAP_CRUNCH, 0) \ - FEATURE(ARM_THUMBEE, thumbee, "thumbee", ARM_HWCAP_THUMBEE, 0) \ - FEATURE(ARM_NEON, neon, "neon", ARM_HWCAP_NEON, 0) \ - FEATURE(ARM_VFPV3, vfpv3, "vfpv3", ARM_HWCAP_VFPV3, 0) \ - FEATURE(ARM_VFPV3D16, vfpv3d16, "vfpv3d16", ARM_HWCAP_VFPV3D16, 0) \ - FEATURE(ARM_TLS, tls, "tls", ARM_HWCAP_TLS, 0) \ - FEATURE(ARM_VFPV4, vfpv4, "vfpv4", ARM_HWCAP_VFPV4, 0) \ - FEATURE(ARM_IDIVA, idiva, "idiva", ARM_HWCAP_IDIVA, 0) \ - FEATURE(ARM_IDIVT, idivt, "idivt", ARM_HWCAP_IDIVT, 0) \ - FEATURE(ARM_VFPD32, vfpd32, "vfpd32", ARM_HWCAP_VFPD32, 0) \ - FEATURE(ARM_LPAE, lpae, "lpae", ARM_HWCAP_LPAE, 0) \ - FEATURE(ARM_EVTSTRM, evtstrm, "evtstrm", ARM_HWCAP_EVTSTRM, 0) \ - FEATURE(ARM_AES, aes, "aes", 0, ARM_HWCAP2_AES) \ - FEATURE(ARM_PMULL, pmull, "pmull", 0, ARM_HWCAP2_PMULL) \ - FEATURE(ARM_SHA1, sha1, "sha1", 0, ARM_HWCAP2_SHA1) \ - FEATURE(ARM_SHA2, sha2, "sha2", 0, ARM_HWCAP2_SHA2) \ - FEATURE(ARM_CRC32, crc32, "crc32", 0, 
ARM_HWCAP2_CRC32) -#define DEFINE_TABLE_FEATURE_TYPE ArmFeatures -#include "define_tables.h" - typedef struct { bool processor_reports_armv6; bool hardware_reports_goldfish; @@ -197,16 +206,5 @@ ArmInfo GetArmInfo(void) { return info; } -//////////////////////////////////////////////////////////////////////////////// -// Introspection functions - -int GetArmFeaturesEnumValue(const ArmFeatures* features, - ArmFeaturesEnum value) { - if (value >= ARM_LAST_) return false; - return kGetters[value](features); -} - -const char* GetArmFeaturesEnumName(ArmFeaturesEnum value) { - if (value >= ARM_LAST_) return "unknown feature"; - return kCpuInfoFlags[value]; -} +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_ARM diff --git a/src/cpuinfo_mips.c b/src/impl_mips_linux_or_android.c similarity index 72% rename from src/cpuinfo_mips.c rename to src/impl_mips_linux_or_android.c index 887ab9f7..9a3dc2fe 100644 --- a/src/cpuinfo_mips.c +++ b/src/impl_mips_linux_or_android.c @@ -1,5 +1,3 @@ -// Copyright 2017 Google LLC -// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -12,24 +10,33 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_MIPS +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + #include "cpuinfo_mips.h" -#include +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. 
+//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(MIPS_MSA, msa, "msa", MIPS_HWCAP_MSA, 0) \ + LINE(MIPS_EVA, eva, "eva", 0, 0) \ + LINE(MIPS_R6, r6, "r6", MIPS_HWCAP_R6, 0) +#define INTROSPECTION_PREFIX Mips +#define INTROSPECTION_ENUM_PREFIX MIPS +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// #include "internal/filesystem.h" #include "internal/hwcaps.h" #include "internal/stack_line_reader.h" #include "internal/string_view.h" -// Generation of feature's getters/setters functions and kGetters, kSetters, -// kCpuInfoFlags and kHardwareCapabilities global tables. -#define DEFINE_TABLE_FEATURES \ - FEATURE(MIPS_MSA, msa, "msa", MIPS_HWCAP_MSA, 0) \ - FEATURE(MIPS_EVA, eva, "eva", 0, 0) \ - FEATURE(MIPS_R6, r6, "r6", MIPS_HWCAP_R6, 0) -#define DEFINE_TABLE_FEATURE_TYPE MipsFeatures -#include "define_tables.h" - static bool HandleMipsLine(const LineResult result, MipsFeatures* const features) { StringView key, value; @@ -77,16 +84,5 @@ MipsInfo GetMipsInfo(void) { return info; } -//////////////////////////////////////////////////////////////////////////////// -// Introspection functions - -int GetMipsFeaturesEnumValue(const MipsFeatures* features, - MipsFeaturesEnum value) { - if (value >= MIPS_LAST_) return false; - return kGetters[value](features); -} - -const char* GetMipsFeaturesEnumName(MipsFeaturesEnum value) { - if (value >= MIPS_LAST_) return "unknown feature"; - return kCpuInfoFlags[value]; -} +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_MIPS diff --git a/src/cpuinfo_ppc.c b/src/impl_ppc_linux.c similarity index 50% rename from src/cpuinfo_ppc.c rename to src/impl_ppc_linux.c index 53dc1d59..13a381a8 100644 --- a/src/cpuinfo_ppc.c +++ 
b/src/impl_ppc_linux.c @@ -12,11 +12,72 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_PPC +#ifdef CPU_FEATURES_OS_LINUX + #include "cpuinfo_ppc.h" -#include +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(PPC_32, ppc32, "ppc32", PPC_FEATURE_32, 0) \ + LINE(PPC_64, ppc64, "ppc64", PPC_FEATURE_64, 0) \ + LINE(PPC_601_INSTR, ppc601, "ppc601", PPC_FEATURE_601_INSTR, 0) \ + LINE(PPC_HAS_ALTIVEC, altivec, "altivec", PPC_FEATURE_HAS_ALTIVEC, 0) \ + LINE(PPC_HAS_FPU, fpu, "fpu", PPC_FEATURE_HAS_FPU, 0) \ + LINE(PPC_HAS_MMU, mmu, "mmu", PPC_FEATURE_HAS_MMU, 0) \ + LINE(PPC_HAS_4xxMAC, mac_4xx, "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0) \ + LINE(PPC_UNIFIED_CACHE, unifiedcache, "ucache", PPC_FEATURE_UNIFIED_CACHE, \ + 0) \ + LINE(PPC_HAS_SPE, spe, "spe", PPC_FEATURE_HAS_SPE, 0) \ + LINE(PPC_HAS_EFP_SINGLE, efpsingle, "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, \ + 0) \ + LINE(PPC_HAS_EFP_DOUBLE, efpdouble, "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, \ + 0) \ + LINE(PPC_NO_TB, no_tb, "notb", PPC_FEATURE_NO_TB, 0) \ + LINE(PPC_POWER4, power4, "power4", PPC_FEATURE_POWER4, 0) \ + LINE(PPC_POWER5, power5, "power5", PPC_FEATURE_POWER5, 0) \ + LINE(PPC_POWER5_PLUS, power5plus, "power5+", PPC_FEATURE_POWER5_PLUS, 0) \ + LINE(PPC_CELL, cell, "cellbe", PPC_FEATURE_CELL, 0) \ + LINE(PPC_BOOKE, booke, "booke", PPC_FEATURE_BOOKE, 0) \ + LINE(PPC_SMT, smt, "smt", PPC_FEATURE_SMT, 0) \ + LINE(PPC_ICACHE_SNOOP, icachesnoop, "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0) \ + LINE(PPC_ARCH_2_05, arch205, "arch_2_05", PPC_FEATURE_ARCH_2_05, 0) \ + LINE(PPC_PA6T, pa6t, "pa6t", PPC_FEATURE_PA6T, 0) \ + LINE(PPC_HAS_DFP, dfp, "dfp", PPC_FEATURE_HAS_DFP, 0) \ + LINE(PPC_POWER6_EXT, power6ext, "power6x", 
PPC_FEATURE_POWER6_EXT, 0) \ + LINE(PPC_ARCH_2_06, arch206, "arch_2_06", PPC_FEATURE_ARCH_2_06, 0) \ + LINE(PPC_HAS_VSX, vsx, "vsx", PPC_FEATURE_HAS_VSX, 0) \ + LINE(PPC_PSERIES_PERFMON_COMPAT, pseries_perfmon_compat, "archpmu", \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0) \ + LINE(PPC_TRUE_LE, truele, "true_le", PPC_FEATURE_TRUE_LE, 0) \ + LINE(PPC_PPC_LE, ppcle, "ppcle", PPC_FEATURE_PPC_LE, 0) \ + LINE(PPC_ARCH_2_07, arch207, "arch_2_07", 0, PPC_FEATURE2_ARCH_2_07) \ + LINE(PPC_HTM, htm, "htm", 0, PPC_FEATURE2_HTM) \ + LINE(PPC_DSCR, dscr, "dscr", 0, PPC_FEATURE2_DSCR) \ + LINE(PPC_EBB, ebb, "ebb", 0, PPC_FEATURE2_EBB) \ + LINE(PPC_ISEL, isel, "isel", 0, PPC_FEATURE2_ISEL) \ + LINE(PPC_TAR, tar, "tar", 0, PPC_FEATURE2_TAR) \ + LINE(PPC_VEC_CRYPTO, vcrypto, "vcrypto", 0, PPC_FEATURE2_VEC_CRYPTO) \ + LINE(PPC_HTM_NOSC, htm_nosc, "htm-nosc", 0, PPC_FEATURE2_HTM_NOSC) \ + LINE(PPC_ARCH_3_00, arch300, "arch_3_00", 0, PPC_FEATURE2_ARCH_3_00) \ + LINE(PPC_HAS_IEEE128, ieee128, "ieee128", 0, PPC_FEATURE2_HAS_IEEE128) \ + LINE(PPC_DARN, darn, "darn", 0, PPC_FEATURE2_DARN) \ + LINE(PPC_SCV, scv, "scv", 0, PPC_FEATURE2_SCV) \ + LINE(PPC_HTM_NO_SUSPEND, htm_no_suspend, "htm-no-suspend", 0, \ + PPC_FEATURE2_HTM_NO_SUSPEND) +#define INTROSPECTION_PREFIX PPC +#define INTROSPECTION_ENUM_PREFIX PPC +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + #include -#include #include "internal/bit_utils.h" #include "internal/filesystem.h" @@ -24,59 +85,6 @@ #include "internal/stack_line_reader.h" #include "internal/string_view.h" -// Generation of feature's getters/setters functions and kGetters, kSetters, -// kCpuInfoFlags and kHardwareCapabilities global tables. 
-#define DEFINE_TABLE_FEATURES \ - FEATURE(PPC_32, ppc32, "ppc32", PPC_FEATURE_32, 0) \ - FEATURE(PPC_64, ppc64, "ppc64", PPC_FEATURE_64, 0) \ - FEATURE(PPC_601_INSTR, ppc601, "ppc601", PPC_FEATURE_601_INSTR, 0) \ - FEATURE(PPC_HAS_ALTIVEC, altivec, "altivec", PPC_FEATURE_HAS_ALTIVEC, 0) \ - FEATURE(PPC_HAS_FPU, fpu, "fpu", PPC_FEATURE_HAS_FPU, 0) \ - FEATURE(PPC_HAS_MMU, mmu, "mmu", PPC_FEATURE_HAS_MMU, 0) \ - FEATURE(PPC_HAS_4xxMAC, mac_4xx, "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0) \ - FEATURE(PPC_UNIFIED_CACHE, unifiedcache, "ucache", \ - PPC_FEATURE_UNIFIED_CACHE, 0) \ - FEATURE(PPC_HAS_SPE, spe, "spe", PPC_FEATURE_HAS_SPE, 0) \ - FEATURE(PPC_HAS_EFP_SINGLE, efpsingle, "efpsingle", \ - PPC_FEATURE_HAS_EFP_SINGLE, 0) \ - FEATURE(PPC_HAS_EFP_DOUBLE, efpdouble, "efpdouble", \ - PPC_FEATURE_HAS_EFP_DOUBLE, 0) \ - FEATURE(PPC_NO_TB, no_tb, "notb", PPC_FEATURE_NO_TB, 0) \ - FEATURE(PPC_POWER4, power4, "power4", PPC_FEATURE_POWER4, 0) \ - FEATURE(PPC_POWER5, power5, "power5", PPC_FEATURE_POWER5, 0) \ - FEATURE(PPC_POWER5_PLUS, power5plus, "power5+", PPC_FEATURE_POWER5_PLUS, 0) \ - FEATURE(PPC_CELL, cell, "cellbe", PPC_FEATURE_CELL, 0) \ - FEATURE(PPC_BOOKE, booke, "booke", PPC_FEATURE_BOOKE, 0) \ - FEATURE(PPC_SMT, smt, "smt", PPC_FEATURE_SMT, 0) \ - FEATURE(PPC_ICACHE_SNOOP, icachesnoop, "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, \ - 0) \ - FEATURE(PPC_ARCH_2_05, arch205, "arch_2_05", PPC_FEATURE_ARCH_2_05, 0) \ - FEATURE(PPC_PA6T, pa6t, "pa6t", PPC_FEATURE_PA6T, 0) \ - FEATURE(PPC_HAS_DFP, dfp, "dfp", PPC_FEATURE_HAS_DFP, 0) \ - FEATURE(PPC_POWER6_EXT, power6ext, "power6x", PPC_FEATURE_POWER6_EXT, 0) \ - FEATURE(PPC_ARCH_2_06, arch206, "arch_2_06", PPC_FEATURE_ARCH_2_06, 0) \ - FEATURE(PPC_HAS_VSX, vsx, "vsx", PPC_FEATURE_HAS_VSX, 0) \ - FEATURE(PPC_PSERIES_PERFMON_COMPAT, pseries_perfmon_compat, "archpmu", \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0) \ - FEATURE(PPC_TRUE_LE, truele, "true_le", PPC_FEATURE_TRUE_LE, 0) \ - FEATURE(PPC_PPC_LE, ppcle, "ppcle", PPC_FEATURE_PPC_LE, 
0) \ - FEATURE(PPC_ARCH_2_07, arch207, "arch_2_07", 0, PPC_FEATURE2_ARCH_2_07) \ - FEATURE(PPC_HTM, htm, "htm", 0, PPC_FEATURE2_HTM) \ - FEATURE(PPC_DSCR, dscr, "dscr", 0, PPC_FEATURE2_DSCR) \ - FEATURE(PPC_EBB, ebb, "ebb", 0, PPC_FEATURE2_EBB) \ - FEATURE(PPC_ISEL, isel, "isel", 0, PPC_FEATURE2_ISEL) \ - FEATURE(PPC_TAR, tar, "tar", 0, PPC_FEATURE2_TAR) \ - FEATURE(PPC_VEC_CRYPTO, vcrypto, "vcrypto", 0, PPC_FEATURE2_VEC_CRYPTO) \ - FEATURE(PPC_HTM_NOSC, htm_nosc, "htm-nosc", 0, PPC_FEATURE2_HTM_NOSC) \ - FEATURE(PPC_ARCH_3_00, arch300, "arch_3_00", 0, PPC_FEATURE2_ARCH_3_00) \ - FEATURE(PPC_HAS_IEEE128, ieee128, "ieee128", 0, PPC_FEATURE2_HAS_IEEE128) \ - FEATURE(PPC_DARN, darn, "darn", 0, PPC_FEATURE2_DARN) \ - FEATURE(PPC_SCV, scv, "scv", 0, PPC_FEATURE2_SCV) \ - FEATURE(PPC_HTM_NO_SUSPEND, htm_no_suspend, "htm-no-suspend", 0, \ - PPC_FEATURE2_HTM_NO_SUSPEND) -#define DEFINE_TABLE_FEATURE_TYPE PPCFeatures -#include "define_tables.h" - static bool HandlePPCLine(const LineResult result, PPCPlatformStrings* const strings) { StringView line = result.line; @@ -150,16 +158,5 @@ PPCPlatformStrings GetPPCPlatformStrings(void) { return strings; } -//////////////////////////////////////////////////////////////////////////////// -// Introspection functions - -int GetPPCFeaturesEnumValue(const PPCFeatures* features, - PPCFeaturesEnum value) { - if (value >= PPC_LAST_) return false; - return kGetters[value](features); -} - -const char* GetPPCFeaturesEnumName(PPCFeaturesEnum value) { - if (value >= PPC_LAST_) return "unknown feature"; - return kCpuInfoFlags[value]; -} +#endif // CPU_FEATURES_OS_LINUX +#endif // CPU_FEATURES_ARCH_PPC diff --git a/src/cpuinfo_x86.c b/src/impl_x86__base_implementation.inl similarity index 82% rename from src/cpuinfo_x86.c rename to src/impl_x86__base_implementation.inl index b7adac25..3f474123 100644 --- a/src/cpuinfo_x86.c +++ b/src/impl_x86__base_implementation.inl @@ -13,11 +13,12 @@ // See the License for the specific language governing 
permissions and // limitations under the License. -#include "cpuinfo_x86.h" - #include #include +#include "copy.h" +#include "cpuinfo_x86.h" +#include "equals.h" #include "internal/bit_utils.h" #include "internal/cpuid_x86.h" @@ -25,94 +26,6 @@ #error "Cannot compile cpuinfo_x86 on a non x86 platform." #endif -// Generation of feature's getters/setters functions and kGetters, kSetters, -// kCpuInfoFlags global tables. -#define DEFINE_TABLE_FEATURES \ - FEATURE(X86_FPU, fpu, "fpu", 0, 0) \ - FEATURE(X86_TSC, tsc, "tsc", 0, 0) \ - FEATURE(X86_CX8, cx8, "cx8", 0, 0) \ - FEATURE(X86_CLFSH, clfsh, "clfsh", 0, 0) \ - FEATURE(X86_MMX, mmx, "mmx", 0, 0) \ - FEATURE(X86_AES, aes, "aes", 0, 0) \ - FEATURE(X86_ERMS, erms, "erms", 0, 0) \ - FEATURE(X86_F16C, f16c, "f16c", 0, 0) \ - FEATURE(X86_FMA4, fma4, "fma4", 0, 0) \ - FEATURE(X86_FMA3, fma3, "fma3", 0, 0) \ - FEATURE(X86_VAES, vaes, "vaes", 0, 0) \ - FEATURE(X86_VPCLMULQDQ, vpclmulqdq, "vpclmulqdq", 0, 0) \ - FEATURE(X86_BMI1, bmi1, "bmi1", 0, 0) \ - FEATURE(X86_HLE, hle, "hle", 0, 0) \ - FEATURE(X86_BMI2, bmi2, "bmi2", 0, 0) \ - FEATURE(X86_RTM, rtm, "rtm", 0, 0) \ - FEATURE(X86_RDSEED, rdseed, "rdseed", 0, 0) \ - FEATURE(X86_CLFLUSHOPT, clflushopt, "clflushopt", 0, 0) \ - FEATURE(X86_CLWB, clwb, "clwb", 0, 0) \ - FEATURE(X86_SSE, sse, "sse", 0, 0) \ - FEATURE(X86_SSE2, sse2, "sse2", 0, 0) \ - FEATURE(X86_SSE3, sse3, "sse3", 0, 0) \ - FEATURE(X86_SSSE3, ssse3, "ssse3", 0, 0) \ - FEATURE(X86_SSE4_1, sse4_1, "sse4_1", 0, 0) \ - FEATURE(X86_SSE4_2, sse4_2, "sse4_2", 0, 0) \ - FEATURE(X86_SSE4A, sse4a, "sse4a", 0, 0) \ - FEATURE(X86_AVX, avx, "avx", 0, 0) \ - FEATURE(X86_AVX2, avx2, "avx2", 0, 0) \ - FEATURE(X86_AVX512F, avx512f, "avx512f", 0, 0) \ - FEATURE(X86_AVX512CD, avx512cd, "avx512cd", 0, 0) \ - FEATURE(X86_AVX512ER, avx512er, "avx512er", 0, 0) \ - FEATURE(X86_AVX512PF, avx512pf, "avx512pf", 0, 0) \ - FEATURE(X86_AVX512BW, avx512bw, "avx512bw", 0, 0) \ - FEATURE(X86_AVX512DQ, avx512dq, "avx512dq", 0, 0) \ - 
FEATURE(X86_AVX512VL, avx512vl, "avx512vl", 0, 0) \ - FEATURE(X86_AVX512IFMA, avx512ifma, "avx512ifma", 0, 0) \ - FEATURE(X86_AVX512VBMI, avx512vbmi, "avx512vbmi", 0, 0) \ - FEATURE(X86_AVX512VBMI2, avx512vbmi2, "avx512vbmi2", 0, 0) \ - FEATURE(X86_AVX512VNNI, avx512vnni, "avx512vnni", 0, 0) \ - FEATURE(X86_AVX512BITALG, avx512bitalg, "avx512bitalg", 0, 0) \ - FEATURE(X86_AVX512VPOPCNTDQ, avx512vpopcntdq, "avx512vpopcntdq", 0, 0) \ - FEATURE(X86_AVX512_4VNNIW, avx512_4vnniw, "avx512_4vnniw", 0, 0) \ - FEATURE(X86_AVX512_4VBMI2, avx512_4vbmi2, "avx512_4vbmi2", 0, 0) \ - FEATURE(X86_AVX512_SECOND_FMA, avx512_second_fma, "avx512_second_fma", 0, 0) \ - FEATURE(X86_AVX512_4FMAPS, avx512_4fmaps, "avx512_4fmaps", 0, 0) \ - FEATURE(X86_AVX512_BF16, avx512_bf16, "avx512_bf16", 0, 0) \ - FEATURE(X86_AVX512_VP2INTERSECT, avx512_vp2intersect, "avx512_vp2intersect", \ - 0, 0) \ - FEATURE(X86_AMX_BF16, amx_bf16, "amx_bf16", 0, 0) \ - FEATURE(X86_AMX_TILE, amx_tile, "amx_tile", 0, 0) \ - FEATURE(X86_AMX_INT8, amx_int8, "amx_int8", 0, 0) \ - FEATURE(X86_PCLMULQDQ, pclmulqdq, "pclmulqdq", 0, 0) \ - FEATURE(X86_SMX, smx, "smx", 0, 0) \ - FEATURE(X86_SGX, sgx, "sgx", 0, 0) \ - FEATURE(X86_CX16, cx16, "cx16", 0, 0) \ - FEATURE(X86_SHA, sha, "sha", 0, 0) \ - FEATURE(X86_POPCNT, popcnt, "popcnt", 0, 0) \ - FEATURE(X86_MOVBE, movbe, "movbe", 0, 0) \ - FEATURE(X86_RDRND, rdrnd, "rdrnd", 0, 0) \ - FEATURE(X86_DCA, dca, "dca", 0, 0) \ - FEATURE(X86_SS, ss, "ss", 0, 0) \ - FEATURE(X86_ADX, adx, "adx", 0, 0) -#define DEFINE_TABLE_FEATURE_TYPE X86Features -#define DEFINE_TABLE_DONT_GENERATE_HWCAPS -#include "define_tables.h" - -// The following includes are necessary to provide SSE detections on pre-AVX -// microarchitectures. 
-#if defined(CPU_FEATURES_OS_WINDOWS) -#include // IsProcessorFeaturePresent -#elif defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) || \ - defined(CPU_FEATURES_OS_FREEBSD) -#include "internal/filesystem.h" // Needed to parse /proc/cpuinfo -#include "internal/stack_line_reader.h" // Needed to parse /proc/cpuinfo -#elif defined(CPU_FEATURES_OS_DARWIN) -#if !defined(HAVE_SYSCTLBYNAME) -#error "Darwin needs support for sysctlbyname" -#endif -#include -#else -#error "Unsupported OS" -#endif // CPU_FEATURES_OS - -#include "internal/string_view.h" - //////////////////////////////////////////////////////////////////////////////// // Definitions for CpuId and GetXCR0Eax. //////////////////////////////////////////////////////////////////////////////// @@ -176,6 +89,11 @@ static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { return SafeCpuIdEx(max_cpuid_leaf, leaf_id, 0); } +//////////////////////////////////////////////////////////////////////////////// +// OS support +// TODO: Add documentation +//////////////////////////////////////////////////////////////////////////////// + #define MASK_XMM 0x2 #define MASK_YMM 0x4 #define MASK_MASKREG 0x20 @@ -214,6 +132,47 @@ static bool HasTmmOsXSave(uint32_t xcr0_eax) { MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA); } +//////////////////////////////////////////////////////////////////////////////// +// Vendor +//////////////////////////////////////////////////////////////////////////////// + +static void SetVendor(const Leaf leaf, char* const vendor) { + *(uint32_t*)(vendor) = leaf.ebx; + *(uint32_t*)(vendor + 4) = leaf.edx; + *(uint32_t*)(vendor + 8) = leaf.ecx; + vendor[12] = '\0'; +} + +static int IsVendor(const Leaf leaf, const char* const name) { + const uint32_t ebx = *(const uint32_t*)(name); + const uint32_t edx = *(const uint32_t*)(name + 4); + const uint32_t ecx = *(const uint32_t*)(name + 8); + return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx; +} + +static int IsVendorByX86Info(const X86Info* info, 
const char* const name) { + return equals(info->vendor, name, sizeof(info->vendor)); +} + +void FillX86BrandString(char brand_string[49]) { + const Leaf leaf_ext_0 = CpuId(0x80000000); + const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax; + const Leaf leaves[3] = { + SafeCpuId(max_cpuid_leaf_ext, 0x80000002), + SafeCpuId(max_cpuid_leaf_ext, 0x80000003), + SafeCpuId(max_cpuid_leaf_ext, 0x80000004), + }; +#if __STDC_VERSION__ >= 201112L + _Static_assert(sizeof(leaves) == 48, "Leaves must be packed"); +#endif + copy(brand_string, (const char*)(leaves), 48); + brand_string[48] = '\0'; +} + +//////////////////////////////////////////////////////////////////////////////// +// CpuId +//////////////////////////////////////////////////////////////////////////////// + static bool HasSecondFMA(uint32_t model) { // Skylake server if (model == 0x55) { @@ -243,82 +202,464 @@ static bool HasSecondFMA(uint32_t model) { return true; } -static void SetVendor(const Leaf leaf, char* const vendor) { - *(uint32_t*)(vendor) = leaf.ebx; - *(uint32_t*)(vendor + 4) = leaf.edx; - *(uint32_t*)(vendor + 8) = leaf.ecx; - vendor[12] = '\0'; +// Internal structure to hold the OS support for vector operations. +// Avoid to recompute them since each call to cpuid is ~100 cycles. +typedef struct { + bool sse_registers; + bool avx_registers; + bool avx512_registers; + bool amx_registers; +} OsPreserves; + +// These two functions have to be implemented by the OS, that is the file +// including this file. +static void OverrideOsPreserves(OsPreserves* os_preserves); +static void DetectFeaturesFromOs(X86Features* features); + +// Reference https://en.wikipedia.org/wiki/CPUID. 
+static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, + OsPreserves* os_preserves) { + const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); + const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); + const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); + + const bool have_xsave = IsBitSet(leaf_1.ecx, 26); + const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); + const bool have_xcr0 = have_xsave && have_osxsave; + + const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); + const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); + const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); + const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16); + + X86Features* const features = &info->features; + + info->family = extended_family + family; + info->model = (extended_model << 4) + model; + info->stepping = ExtractBitRange(leaf_1.eax, 3, 0); + + features->fpu = IsBitSet(leaf_1.edx, 0); + features->tsc = IsBitSet(leaf_1.edx, 4); + features->cx8 = IsBitSet(leaf_1.edx, 8); + features->clfsh = IsBitSet(leaf_1.edx, 19); + features->mmx = IsBitSet(leaf_1.edx, 23); + features->ss = IsBitSet(leaf_1.edx, 27); + features->pclmulqdq = IsBitSet(leaf_1.ecx, 1); + features->smx = IsBitSet(leaf_1.ecx, 6); + features->cx16 = IsBitSet(leaf_1.ecx, 13); + features->dca = IsBitSet(leaf_1.ecx, 18); + features->movbe = IsBitSet(leaf_1.ecx, 22); + features->popcnt = IsBitSet(leaf_1.ecx, 23); + features->aes = IsBitSet(leaf_1.ecx, 25); + features->f16c = IsBitSet(leaf_1.ecx, 29); + features->rdrnd = IsBitSet(leaf_1.ecx, 30); + features->sgx = IsBitSet(leaf_7.ebx, 2); + features->bmi1 = IsBitSet(leaf_7.ebx, 3); + features->hle = IsBitSet(leaf_7.ebx, 4); + features->bmi2 = IsBitSet(leaf_7.ebx, 8); + features->erms = IsBitSet(leaf_7.ebx, 9); + features->rtm = IsBitSet(leaf_7.ebx, 11); + features->rdseed = IsBitSet(leaf_7.ebx, 18); + features->clflushopt = IsBitSet(leaf_7.ebx, 23); + features->clwb = IsBitSet(leaf_7.ebx, 24); + features->sha = 
IsBitSet(leaf_7.ebx, 29); + features->vaes = IsBitSet(leaf_7.ecx, 9); + features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); + features->adx = IsBitSet(leaf_7.ebx, 19); + + ///////////////////////////////////////////////////////////////////////////// + // The following section is devoted to Vector Extensions. + ///////////////////////////////////////////////////////////////////////////// + + // CPU with AVX expose XCR0 which enables checking vector extensions OS + // support through cpuid. + if (have_xcr0) { + // Here we rely exclusively on cpuid for both CPU and OS support of vector + // extensions. + const uint32_t xcr0_eax = GetXCR0Eax(); + os_preserves->sse_registers = HasXmmOsXSave(xcr0_eax); + os_preserves->avx_registers = HasYmmOsXSave(xcr0_eax); + os_preserves->avx512_registers = HasZmmOsXSave(xcr0_eax); + os_preserves->amx_registers = HasTmmOsXSave(xcr0_eax); + OverrideOsPreserves(os_preserves); + + if (os_preserves->sse_registers) { + features->sse = IsBitSet(leaf_1.edx, 25); + features->sse2 = IsBitSet(leaf_1.edx, 26); + features->sse3 = IsBitSet(leaf_1.ecx, 0); + features->ssse3 = IsBitSet(leaf_1.ecx, 9); + features->sse4_1 = IsBitSet(leaf_1.ecx, 19); + features->sse4_2 = IsBitSet(leaf_1.ecx, 20); + } + if (os_preserves->avx_registers) { + features->fma3 = IsBitSet(leaf_1.ecx, 12); + features->avx = IsBitSet(leaf_1.ecx, 28); + features->avx2 = IsBitSet(leaf_7.ebx, 5); + } + if (os_preserves->avx512_registers) { + features->avx512f = IsBitSet(leaf_7.ebx, 16); + features->avx512cd = IsBitSet(leaf_7.ebx, 28); + features->avx512er = IsBitSet(leaf_7.ebx, 27); + features->avx512pf = IsBitSet(leaf_7.ebx, 26); + features->avx512bw = IsBitSet(leaf_7.ebx, 30); + features->avx512dq = IsBitSet(leaf_7.ebx, 17); + features->avx512vl = IsBitSet(leaf_7.ebx, 31); + features->avx512ifma = IsBitSet(leaf_7.ebx, 21); + features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); + features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); + features->avx512vnni = IsBitSet(leaf_7.ecx, 11); + 
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); + features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); + features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); + features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + features->avx512_second_fma = HasSecondFMA(info->model); + features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); + features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); + features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); + } + if (os_preserves->amx_registers) { + features->amx_bf16 = IsBitSet(leaf_7.edx, 22); + features->amx_tile = IsBitSet(leaf_7.edx, 24); + features->amx_int8 = IsBitSet(leaf_7.edx, 25); + } + } else { + // When XCR0 is not available (Atom based or older cpus) we need to defer to + // the OS via custom code. + DetectFeaturesFromOs(features); + // Now that we have queried the OS for SSE support, we report this back to + // os_preserves. This is needed in case of AMD CPU's to enable testing of + // sse4a (See ParseExtraAMDCpuId below). + if (features->sse) os_preserves->sse_registers = true; + } } -static int IsVendor(const Leaf leaf, const char* const name) { - const uint32_t ebx = *(const uint32_t*)(name); - const uint32_t edx = *(const uint32_t*)(name + 4); - const uint32_t ecx = *(const uint32_t*)(name + 8); - return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx; +// Reference +// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. 
+static Leaf GetLeafByIdAMD(uint32_t leaf_id) { + uint32_t max_extended = CpuId(0x80000000).eax; + return SafeCpuId(max_extended, leaf_id); } -static int IsVendorByX86Info(const X86Info* info, const char* const name) { - return memcmp(info->vendor, name, sizeof(info->vendor)) == 0; +static void ParseExtraAMDCpuId(X86Info* info, OsPreserves os_preserves) { + const Leaf leaf_80000001 = GetLeafByIdAMD(0x80000001); + + X86Features* const features = &info->features; + + if (os_preserves.sse_registers) { + features->sse4a = IsBitSet(leaf_80000001.ecx, 6); + } + + if (os_preserves.avx_registers) { + features->fma4 = IsBitSet(leaf_80000001.ecx, 16); + } } -static const CacheLevelInfo kEmptyCacheLevelInfo; +static const X86Info kEmptyX86Info; +static const OsPreserves kEmptyOsPreserves; -static CacheLevelInfo GetCacheLevelInfo(const uint32_t reg) { - const int UNDEF = -1; - const int KiB = 1024; - const int MiB = 1024 * KiB; - switch (reg) { - case 0x01: - return (CacheLevelInfo){.level = UNDEF, - .cache_type = CPU_FEATURE_CACHE_TLB, - .cache_size = 4 * KiB, - .ways = 4, - .line_size = UNDEF, - .tlb_entries = 32, - .partitioning = 0}; - case 0x02: - return (CacheLevelInfo){.level = UNDEF, - .cache_type = CPU_FEATURE_CACHE_TLB, - .cache_size = 4 * MiB, - .ways = 0xFF, - .line_size = UNDEF, - .tlb_entries = 2, - .partitioning = 0}; - case 0x03: - return (CacheLevelInfo){.level = UNDEF, - .cache_type = CPU_FEATURE_CACHE_TLB, - .cache_size = 4 * KiB, - .ways = 4, - .line_size = UNDEF, - .tlb_entries = 64, - .partitioning = 0}; - case 0x04: - return (CacheLevelInfo){.level = UNDEF, - .cache_type = CPU_FEATURE_CACHE_TLB, - .cache_size = 4 * MiB, - .ways = 4, - .line_size = UNDEF, - .tlb_entries = 8, - .partitioning = 0}; - case 0x05: - return (CacheLevelInfo){.level = UNDEF, - .cache_type = CPU_FEATURE_CACHE_TLB, - .cache_size = 4 * MiB, - .ways = 4, - .line_size = UNDEF, - .tlb_entries = 32, - .partitioning = 0}; - case 0x06: - return (CacheLevelInfo){.level = 1, - .cache_type = 
CPU_FEATURE_CACHE_INSTRUCTION, - .cache_size = 8 * KiB, - .ways = 4, - .line_size = 32, - .tlb_entries = UNDEF, - .partitioning = 0}; - case 0x08: - return (CacheLevelInfo){.level = 1, - .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, +X86Info GetX86Info(void) { + X86Info info = kEmptyX86Info; + const Leaf leaf_0 = CpuId(0); + const bool is_intel = IsVendor(leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL); + const bool is_amd = IsVendor(leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD); + const bool is_hygon = IsVendor(leaf_0, CPU_FEATURES_VENDOR_HYGON_GENUINE); + SetVendor(leaf_0, info.vendor); + if (is_intel || is_amd || is_hygon) { + OsPreserves os_preserves = kEmptyOsPreserves; + const uint32_t max_cpuid_leaf = leaf_0.eax; + ParseCpuId(max_cpuid_leaf, &info, &os_preserves); + if (is_amd || is_hygon) { + ParseExtraAMDCpuId(&info, os_preserves); + } + } + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Microarchitecture +//////////////////////////////////////////////////////////////////////////////// + +#define CPUID(FAMILY, MODEL) ((((FAMILY)&0xFF) << 8) | ((MODEL)&0xFF)) + +X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { + if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_GENUINE_INTEL)) { + switch (CPUID(info->family, info->model)) { + case CPUID(0x06, 0x1C): // Intel(R) Atom(TM) CPU 230 @ 1.60GHz + case CPUID(0x06, 0x35): + case CPUID(0x06, 0x36): + case CPUID(0x06, 0x70): // https://en.wikichip.org/wiki/intel/atom/230 + // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture) + return INTEL_ATOM_BNL; + case CPUID(0x06, 0x37): + case CPUID(0x06, 0x4C): + // https://en.wikipedia.org/wiki/Silvermont + return INTEL_ATOM_SMT; + case CPUID(0x06, 0x5C): + // https://en.wikipedia.org/wiki/Goldmont + return INTEL_ATOM_GMT; + case CPUID(0x06, 0x0F): + case CPUID(0x06, 0x16): + // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture) + return INTEL_CORE; + case CPUID(0x06, 0x17): + case CPUID(0x06, 
0x1D): + // https://en.wikipedia.org/wiki/Penryn_(microarchitecture) + return INTEL_PNR; + case CPUID(0x06, 0x1A): + case CPUID(0x06, 0x1E): + case CPUID(0x06, 0x1F): + case CPUID(0x06, 0x2E): + // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) + return INTEL_NHM; + case CPUID(0x06, 0x25): + case CPUID(0x06, 0x2C): + case CPUID(0x06, 0x2F): + // https://en.wikipedia.org/wiki/Westmere_(microarchitecture) + return INTEL_WSM; + case CPUID(0x06, 0x2A): + case CPUID(0x06, 0x2D): + // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings + return INTEL_SNB; + case CPUID(0x06, 0x3A): + case CPUID(0x06, 0x3E): + // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings + return INTEL_IVB; + case CPUID(0x06, 0x3C): + case CPUID(0x06, 0x3F): + case CPUID(0x06, 0x45): + case CPUID(0x06, 0x46): + // https://en.wikipedia.org/wiki/Haswell_(microarchitecture) + return INTEL_HSW; + case CPUID(0x06, 0x3D): + case CPUID(0x06, 0x47): + case CPUID(0x06, 0x4F): + case CPUID(0x06, 0x56): + // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture) + return INTEL_BDW; + case CPUID(0x06, 0x4E): + case CPUID(0x06, 0x55): + case CPUID(0x06, 0x5E): + // https://en.wikipedia.org/wiki/Skylake_(microarchitecture) + return INTEL_SKL; + case CPUID(0x06, 0x66): + // https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture) + return INTEL_CNL; + case CPUID(0x06, 0x7D): // client + case CPUID(0x06, 0x7E): // client + case CPUID(0x06, 0x9D): // NNP-I + case CPUID(0x06, 0x6A): // server + case CPUID(0x06, 0x6C): // server + // https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor) + return INTEL_ICL; + case CPUID(0x06, 0x8C): + case CPUID(0x06, 0x8D): + // https://en.wikipedia.org/wiki/Tiger_Lake_(microarchitecture) + return INTEL_TGL; + case CPUID(0x06, 0x8F): + // https://en.wikipedia.org/wiki/Sapphire_Rapids + return INTEL_SPR; + case CPUID(0x06, 0x8E): + switch (info->stepping) { + case 9: + return INTEL_KBL; // 
https://en.wikipedia.org/wiki/Kaby_Lake + case 10: + return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake + case 11: + return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture) + default: + return X86_UNKNOWN; + } + case CPUID(0x06, 0x9E): + if (info->stepping > 9) { + // https://en.wikipedia.org/wiki/Coffee_Lake + return INTEL_CFL; + } else { + // https://en.wikipedia.org/wiki/Kaby_Lake + return INTEL_KBL; + } + default: + return X86_UNKNOWN; + } + } + if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_AUTHENTIC_AMD)) { + switch (CPUID(info->family, info->model)) { + // https://en.wikichip.org/wiki/amd/cpuid + case CPUID(0xF, 0x04): + case CPUID(0xF, 0x05): + case CPUID(0xF, 0x07): + case CPUID(0xF, 0x08): + case CPUID(0xF, 0x0C): + case CPUID(0xF, 0x0E): + case CPUID(0xF, 0x0F): + case CPUID(0xF, 0x14): + case CPUID(0xF, 0x15): + case CPUID(0xF, 0x17): + case CPUID(0xF, 0x18): + case CPUID(0xF, 0x1B): + case CPUID(0xF, 0x1C): + case CPUID(0xF, 0x1F): + case CPUID(0xF, 0x21): + case CPUID(0xF, 0x23): + case CPUID(0xF, 0x24): + case CPUID(0xF, 0x25): + case CPUID(0xF, 0x27): + case CPUID(0xF, 0x2B): + case CPUID(0xF, 0x2C): + case CPUID(0xF, 0x2F): + case CPUID(0xF, 0x41): + case CPUID(0xF, 0x43): + case CPUID(0xF, 0x48): + case CPUID(0xF, 0x4B): + case CPUID(0xF, 0x4C): + case CPUID(0xF, 0x4F): + case CPUID(0xF, 0x5D): + case CPUID(0xF, 0x5F): + case CPUID(0xF, 0x68): + case CPUID(0xF, 0x6B): + case CPUID(0xF, 0x6F): + case CPUID(0xF, 0x7F): + case CPUID(0xF, 0xC1): + return AMD_HAMMER; + case CPUID(0x10, 0x02): + case CPUID(0x10, 0x04): + case CPUID(0x10, 0x05): + case CPUID(0x10, 0x06): + case CPUID(0x10, 0x08): + case CPUID(0x10, 0x09): + case CPUID(0x10, 0x0A): + return AMD_K10; + case CPUID(0x11, 0x03): + // http://developer.amd.com/wordpress/media/2012/10/41788.pdf + return AMD_K11; + case CPUID(0x12, 0x01): + // https://www.amd.com/system/files/TechDocs/44739_12h_Rev_Gd.pdf + return AMD_K12; + case CPUID(0x14, 0x00): + case 
CPUID(0x14, 0x01): + case CPUID(0x14, 0x02): + // https://www.amd.com/system/files/TechDocs/47534_14h_Mod_00h-0Fh_Rev_Guide.pdf + return AMD_BOBCAT; + case CPUID(0x15, 0x01): + // https://en.wikichip.org/wiki/amd/microarchitectures/bulldozer + return AMD_BULLDOZER; + case CPUID(0x15, 0x02): + case CPUID(0x15, 0x11): + case CPUID(0x15, 0x13): + // https://en.wikichip.org/wiki/amd/microarchitectures/piledriver + return AMD_PILEDRIVER; + case CPUID(0x15, 0x30): + case CPUID(0x15, 0x38): + // https://en.wikichip.org/wiki/amd/microarchitectures/steamroller + return AMD_STREAMROLLER; + case CPUID(0x15, 0x60): + case CPUID(0x15, 0x65): + case CPUID(0x15, 0x70): + // https://en.wikichip.org/wiki/amd/microarchitectures/excavator + return AMD_EXCAVATOR; + case CPUID(0x16, 0x00): + return AMD_JAGUAR; + case CPUID(0x16, 0x30): + return AMD_PUMA; + case CPUID(0x17, 0x01): + case CPUID(0x17, 0x11): + case CPUID(0x17, 0x18): + case CPUID(0x17, 0x20): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen + return AMD_ZEN; + case CPUID(0x17, 0x08): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B + return AMD_ZEN_PLUS; + case CPUID(0x17, 0x31): + case CPUID(0x17, 0x47): + case CPUID(0x17, 0x60): + case CPUID(0x17, 0x68): + case CPUID(0x17, 0x71): + case CPUID(0x17, 0x90): + case CPUID(0x17, 0x98): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen_2 + return AMD_ZEN2; + case CPUID(0x19, 0x01): + case CPUID(0x19, 0x21): + case CPUID(0x19, 0x30): + case CPUID(0x19, 0x40): + case CPUID(0x19, 0x50): + // https://en.wikichip.org/wiki/amd/microarchitectures/zen_3 + return AMD_ZEN3; + default: + return X86_UNKNOWN; + } + } + if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_HYGON_GENUINE)) { + switch (CPUID(info->family, info->model)) { + case CPUID(0x18, 0x00): + return AMD_ZEN; + } + } + return X86_UNKNOWN; +} + +//////////////////////////////////////////////////////////////////////////////// +// CacheInfo 
+//////////////////////////////////////////////////////////////////////////////// + +static const CacheLevelInfo kEmptyCacheLevelInfo; + +static CacheLevelInfo GetCacheLevelInfo(const uint32_t reg) { + const int UNDEF = -1; + const int KiB = 1024; + const int MiB = 1024 * KiB; + switch (reg) { + case 0x01: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0x02: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 0xFF, + .line_size = UNDEF, + .tlb_entries = 2, + .partitioning = 0}; + case 0x03: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * KiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 64, + .partitioning = 0}; + case 0x04: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 8, + .partitioning = 0}; + case 0x05: + return (CacheLevelInfo){.level = UNDEF, + .cache_type = CPU_FEATURE_CACHE_TLB, + .cache_size = 4 * MiB, + .ways = 4, + .line_size = UNDEF, + .tlb_entries = 32, + .partitioning = 0}; + case 0x06: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, + .cache_size = 8 * KiB, + .ways = 4, + .line_size = 32, + .tlb_entries = UNDEF, + .partitioning = 0}; + case 0x08: + return (CacheLevelInfo){.level = 1, + .cache_type = CPU_FEATURE_CACHE_INSTRUCTION, .cache_size = 16 * KiB, .ways = 4, .line_size = 32, @@ -1139,7 +1480,7 @@ static void ParseLeaf2(const int max_cpuid_leaf, CacheInfo* info) { #if __STDC_VERSION__ >= 201112L _Static_assert(sizeof(Leaf) == sizeof(data), "Leaf must be 16 bytes"); #endif - memcpy(&data, &leaf, sizeof(data)); + copy((char*)(data), (const char*)(&leaf), sizeof(data)); for (size_t i = 0; i < sizeof(data); ++i) { const uint8_t descriptor = 
data[i]; if (descriptor == 0) continue; @@ -1171,299 +1512,23 @@ static void ParseCacheInfo(const int max_cpuid_leaf, uint32_t leaf_id, else break; // Should not occur as per documentation. int level = ExtractBitRange(leaf.eax, 7, 5); - int line_size = ExtractBitRange(leaf.ebx, 11, 0) + 1; - int partitioning = ExtractBitRange(leaf.ebx, 21, 12) + 1; - int ways = ExtractBitRange(leaf.ebx, 31, 22) + 1; - int tlb_entries = leaf.ecx + 1; - int cache_size = ways * partitioning * line_size * tlb_entries; - info.levels[info.size] = (CacheLevelInfo){.level = level, - .cache_type = cache_type, - .cache_size = cache_size, - .ways = ways, - .line_size = line_size, - .tlb_entries = tlb_entries, - .partitioning = partitioning}; - ++info.size; - } - // Override CacheInfo if we successfully extracted Deterministic Cache - // Parameters. - if (info.size > 0) *old_info = info; -} - -#if defined(CPU_FEATURES_OS_DARWIN) -#if defined(CPU_FEATURES_MOCK_CPUID_X86) -extern bool GetDarwinSysCtlByName(const char*); -#else // CPU_FEATURES_MOCK_CPUID_X86 -static bool GetDarwinSysCtlByName(const char* name) { - int enabled; - size_t enabled_len = sizeof(enabled); - const int failure = sysctlbyname(name, &enabled, &enabled_len, NULL, 0); - return failure ? false : enabled; -} -#endif -#endif // CPU_FEATURES_OS_DARWIN - -// Internal structure to hold the OS support for vector operations. -// Avoid to recompute them since each call to cpuid is ~100 cycles. -typedef struct { - bool sse_registers; - bool avx_registers; - bool avx512_registers; - bool amx_registers; -} OsPreserves; - -#if defined(CPU_FEATURES_OS_WINDOWS) -#if defined(CPU_FEATURES_MOCK_CPUID_X86) -extern bool GetWindowsIsProcessorFeaturePresent(DWORD); -#else // CPU_FEATURES_MOCK_CPUID_X86 -static bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { - return IsProcessorFeaturePresent(ProcessorFeature); -} -#endif -#endif // CPU_FEATURES_OS_WINDOWS - -// Reference https://en.wikipedia.org/wiki/CPUID. 
-static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, - OsPreserves* os_preserves) { - const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); - const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); - const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); - - const bool have_xsave = IsBitSet(leaf_1.ecx, 26); - const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); - const bool have_xcr0 = have_xsave && have_osxsave; - - const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); - const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); - const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); - const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16); - - X86Features* const features = &info->features; - - info->family = extended_family + family; - info->model = (extended_model << 4) + model; - info->stepping = ExtractBitRange(leaf_1.eax, 3, 0); - - features->fpu = IsBitSet(leaf_1.edx, 0); - features->tsc = IsBitSet(leaf_1.edx, 4); - features->cx8 = IsBitSet(leaf_1.edx, 8); - features->clfsh = IsBitSet(leaf_1.edx, 19); - features->mmx = IsBitSet(leaf_1.edx, 23); - features->ss = IsBitSet(leaf_1.edx, 27); - features->pclmulqdq = IsBitSet(leaf_1.ecx, 1); - features->smx = IsBitSet(leaf_1.ecx, 6); - features->cx16 = IsBitSet(leaf_1.ecx, 13); - features->dca = IsBitSet(leaf_1.ecx, 18); - features->movbe = IsBitSet(leaf_1.ecx, 22); - features->popcnt = IsBitSet(leaf_1.ecx, 23); - features->aes = IsBitSet(leaf_1.ecx, 25); - features->f16c = IsBitSet(leaf_1.ecx, 29); - features->rdrnd = IsBitSet(leaf_1.ecx, 30); - features->sgx = IsBitSet(leaf_7.ebx, 2); - features->bmi1 = IsBitSet(leaf_7.ebx, 3); - features->hle = IsBitSet(leaf_7.ebx, 4); - features->bmi2 = IsBitSet(leaf_7.ebx, 8); - features->erms = IsBitSet(leaf_7.ebx, 9); - features->rtm = IsBitSet(leaf_7.ebx, 11); - features->rdseed = IsBitSet(leaf_7.ebx, 18); - features->clflushopt = IsBitSet(leaf_7.ebx, 23); - features->clwb = IsBitSet(leaf_7.ebx, 24); - features->sha = 
IsBitSet(leaf_7.ebx, 29); - features->vaes = IsBitSet(leaf_7.ecx, 9); - features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); - features->adx = IsBitSet(leaf_7.ebx, 19); - - ///////////////////////////////////////////////////////////////////////////// - // The following section is devoted to Vector Extensions. - ///////////////////////////////////////////////////////////////////////////// - - // CPU with AVX expose XCR0 which enables checking vector extensions OS - // support through cpuid. - if (have_xcr0) { - // Here we rely exclusively on cpuid for both CPU and OS support of vector - // extensions. - const uint32_t xcr0_eax = GetXCR0Eax(); - os_preserves->sse_registers = HasXmmOsXSave(xcr0_eax); - os_preserves->avx_registers = HasYmmOsXSave(xcr0_eax); -#if defined(CPU_FEATURES_OS_DARWIN) - // On Darwin AVX512 support is On-demand. - // We have to query the OS instead of querying the Zmm save/restore state. - // https://github.com/apple/darwin-xnu/blob/8f02f2a044b9bb1ad951987ef5bab20ec9486310/osfmk/i386/fpu.c#L173-L199 - os_preserves->avx512_registers = - GetDarwinSysCtlByName("hw.optional.avx512f"); -#else - os_preserves->avx512_registers = HasZmmOsXSave(xcr0_eax); -#endif // CPU_FEATURES_OS_DARWIN - os_preserves->amx_registers = HasTmmOsXSave(xcr0_eax); - - if (os_preserves->sse_registers) { - features->sse = IsBitSet(leaf_1.edx, 25); - features->sse2 = IsBitSet(leaf_1.edx, 26); - features->sse3 = IsBitSet(leaf_1.ecx, 0); - features->ssse3 = IsBitSet(leaf_1.ecx, 9); - features->sse4_1 = IsBitSet(leaf_1.ecx, 19); - features->sse4_2 = IsBitSet(leaf_1.ecx, 20); - } - if (os_preserves->avx_registers) { - features->fma3 = IsBitSet(leaf_1.ecx, 12); - features->avx = IsBitSet(leaf_1.ecx, 28); - features->avx2 = IsBitSet(leaf_7.ebx, 5); - } - if (os_preserves->avx512_registers) { - features->avx512f = IsBitSet(leaf_7.ebx, 16); - features->avx512cd = IsBitSet(leaf_7.ebx, 28); - features->avx512er = IsBitSet(leaf_7.ebx, 27); - features->avx512pf = IsBitSet(leaf_7.ebx, 26); - 
features->avx512bw = IsBitSet(leaf_7.ebx, 30); - features->avx512dq = IsBitSet(leaf_7.ebx, 17); - features->avx512vl = IsBitSet(leaf_7.ebx, 31); - features->avx512ifma = IsBitSet(leaf_7.ebx, 21); - features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); - features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); - features->avx512vnni = IsBitSet(leaf_7.ecx, 11); - features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); - features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); - features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); - features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); - features->avx512_second_fma = HasSecondFMA(info->model); - features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); - features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); - features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); - } - if (os_preserves->amx_registers) { - features->amx_bf16 = IsBitSet(leaf_7.edx, 22); - features->amx_tile = IsBitSet(leaf_7.edx, 24); - features->amx_int8 = IsBitSet(leaf_7.edx, 25); - } - } else { - // When XCR0 is not available (Atom based or older cpus) we need to defer to - // the OS via custom code. -#if defined(CPU_FEATURES_OS_WINDOWS) - // Handling Windows platform through IsProcessorFeaturePresent. - // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent - features->sse = - GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); - features->sse2 = - GetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); - features->sse3 = - GetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); -#elif defined(CPU_FEATURES_OS_DARWIN) - // Handling Darwin platform through sysctlbyname. 
- features->sse = GetDarwinSysCtlByName("hw.optional.sse"); - features->sse2 = GetDarwinSysCtlByName("hw.optional.sse2"); - features->sse3 = GetDarwinSysCtlByName("hw.optional.sse3"); - features->ssse3 = GetDarwinSysCtlByName("hw.optional.supplementalsse3"); - features->sse4_1 = GetDarwinSysCtlByName("hw.optional.sse4_1"); - features->sse4_2 = GetDarwinSysCtlByName("hw.optional.sse4_2"); -#elif defined(CPU_FEATURES_OS_FREEBSD) - // Handling FreeBSD platform through parsing /var/run/dmesg.boot. - const int fd = CpuFeatures_OpenFile("/var/run/dmesg.boot"); - if (fd >= 0) { - StackLineReader reader; - StackLineReader_Initialize(&reader, fd); - for (bool stop = false; !stop;) { - const LineResult result = StackLineReader_NextLine(&reader); - if (result.eof) stop = true; - const StringView line = result.line; - if (!CpuFeatures_StringView_StartsWith(line, str(" Features"))) - continue; - // Lines of interests are of the following form: - // " Features=0x1783fbff<PSE36,MMX,FXSR,SSE,SSE2,HTT>" - // We first extract the comma separated values between angle brackets. - StringView csv = result.line; - int index = CpuFeatures_StringView_IndexOfChar(csv, '<'); - if (index >= 0) csv = CpuFeatures_StringView_PopFront(csv, index + 1); - if (csv.size > 0 && CpuFeatures_StringView_Back(csv) == '>') - csv = CpuFeatures_StringView_PopBack(csv, 1); - if (CpuFeatures_StringView_HasWord(csv, "SSE", ',')) - features->sse = true; - if (CpuFeatures_StringView_HasWord(csv, "SSE2", ',')) - features->sse2 = true; - if (CpuFeatures_StringView_HasWord(csv, "SSE3", ',')) - features->sse3 = true; - if (CpuFeatures_StringView_HasWord(csv, "SSSE3", ',')) - features->ssse3 = true; - if (CpuFeatures_StringView_HasWord(csv, "SSE4.1", ',')) - features->sse4_1 = true; - if (CpuFeatures_StringView_HasWord(csv, "SSE4.2", ',')) - features->sse4_2 = true; - } - CpuFeatures_CloseFile(fd); - } -#elif defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) - // Handling Linux platform through /proc/cpuinfo. 
- const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); - if (fd >= 0) { - StackLineReader reader; - StackLineReader_Initialize(&reader, fd); - for (bool stop = false; !stop;) { - const LineResult result = StackLineReader_NextLine(&reader); - if (result.eof) stop = true; - const StringView line = result.line; - StringView key, value; - if (!CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) - continue; - if (!CpuFeatures_StringView_IsEquals(key, str("flags"))) continue; - features->sse = CpuFeatures_StringView_HasWord(value, "sse", ' '); - features->sse2 = CpuFeatures_StringView_HasWord(value, "sse2", ' '); - features->sse3 = CpuFeatures_StringView_HasWord(value, "sse3", ' '); - features->ssse3 = CpuFeatures_StringView_HasWord(value, "ssse3", ' '); - features->sse4_1 = CpuFeatures_StringView_HasWord(value, "sse4_1", ' '); - features->sse4_2 = CpuFeatures_StringView_HasWord(value, "sse4_2", ' '); - break; - } - CpuFeatures_CloseFile(fd); - } -#else -#error "Unsupported fallback detection of SSE OS support." -#endif - // Now that we have queried the OS for SSE support, we report this back to - // os_preserves. This is needed in case of AMD CPU's to enable testing of - // sse4a (See ParseExtraAMDCpuId below). - if (features->sse) os_preserves->sse_registers = true; - } -} - -// Reference -// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. 
-static Leaf GetLeafByIdAMD(uint32_t leaf_id) { - uint32_t max_extended = CpuId(0x80000000).eax; - return SafeCpuId(max_extended, leaf_id); -} - -static void ParseExtraAMDCpuId(X86Info* info, OsPreserves os_preserves) { - const Leaf leaf_80000001 = GetLeafByIdAMD(0x80000001); - - X86Features* const features = &info->features; - - if (os_preserves.sse_registers) { - features->sse4a = IsBitSet(leaf_80000001.ecx, 6); - } - - if (os_preserves.avx_registers) { - features->fma4 = IsBitSet(leaf_80000001.ecx, 16); - } -} - -static const X86Info kEmptyX86Info; -static const OsPreserves kEmptyOsPreserves; - -X86Info GetX86Info(void) { - X86Info info = kEmptyX86Info; - const Leaf leaf_0 = CpuId(0); - const bool is_intel = IsVendor(leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL); - const bool is_amd = IsVendor(leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD); - const bool is_hygon = IsVendor(leaf_0, CPU_FEATURES_VENDOR_HYGON_GENUINE); - SetVendor(leaf_0, info.vendor); - if (is_intel || is_amd || is_hygon) { - OsPreserves os_preserves = kEmptyOsPreserves; - const uint32_t max_cpuid_leaf = leaf_0.eax; - ParseCpuId(max_cpuid_leaf, &info, &os_preserves); - if (is_amd || is_hygon) { - ParseExtraAMDCpuId(&info, os_preserves); - } + int line_size = ExtractBitRange(leaf.ebx, 11, 0) + 1; + int partitioning = ExtractBitRange(leaf.ebx, 21, 12) + 1; + int ways = ExtractBitRange(leaf.ebx, 31, 22) + 1; + int tlb_entries = leaf.ecx + 1; + int cache_size = ways * partitioning * line_size * tlb_entries; + info.levels[info.size] = (CacheLevelInfo){.level = level, + .cache_type = cache_type, + .cache_size = cache_size, + .ways = ways, + .line_size = line_size, + .tlb_entries = tlb_entries, + .partitioning = partitioning}; + ++info.size; } - return info; + // Override CacheInfo if we successfully extracted Deterministic Cache + // Parameters. 
+ if (info.size > 0) *old_info = info; } CacheInfo GetX86CacheInfo(void) { @@ -1487,326 +1552,116 @@ CacheInfo GetX86CacheInfo(void) { return info; } -#define CPUID(FAMILY, MODEL) ((((FAMILY)&0xFF) << 8) | ((MODEL)&0xFF)) - -X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { - if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_GENUINE_INTEL)) { - switch (CPUID(info->family, info->model)) { - case CPUID(0x06, 0x1C): // Intel(R) Atom(TM) CPU 230 @ 1.60GHz - case CPUID(0x06, 0x35): - case CPUID(0x06, 0x36): - case CPUID(0x06, 0x70): // https://en.wikichip.org/wiki/intel/atom/230 - // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture) - return INTEL_ATOM_BNL; - case CPUID(0x06, 0x37): - case CPUID(0x06, 0x4C): - // https://en.wikipedia.org/wiki/Silvermont - return INTEL_ATOM_SMT; - case CPUID(0x06, 0x5C): - // https://en.wikipedia.org/wiki/Goldmont - return INTEL_ATOM_GMT; - case CPUID(0x06, 0x0F): - case CPUID(0x06, 0x16): - // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture) - return INTEL_CORE; - case CPUID(0x06, 0x17): - case CPUID(0x06, 0x1D): - // https://en.wikipedia.org/wiki/Penryn_(microarchitecture) - return INTEL_PNR; - case CPUID(0x06, 0x1A): - case CPUID(0x06, 0x1E): - case CPUID(0x06, 0x1F): - case CPUID(0x06, 0x2E): - // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) - return INTEL_NHM; - case CPUID(0x06, 0x25): - case CPUID(0x06, 0x2C): - case CPUID(0x06, 0x2F): - // https://en.wikipedia.org/wiki/Westmere_(microarchitecture) - return INTEL_WSM; - case CPUID(0x06, 0x2A): - case CPUID(0x06, 0x2D): - // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings - return INTEL_SNB; - case CPUID(0x06, 0x3A): - case CPUID(0x06, 0x3E): - // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings - return INTEL_IVB; - case CPUID(0x06, 0x3C): - case CPUID(0x06, 0x3F): - case CPUID(0x06, 0x45): - case CPUID(0x06, 0x46): - // https://en.wikipedia.org/wiki/Haswell_(microarchitecture) - 
return INTEL_HSW; - case CPUID(0x06, 0x3D): - case CPUID(0x06, 0x47): - case CPUID(0x06, 0x4F): - case CPUID(0x06, 0x56): - // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture) - return INTEL_BDW; - case CPUID(0x06, 0x4E): - case CPUID(0x06, 0x55): - case CPUID(0x06, 0x5E): - // https://en.wikipedia.org/wiki/Skylake_(microarchitecture) - return INTEL_SKL; - case CPUID(0x06, 0x66): - // https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture) - return INTEL_CNL; - case CPUID(0x06, 0x7D): // client - case CPUID(0x06, 0x7E): // client - case CPUID(0x06, 0x9D): // NNP-I - case CPUID(0x06, 0x6A): // server - case CPUID(0x06, 0x6C): // server - // https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor) - return INTEL_ICL; - case CPUID(0x06, 0x8C): - case CPUID(0x06, 0x8D): - // https://en.wikipedia.org/wiki/Tiger_Lake_(microarchitecture) - return INTEL_TGL; - case CPUID(0x06, 0x8F): - // https://en.wikipedia.org/wiki/Sapphire_Rapids - return INTEL_SPR; - case CPUID(0x06, 0x8E): - switch (info->stepping) { - case 9: - return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake - case 10: - return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake - case 11: - return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture) - default: - return X86_UNKNOWN; - } - case CPUID(0x06, 0x9E): - if (info->stepping > 9) { - // https://en.wikipedia.org/wiki/Coffee_Lake - return INTEL_CFL; - } else { - // https://en.wikipedia.org/wiki/Kaby_Lake - return INTEL_KBL; - } - default: - return X86_UNKNOWN; - } - } - if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_AUTHENTIC_AMD)) { - switch (CPUID(info->family, info->model)) { - // https://en.wikichip.org/wiki/amd/cpuid - case CPUID(0xF, 0x04): - case CPUID(0xF, 0x05): - case CPUID(0xF, 0x07): - case CPUID(0xF, 0x08): - case CPUID(0xF, 0x0C): - case CPUID(0xF, 0x0E): - case CPUID(0xF, 0x0F): - case CPUID(0xF, 0x14): - case CPUID(0xF, 0x15): - case CPUID(0xF, 0x17): - case CPUID(0xF, 0x18): - case 
CPUID(0xF, 0x1B): - case CPUID(0xF, 0x1C): - case CPUID(0xF, 0x1F): - case CPUID(0xF, 0x21): - case CPUID(0xF, 0x23): - case CPUID(0xF, 0x24): - case CPUID(0xF, 0x25): - case CPUID(0xF, 0x27): - case CPUID(0xF, 0x2B): - case CPUID(0xF, 0x2C): - case CPUID(0xF, 0x2F): - case CPUID(0xF, 0x41): - case CPUID(0xF, 0x43): - case CPUID(0xF, 0x48): - case CPUID(0xF, 0x4B): - case CPUID(0xF, 0x4C): - case CPUID(0xF, 0x4F): - case CPUID(0xF, 0x5D): - case CPUID(0xF, 0x5F): - case CPUID(0xF, 0x68): - case CPUID(0xF, 0x6B): - case CPUID(0xF, 0x6F): - case CPUID(0xF, 0x7F): - case CPUID(0xF, 0xC1): - return AMD_HAMMER; - case CPUID(0x10, 0x02): - case CPUID(0x10, 0x04): - case CPUID(0x10, 0x05): - case CPUID(0x10, 0x06): - case CPUID(0x10, 0x08): - case CPUID(0x10, 0x09): - case CPUID(0x10, 0x0A): - return AMD_K10; - case CPUID(0x11, 0x03): - // http://developer.amd.com/wordpress/media/2012/10/41788.pdf - return AMD_K11; - case CPUID(0x12, 0x01): - // https://www.amd.com/system/files/TechDocs/44739_12h_Rev_Gd.pdf - return AMD_K12; - case CPUID(0x14, 0x00): - case CPUID(0x14, 0x01): - case CPUID(0x14, 0x02): - // https://www.amd.com/system/files/TechDocs/47534_14h_Mod_00h-0Fh_Rev_Guide.pdf - return AMD_BOBCAT; - case CPUID(0x15, 0x01): - // https://en.wikichip.org/wiki/amd/microarchitectures/bulldozer - return AMD_BULLDOZER; - case CPUID(0x15, 0x02): - case CPUID(0x15, 0x11): - case CPUID(0x15, 0x13): - // https://en.wikichip.org/wiki/amd/microarchitectures/piledriver - return AMD_PILEDRIVER; - case CPUID(0x15, 0x30): - case CPUID(0x15, 0x38): - // https://en.wikichip.org/wiki/amd/microarchitectures/steamroller - return AMD_STREAMROLLER; - case CPUID(0x15, 0x60): - case CPUID(0x15, 0x65): - case CPUID(0x15, 0x70): - // https://en.wikichip.org/wiki/amd/microarchitectures/excavator - return AMD_EXCAVATOR; - case CPUID(0x16, 0x00): - return AMD_JAGUAR; - case CPUID(0x16, 0x30): - return AMD_PUMA; - case CPUID(0x17, 0x01): - case CPUID(0x17, 0x11): - case CPUID(0x17, 0x18): - case 
CPUID(0x17, 0x20): - // https://en.wikichip.org/wiki/amd/microarchitectures/zen - return AMD_ZEN; - case CPUID(0x17, 0x08): - // https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B - return AMD_ZEN_PLUS; - case CPUID(0x17, 0x31): - case CPUID(0x17, 0x47): - case CPUID(0x17, 0x60): - case CPUID(0x17, 0x68): - case CPUID(0x17, 0x71): - case CPUID(0x17, 0x90): - case CPUID(0x17, 0x98): - // https://en.wikichip.org/wiki/amd/microarchitectures/zen_2 - return AMD_ZEN2; - case CPUID(0x19, 0x01): - case CPUID(0x19, 0x21): - case CPUID(0x19, 0x30): - case CPUID(0x19, 0x40): - case CPUID(0x19, 0x50): - // https://en.wikichip.org/wiki/amd/microarchitectures/zen_3 - return AMD_ZEN3; - default: - return X86_UNKNOWN; - } - } - if (IsVendorByX86Info(info, CPU_FEATURES_VENDOR_HYGON_GENUINE)) { - switch (CPUID(info->family, info->model)) { - case CPUID(0x18, 0x00): - return AMD_ZEN; - } - } - return X86_UNKNOWN; -} - -void FillX86BrandString(char brand_string[49]) { - const Leaf leaf_ext_0 = CpuId(0x80000000); - const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax; - const Leaf leaves[3] = { - SafeCpuId(max_cpuid_leaf_ext, 0x80000002), - SafeCpuId(max_cpuid_leaf_ext, 0x80000003), - SafeCpuId(max_cpuid_leaf_ext, 0x80000004), - }; -#if __STDC_VERSION__ >= 201112L - _Static_assert(sizeof(leaves) == 48, "Leaves must be packed"); -#endif - CpuFeatures_StringView_CopyString(view((const char*)leaves, sizeof(leaves)), - brand_string, 49); -} - //////////////////////////////////////////////////////////////////////////////// -// Introspection functions - -int GetX86FeaturesEnumValue(const X86Features* features, - X86FeaturesEnum value) { - if (value >= X86_LAST_) return false; - return kGetters[value](features); -} - -const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { - if (value >= X86_LAST_) return "unknown_feature"; - return kCpuInfoFlags[value]; -} - -const char* GetX86MicroarchitectureName(X86Microarchitecture uarch) { - switch (uarch) { - case X86_UNKNOWN: - return 
"X86_UNKNOWN"; - case INTEL_CORE: - return "INTEL_CORE"; - case INTEL_PNR: - return "INTEL_PNR"; - case INTEL_NHM: - return "INTEL_NHM"; - case INTEL_ATOM_BNL: - return "INTEL_ATOM_BNL"; - case INTEL_WSM: - return "INTEL_WSM"; - case INTEL_SNB: - return "INTEL_SNB"; - case INTEL_IVB: - return "INTEL_IVB"; - case INTEL_ATOM_SMT: - return "INTEL_ATOM_SMT"; - case INTEL_HSW: - return "INTEL_HSW"; - case INTEL_BDW: - return "INTEL_BDW"; - case INTEL_SKL: - return "INTEL_SKL"; - case INTEL_ATOM_GMT: - return "INTEL_ATOM_GMT"; - case INTEL_KBL: - return "INTEL_KBL"; - case INTEL_CFL: - return "INTEL_CFL"; - case INTEL_WHL: - return "INTEL_WHL"; - case INTEL_CNL: - return "INTEL_CNL"; - case INTEL_ICL: - return "INTEL_ICL"; - case INTEL_TGL: - return "INTEL_TGL"; - case INTEL_SPR: - return "INTEL_SPR"; - case AMD_HAMMER: - return "AMD_HAMMER"; - case AMD_K10: - return "AMD_K10"; - case AMD_K11: - return "AMD_K11"; - case AMD_K12: - return "AMD_K12"; - case AMD_BOBCAT: - return "AMD_BOBCAT"; - case AMD_PILEDRIVER: - return "AMD_PILEDRIVER"; - case AMD_STREAMROLLER: - return "AMD_STREAMROLLER"; - case AMD_EXCAVATOR: - return "AMD_EXCAVATOR"; - case AMD_BULLDOZER: - return "AMD_BULLDOZER"; - case AMD_PUMA: - return "AMD_PUMA"; - case AMD_JAGUAR: - return "AMD_JAGUAR"; - case AMD_ZEN: - return "AMD_ZEN"; - case AMD_ZEN_PLUS: - return "AMD_ZEN_PLUS"; - case AMD_ZEN2: - return "AMD_ZEN2"; - case AMD_ZEN3: - return "AMD_ZEN3"; - } - return "unknown microarchitecture"; +// Definitions for introspection. 
+//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(X86_FPU, fpu, , , ) \ + LINE(X86_TSC, tsc, , , ) \ + LINE(X86_CX8, cx8, , , ) \ + LINE(X86_CLFSH, clfsh, , , ) \ + LINE(X86_MMX, mmx, , , ) \ + LINE(X86_AES, aes, , , ) \ + LINE(X86_ERMS, erms, , , ) \ + LINE(X86_F16C, f16c, , , ) \ + LINE(X86_FMA4, fma4, , , ) \ + LINE(X86_FMA3, fma3, , , ) \ + LINE(X86_VAES, vaes, , , ) \ + LINE(X86_VPCLMULQDQ, vpclmulqdq, , , ) \ + LINE(X86_BMI1, bmi1, , , ) \ + LINE(X86_HLE, hle, , , ) \ + LINE(X86_BMI2, bmi2, , , ) \ + LINE(X86_RTM, rtm, , , ) \ + LINE(X86_RDSEED, rdseed, , , ) \ + LINE(X86_CLFLUSHOPT, clflushopt, , , ) \ + LINE(X86_CLWB, clwb, , , ) \ + LINE(X86_SSE, sse, , , ) \ + LINE(X86_SSE2, sse2, , , ) \ + LINE(X86_SSE3, sse3, , , ) \ + LINE(X86_SSSE3, ssse3, , , ) \ + LINE(X86_SSE4_1, sse4_1, , , ) \ + LINE(X86_SSE4_2, sse4_2, , , ) \ + LINE(X86_SSE4A, sse4a, , , ) \ + LINE(X86_AVX, avx, , , ) \ + LINE(X86_AVX2, avx2, , , ) \ + LINE(X86_AVX512F, avx512f, , , ) \ + LINE(X86_AVX512CD, avx512cd, , , ) \ + LINE(X86_AVX512ER, avx512er, , , ) \ + LINE(X86_AVX512PF, avx512pf, , , ) \ + LINE(X86_AVX512BW, avx512bw, , , ) \ + LINE(X86_AVX512DQ, avx512dq, , , ) \ + LINE(X86_AVX512VL, avx512vl, , , ) \ + LINE(X86_AVX512IFMA, avx512ifma, , , ) \ + LINE(X86_AVX512VBMI, avx512vbmi, , , ) \ + LINE(X86_AVX512VBMI2, avx512vbmi2, , , ) \ + LINE(X86_AVX512VNNI, avx512vnni, , , ) \ + LINE(X86_AVX512BITALG, avx512bitalg, , , ) \ + LINE(X86_AVX512VPOPCNTDQ, avx512vpopcntdq, , , ) \ + LINE(X86_AVX512_4VNNIW, avx512_4vnniw, , , ) \ + LINE(X86_AVX512_4VBMI2, avx512_4vbmi2, , , ) \ + LINE(X86_AVX512_SECOND_FMA, avx512_second_fma, , , ) \ + LINE(X86_AVX512_4FMAPS, avx512_4fmaps, , , ) \ + LINE(X86_AVX512_BF16, avx512_bf16, , , ) \ + LINE(X86_AVX512_VP2INTERSECT, avx512_vp2intersect, , , ) \ + LINE(X86_AMX_BF16, amx_bf16, , , ) \ + LINE(X86_AMX_TILE, amx_tile, , , ) \ + LINE(X86_AMX_INT8, amx_int8, , , ) \ + LINE(X86_PCLMULQDQ, 
pclmulqdq, , , ) \ + LINE(X86_SMX, smx, , , ) \ + LINE(X86_SGX, sgx, , , ) \ + LINE(X86_CX16, cx16, , , ) \ + LINE(X86_SHA, sha, , , ) \ + LINE(X86_POPCNT, popcnt, , , ) \ + LINE(X86_MOVBE, movbe, , , ) \ + LINE(X86_RDRND, rdrnd, , , ) \ + LINE(X86_DCA, dca, , , ) \ + LINE(X86_SS, ss, , , ) \ + LINE(X86_ADX, adx, , , ) +#define INTROSPECTION_PREFIX X86 +#define INTROSPECTION_ENUM_PREFIX X86 +#include "define_introspection.inl" + +#define X86_MICROARCHITECTURE_NAMES \ + LINE(X86_UNKNOWN) \ + LINE(INTEL_CORE) \ + LINE(INTEL_PNR) \ + LINE(INTEL_NHM) \ + LINE(INTEL_ATOM_BNL) \ + LINE(INTEL_WSM) \ + LINE(INTEL_SNB) \ + LINE(INTEL_IVB) \ + LINE(INTEL_ATOM_SMT) \ + LINE(INTEL_HSW) \ + LINE(INTEL_BDW) \ + LINE(INTEL_SKL) \ + LINE(INTEL_ATOM_GMT) \ + LINE(INTEL_KBL) \ + LINE(INTEL_CFL) \ + LINE(INTEL_WHL) \ + LINE(INTEL_CNL) \ + LINE(INTEL_ICL) \ + LINE(INTEL_TGL) \ + LINE(INTEL_SPR) \ + LINE(AMD_HAMMER) \ + LINE(AMD_K10) \ + LINE(AMD_K11) \ + LINE(AMD_K12) \ + LINE(AMD_BOBCAT) \ + LINE(AMD_PILEDRIVER) \ + LINE(AMD_STREAMROLLER) \ + LINE(AMD_EXCAVATOR) \ + LINE(AMD_BULLDOZER) \ + LINE(AMD_JAGUAR) \ + LINE(AMD_PUMA) \ + LINE(AMD_ZEN) \ + LINE(AMD_ZEN_PLUS) \ + LINE(AMD_ZEN2) \ + LINE(AMD_ZEN3) + +const char* GetX86MicroarchitectureName(X86Microarchitecture value) { +#define LINE(ENUM) [ENUM] = STRINGIZE(ENUM), + static const char* kMicroarchitectureNames[] = {X86_MICROARCHITECTURE_NAMES}; +#undef LINE + if (value >= X86_MICROARCHITECTURE_LAST_) return "unknown microarchitecture"; + return kMicroarchitectureNames[value]; } diff --git a/src/impl_x86_freebsd.c b/src/impl_x86_freebsd.c new file mode 100644 index 00000000..7f481f95 --- /dev/null +++ b/src/impl_x86_freebsd.c @@ -0,0 +1,66 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#ifdef CPU_FEATURES_OS_FREEBSD + +#include "impl_x86__base_implementation.inl" + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + // No override +} + +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +static void DetectFeaturesFromOs(X86Features* features) { + // Handling FreeBSD platform through parsing /var/run/dmesg.boot. + const int fd = CpuFeatures_OpenFile("/var/run/dmesg.boot"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (bool stop = false; !stop;) { + const LineResult result = StackLineReader_NextLine(&reader); + if (result.eof) stop = true; + const StringView line = result.line; + if (!CpuFeatures_StringView_StartsWith(line, str(" Features"))) continue; + // Lines of interests are of the following form: + // " Features=0x1783fbff<PSE36,MMX,FXSR,SSE,SSE2,HTT>" + // We first extract the comma separated values between angle brackets. 
+ StringView csv = result.line; + int index = CpuFeatures_StringView_IndexOfChar(csv, '<'); + if (index >= 0) csv = CpuFeatures_StringView_PopFront(csv, index + 1); + if (csv.size > 0 && CpuFeatures_StringView_Back(csv) == '>') + csv = CpuFeatures_StringView_PopBack(csv, 1); + if (CpuFeatures_StringView_HasWord(csv, "SSE", ',')) features->sse = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE2", ',')) + features->sse2 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE3", ',')) + features->sse3 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSSE3", ',')) + features->ssse3 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE4.1", ',')) + features->sse4_1 = true; + if (CpuFeatures_StringView_HasWord(csv, "SSE4.2", ',')) + features->sse4_2 = true; + } + CpuFeatures_CloseFile(fd); + } +} + +#endif // CPU_FEATURES_OS_FREEBSD +#endif // CPU_FEATURES_ARCH_X86 diff --git a/src/impl_x86_linux_or_android.c b/src/impl_x86_linux_or_android.c new file mode 100644 index 00000000..cad46b34 --- /dev/null +++ b/src/impl_x86_linux_or_android.c @@ -0,0 +1,56 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#if defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) + +#include "impl_x86__base_implementation.inl" + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + // No override: this hook leaves os_preserves unmodified. +} + +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" +static void DetectFeaturesFromOs(X86Features* features) { + // Handling Linux platform through /proc/cpuinfo. The six SSE-family flags are overwritten from the first "flags" entry found; if there is no such entry, or the file cannot be opened, `features` is left as is. + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (bool stop = false; !stop;) { + const LineResult result = StackLineReader_NextLine(&reader); + if (result.eof) stop = true; + const StringView line = result.line; + StringView key, value; + if (!CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) + continue; + if (!CpuFeatures_StringView_IsEquals(key, str("flags"))) continue; + features->sse = CpuFeatures_StringView_HasWord(value, "sse", ' '); + features->sse2 = CpuFeatures_StringView_HasWord(value, "sse2", ' '); + features->sse3 = CpuFeatures_StringView_HasWord(value, "sse3", ' '); + features->ssse3 = CpuFeatures_StringView_HasWord(value, "ssse3", ' '); + features->sse4_1 = CpuFeatures_StringView_HasWord(value, "sse4_1", ' '); + features->sse4_2 = CpuFeatures_StringView_HasWord(value, "sse4_2", ' '); + break; // Only the first "flags" entry is consulted. 
+ } + CpuFeatures_CloseFile(fd); + } +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_X86 diff --git a/src/impl_x86_macos.c b/src/impl_x86_macos.c new file mode 100644 index 00000000..ca92d829 --- /dev/null +++ b/src/impl_x86_macos.c @@ -0,0 +1,52 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#ifdef CPU_FEATURES_OS_MACOS + +#include "impl_x86__base_implementation.inl" + +#if !defined(HAVE_SYSCTLBYNAME) +#error "Darwin needs support for sysctlbyname" +#endif +#include <sys/sysctl.h> + +static bool GetDarwinSysCtlByName(const char* name) { + int enabled; + size_t enabled_len = sizeof(enabled); + const int failure = sysctlbyname(name, &enabled, &enabled_len, NULL, 0); + return failure ? false : enabled; // A failed query is reported as false. +} + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + // On Darwin AVX512 support is On-demand. + // We have to query the OS instead of querying the Zmm save/restore state. + // https://github.com/apple/darwin-xnu/blob/8f02f2a044b9bb1ad951987ef5bab20ec9486310/osfmk/i386/fpu.c#L173-L199 + os_preserves->avx512_registers = GetDarwinSysCtlByName("hw.optional.avx512f"); +} + +static void DetectFeaturesFromOs(X86Features* features) { + // Handling Darwin platform through sysctlbyname. 
+ features->sse = GetDarwinSysCtlByName("hw.optional.sse"); + features->sse2 = GetDarwinSysCtlByName("hw.optional.sse2"); + features->sse3 = GetDarwinSysCtlByName("hw.optional.sse3"); + features->ssse3 = GetDarwinSysCtlByName("hw.optional.supplementalsse3"); + features->sse4_1 = GetDarwinSysCtlByName("hw.optional.sse4_1"); + features->sse4_2 = GetDarwinSysCtlByName("hw.optional.sse4_2"); +} + +#endif // CPU_FEATURES_OS_MACOS +#endif // CPU_FEATURES_ARCH_X86 diff --git a/src/impl_x86_windows.c b/src/impl_x86_windows.c new file mode 100644 index 00000000..050fd4a7 --- /dev/null +++ b/src/impl_x86_windows.c @@ -0,0 +1,48 @@ +// Copyright 2017 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_X86 +#ifdef CPU_FEATURES_OS_WINDOWS + +#include "impl_x86__base_implementation.inl" + +static void OverrideOsPreserves(OsPreserves* os_preserves) { + // No override +} + +#include <windows.h> // IsProcessorFeaturePresent + +#if defined(CPU_FEATURES_MOCK_CPUID_X86) +extern bool GetWindowsIsProcessorFeaturePresent(DWORD); +#else // CPU_FEATURES_MOCK_CPUID_X86 +static bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { + return IsProcessorFeaturePresent(ProcessorFeature); +} +#endif + +static void DetectFeaturesFromOs(X86Features* features) { + // Handling Windows platform through IsProcessorFeaturePresent. 
+ // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + features->sse = + GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); + features->sse2 = + GetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + features->sse3 = + GetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); +} + +#endif // CPU_FEATURES_OS_WINDOWS +#endif // CPU_FEATURES_ARCH_X86 diff --git a/src/string_view.c b/src/string_view.c index a20585c9..d2317db0 100644 --- a/src/string_view.c +++ b/src/string_view.c @@ -16,11 +16,21 @@ #include <assert.h> #include <ctype.h> -#include <string.h> + +#include "copy.h" +#include "equals.h" + +// Returns the first occurrence of `c` among the first `size` bytes of `ptr`, +// or NULL. The scan is bounded by `size` (a view need not hold a NUL byte) and also stops at a NUL. +static const char* CpuFeatures_memchr(const char* ptr, size_t size, char c) { + for (size_t i = 0; ptr && i < size && ptr[i] != '\0'; ++i) + if (ptr[i] == c) return &ptr[i]; + return NULL; +} int CpuFeatures_StringView_IndexOfChar(const StringView view, char c) { if (view.ptr && view.size) { - const char* const found = (const char*)memchr(view.ptr, c, view.size); + const char* const found = CpuFeatures_memchr(view.ptr, view.size, c); if (found) { return (int)(found - view.ptr); } @@ -48,14 +58,14 @@ int CpuFeatures_StringView_IndexOf(const StringView view, bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b) { if (a.size == b.size) { - return a.ptr == b.ptr || memcmp(a.ptr, b.ptr, b.size) == 0; + return a.ptr == b.ptr || equals(a.ptr, b.ptr, b.size); } return false; } bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b) { return a.ptr && b.ptr && b.size && a.size >= b.size - ? memcmp(a.ptr, b.ptr, b.size) == 0 + ? 
equals(a.ptr, b.ptr, b.size) : false; } @@ -138,7 +148,7 @@ void CpuFeatures_StringView_CopyString(const StringView src, char* dst, const size_t max_copy_size = dst_size - 1; const size_t copy_size = src.size > max_copy_size ? 
max_copy_size : src.size; - memcpy(dst, src.ptr, copy_size); + copy(dst, src.ptr, copy_size); dst[copy_size] = '\0'; } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c10e617a..3b45f774 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,7 +47,13 @@ add_test(NAME stack_line_reader_test COMMAND stack_line_reader_test) ##------------------------------------------------------------------------------ ## cpuinfo_x86_test if(PROCESSOR_IS_X86) - add_executable(cpuinfo_x86_test cpuinfo_x86_test.cc ../src/cpuinfo_x86.c) + add_executable(cpuinfo_x86_test + cpuinfo_x86_test.cc + ../src/impl_x86_freebsd.c + ../src/impl_x86_linux_or_android.c + ../src/impl_x86_macos.c + ../src/impl_x86_windows.c + ) target_compile_definitions(cpuinfo_x86_test PUBLIC CPU_FEATURES_MOCK_CPUID_X86) if(APPLE) target_compile_definitions(cpuinfo_x86_test PRIVATE HAVE_SYSCTLBYNAME) @@ -58,28 +64,28 @@ endif() ##------------------------------------------------------------------------------ ## cpuinfo_arm_test if(PROCESSOR_IS_ARM) - add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/cpuinfo_arm.c) + add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/impl_arm_linux_or_android.c) target_link_libraries(cpuinfo_arm_test all_libraries) add_test(NAME cpuinfo_arm_test COMMAND cpuinfo_arm_test) endif() ##------------------------------------------------------------------------------ ## cpuinfo_aarch64_test if(PROCESSOR_IS_AARCH64) - add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/cpuinfo_aarch64.c) + add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/impl_aarch64_linux_or_android.c) target_link_libraries(cpuinfo_aarch64_test all_libraries) add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test) endif() ##------------------------------------------------------------------------------ ## cpuinfo_mips_test if(PROCESSOR_IS_MIPS) - add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc ../src/cpuinfo_mips.c) + 
add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc ../src/impl_mips_linux_or_android.c) target_link_libraries(cpuinfo_mips_test all_libraries) add_test(NAME cpuinfo_mips_test COMMAND cpuinfo_mips_test) endif() ##------------------------------------------------------------------------------ ## cpuinfo_ppc_test if(PROCESSOR_IS_POWER) - add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc ../src/cpuinfo_ppc.c) + add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc ../src/impl_ppc_linux.c) target_link_libraries(cpuinfo_ppc_test all_libraries) add_test(NAME cpuinfo_ppc_test COMMAND cpuinfo_ppc_test) endif()