Improve AVX512 feature detection

This commit is contained in:
Justine Tunney 2024-05-05 05:45:52 -07:00
parent 317c8bc312
commit 5fd7b07fac
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
7 changed files with 492 additions and 144 deletions

View file

@ -12,135 +12,9 @@
//
//===----------------------------------------------------------------------===//
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
#include "libc/intrin/x86.h"
enum VendorSignatures {
SIG_INTEL = 0x756e6547, // Genu
SIG_AMD = 0x68747541, // Auth
};
enum ProcessorVendors {
VENDOR_INTEL = 1,
VENDOR_AMD,
VENDOR_OTHER,
VENDOR_MAX
};
enum ProcessorTypes {
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
INTEL_SILVERMONT,
INTEL_KNL,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
INTEL_KNM,
INTEL_GOLDMONT,
INTEL_GOLDMONT_PLUS,
INTEL_TREMONT,
AMDFAM19H,
ZHAOXIN_FAM7H,
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
CPU_TYPE_MAX
};
enum ProcessorSubtypes {
INTEL_COREI7_NEHALEM = 1,
INTEL_COREI7_WESTMERE,
INTEL_COREI7_SANDYBRIDGE,
AMDFAM10H_BARCELONA,
AMDFAM10H_SHANGHAI,
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
INTEL_COREI7_CANNONLAKE,
INTEL_COREI7_ICELAKE_CLIENT,
INTEL_COREI7_ICELAKE_SERVER,
AMDFAM17H_ZNVER2,
INTEL_COREI7_CASCADELAKE,
INTEL_COREI7_TIGERLAKE,
INTEL_COREI7_COOPERLAKE,
INTEL_COREI7_SAPPHIRERAPIDS,
INTEL_COREI7_ALDERLAKE,
AMDFAM19H_ZNVER3,
INTEL_COREI7_ROCKETLAKE,
ZHAOXIN_FAM7H_LUJIAZUI,
AMDFAM19H_ZNVER4,
INTEL_COREI7_GRANITERAPIDS,
INTEL_COREI7_GRANITERAPIDS_D,
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
CPU_SUBTYPE_MAX
};
enum ProcessorFeatures {
FEATURE_CMOV = 0,
FEATURE_MMX,
FEATURE_POPCNT,
FEATURE_SSE,
FEATURE_SSE2,
FEATURE_SSE3,
FEATURE_SSSE3,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
FEATURE_FMA,
FEATURE_AVX512F,
FEATURE_BMI,
FEATURE_BMI2,
FEATURE_AES,
FEATURE_PCLMUL,
FEATURE_AVX512VL,
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
FEATURE_AVX512ER,
FEATURE_AVX512PF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
FEATURE_AVX5124FMAPS,
FEATURE_AVX512VPOPCNTDQ,
FEATURE_AVX512VBMI2,
FEATURE_GFNI,
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
FEATURE_AVX512BITALG,
FEATURE_AVX512BF16,
FEATURE_AVX512VP2INTERSECT,
FEATURE_CMPXCHG16B = 46,
FEATURE_F16C = 49,
FEATURE_LAHF_LM = 54,
FEATURE_LM,
FEATURE_WP,
FEATURE_LZCNT,
FEATURE_MOVBE,
FEATURE_AVX512FP16 = 94,
FEATURE_X86_64_BASELINE,
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
CPU_FEATURE_MAX
};
struct __processor_model __cpu_model;
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
@ -782,13 +656,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
#undef setFeature
}
struct __processor_model {
unsigned int __cpu_vendor;
unsigned int __cpu_type;
unsigned int __cpu_subtype;
unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};
unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
// A constructor function that is sets __cpu_model and __cpu_features2 with

147
libc/intrin/x86.h Normal file
View file

@ -0,0 +1,147 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_X86_H_
#define COSMOPOLITAN_LIBC_INTRIN_X86_H_
COSMOPOLITAN_C_START_
enum VendorSignatures {
SIG_INTEL = 0x756e6547, // Genu
SIG_AMD = 0x68747541, // Auth
};
enum ProcessorVendors {
VENDOR_INTEL = 1,
VENDOR_AMD,
VENDOR_OTHER,
VENDOR_MAX
};
enum ProcessorTypes {
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
INTEL_SILVERMONT,
INTEL_KNL,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
INTEL_KNM,
INTEL_GOLDMONT,
INTEL_GOLDMONT_PLUS,
INTEL_TREMONT,
AMDFAM19H,
ZHAOXIN_FAM7H,
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
CPU_TYPE_MAX
};
enum ProcessorSubtypes {
INTEL_COREI7_NEHALEM = 1,
INTEL_COREI7_WESTMERE,
INTEL_COREI7_SANDYBRIDGE,
AMDFAM10H_BARCELONA,
AMDFAM10H_SHANGHAI,
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
INTEL_COREI7_CANNONLAKE,
INTEL_COREI7_ICELAKE_CLIENT,
INTEL_COREI7_ICELAKE_SERVER,
AMDFAM17H_ZNVER2,
INTEL_COREI7_CASCADELAKE,
INTEL_COREI7_TIGERLAKE,
INTEL_COREI7_COOPERLAKE,
INTEL_COREI7_SAPPHIRERAPIDS,
INTEL_COREI7_ALDERLAKE,
AMDFAM19H_ZNVER3,
INTEL_COREI7_ROCKETLAKE,
ZHAOXIN_FAM7H_LUJIAZUI,
AMDFAM19H_ZNVER4,
INTEL_COREI7_GRANITERAPIDS,
INTEL_COREI7_GRANITERAPIDS_D,
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
CPU_SUBTYPE_MAX
};
enum ProcessorFeatures {
FEATURE_CMOV = 0,
FEATURE_MMX,
FEATURE_POPCNT,
FEATURE_SSE,
FEATURE_SSE2,
FEATURE_SSE3,
FEATURE_SSSE3,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
FEATURE_FMA,
FEATURE_AVX512F,
FEATURE_BMI,
FEATURE_BMI2,
FEATURE_AES,
FEATURE_PCLMUL,
FEATURE_AVX512VL,
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
FEATURE_AVX512ER,
FEATURE_AVX512PF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
FEATURE_AVX5124FMAPS,
FEATURE_AVX512VPOPCNTDQ,
FEATURE_AVX512VBMI2,
FEATURE_GFNI,
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
FEATURE_AVX512BITALG,
FEATURE_AVX512BF16,
FEATURE_AVX512VP2INTERSECT,
FEATURE_CMPXCHG16B = 46,
FEATURE_F16C = 49,
FEATURE_LAHF_LM = 54,
FEATURE_LM,
FEATURE_WP,
FEATURE_LZCNT,
FEATURE_MOVBE,
FEATURE_AVX512FP16 = 94,
FEATURE_X86_64_BASELINE,
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
CPU_FEATURE_MAX
};
struct __processor_model {
unsigned __cpu_vendor;
unsigned __cpu_type;
unsigned __cpu_subtype;
unsigned __cpu_features[1];
const char *__cpu_march;
};
struct __processor_model __cpu_model;
const char *__cpu_march(unsigned);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_X86_H_ */

94
libc/intrin/x86march.c Normal file
View file

@ -0,0 +1,94 @@
// Copyright 2024 Justine Alexandra Roberts Tunney
//
// Permission to use, copy, modify, and/or distribute this software for
// any purpose with or without fee is hereby granted, provided that the
// above copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
// DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
// PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
#include "libc/intrin/x86.h"
/**
* Returns microarchitecture name, e.g.
*
* puts(__cpu_march(__cpu_model.__cpu_subtype));
*
*/
const char *__cpu_march(unsigned subtype) {
switch (subtype) {
case INTEL_COREI7_NEHALEM:
return "nehalem";
case INTEL_COREI7_WESTMERE:
return "westmere";
case INTEL_COREI7_SANDYBRIDGE:
return "sandybridge";
case AMDFAM10H_BARCELONA:
return "amdfam10";
case AMDFAM10H_SHANGHAI:
return "amdfam10";
case AMDFAM10H_ISTANBUL:
return "amdfam10";
case AMDFAM15H_BDVER1:
return "bdver2";
case AMDFAM15H_BDVER2:
return "bdver2";
case AMDFAM15H_BDVER3:
return "bdver3";
case AMDFAM15H_BDVER4:
return "bdver4";
case AMDFAM17H_ZNVER1:
return "znver2";
case INTEL_COREI7_IVYBRIDGE:
return "ivybridge";
case INTEL_COREI7_HASWELL:
return "haswell";
case INTEL_COREI7_BROADWELL:
return "broadwell";
case INTEL_COREI7_SKYLAKE:
return "skylake";
case INTEL_COREI7_SKYLAKE_AVX512:
return "skylake-avx512";
case INTEL_COREI7_CANNONLAKE:
return "cannonlake";
case INTEL_COREI7_ICELAKE_CLIENT:
return "icelake-client";
case INTEL_COREI7_ICELAKE_SERVER:
return "icelake-server";
case AMDFAM17H_ZNVER2:
return "znver2";
case INTEL_COREI7_CASCADELAKE:
return "cascadelake";
case INTEL_COREI7_TIGERLAKE:
return "tigerlake";
case INTEL_COREI7_COOPERLAKE:
return "cooperlake";
case INTEL_COREI7_SAPPHIRERAPIDS:
return "sapphirerapids";
case INTEL_COREI7_ALDERLAKE:
return "alderlake";
case AMDFAM19H_ZNVER3:
return "znver3";
case INTEL_COREI7_ROCKETLAKE:
return "rocketlake";
case AMDFAM19H_ZNVER4:
return "znver4";
case INTEL_COREI7_GRANITERAPIDS:
return "graniterapids";
case INTEL_COREI7_GRANITERAPIDS_D:
return "graniterapids-d";
case INTEL_COREI7_ARROWLAKE:
return "arrowlake";
case INTEL_COREI7_ARROWLAKE_S:
return "arrowlake-s";
case INTEL_COREI7_PANTHERLAKE:
return "pantherlake";
default:
return 0;
}
}

View file

@ -93,18 +93,37 @@ kCpuids:.long 0,0,0,0 // EAX=0 (Basic Processor Info)
add $4*4,%rdi
jmp 2b
3: nop
#if !X86_NEED(AVX2)
// test if cpu supports avx
testb X86_HAVE(AVX)(%r8)
jz 5f
jz 7f
testb X86_HAVE(OSXSAVE)(%r8)
jz 4f
jz 5f
xor %ecx,%ecx
xgetbv
mov %eax,%ecx
// test if operating system saves avx registers
and $XCR0_SSE|XCR0_AVX,%eax
cmp $XCR0_SSE|XCR0_AVX,%eax
je 5f
4: btr $X86_BIT(AVX),X86_WORD(AVX)(%r8)
jne 5f
// test if operating system saves avx512 registers
and $XCR0_OPMASK|XCR0_ZMM_HI256|XCR0_HI16_ZMM,%ecx
cmp $XCR0_OPMASK|XCR0_ZMM_HI256|XCR0_HI16_ZMM,%ecx
jne 6f
je 7f
// operating system doesn't support avx
5: btr $X86_BIT(AVX),X86_WORD(AVX)(%r8)
btr $X86_BIT(AVX2),X86_WORD(AVX2)(%r8)
#endif
5: pop %rbx
// operating system supports avx but not avx512
6: andl $~(1<<30|1<<28|1<<17|1<<27|1<<16|1<<21|1<<26|1<<31),KCPUIDS(7H, EBX)(%r8)
andl $~(1<<1|1<<12|1<<6|1<<11|1<<14),KCPUIDS(7H, ECX)(%r8)
andl $~(1<<2|1<<3|1<<8),KCPUIDS(7H, EDX)(%r8)
andl $~(1<<5),KCPUIDS(7H_1H, EAX)(%r8)
// we're done
7: pop %rbx
.init.end 201,_init_kCpuids

View file

@ -17,6 +17,5 @@
#define kNtMem4mbPages 0x80000000
#define kNtMemReplacePlaceholder 0x00004000
#define kNtMemLargePages 0x20000000
#endif /* COSMOPOLITAN_LIBC_NT_ENUM_MEMFLAGS_H_ */