Improve -march=native micro-architecture detection

This commit is contained in:
Justine Tunney 2024-05-29 10:12:49 -07:00
parent 4c77acdfcf
commit 7c8df05042
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
6 changed files with 21 additions and 269 deletions

Binary file not shown.

Binary file not shown.

View file

@@ -6,14 +6,14 @@ if [ -n "$OBJDUMP" ]; then
fi fi
find_objdump() { find_objdump() {
if [ -x .cosmocc/3.3.2/bin/$1-linux-cosmo-objdump ]; then if [ -x .cosmocc/3.3.5/bin/$1-linux-cosmo-objdump ]; then
OBJDUMP=.cosmocc/3.3.2/bin/$1-linux-cosmo-objdump OBJDUMP=.cosmocc/3.3.5/bin/$1-linux-cosmo-objdump
elif [ -x .cosmocc/3.3.2/bin/$1-linux-musl-objdump ]; then elif [ -x .cosmocc/3.3.5/bin/$1-linux-musl-objdump ]; then
OBJDUMP=.cosmocc/3.3.2/bin/$1-linux-musl-objdump OBJDUMP=.cosmocc/3.3.5/bin/$1-linux-musl-objdump
elif [ -x "$COSMO/.cosmocc/3.3.2/bin/$1-linux-cosmo-objdump" ]; then elif [ -x "$COSMO/.cosmocc/3.3.5/bin/$1-linux-cosmo-objdump" ]; then
OBJDUMP="$COSMO/.cosmocc/3.3.2/bin/$1-linux-cosmo-objdump" OBJDUMP="$COSMO/.cosmocc/3.3.5/bin/$1-linux-cosmo-objdump"
elif [ -x "$COSMO/.cosmocc/3.3.2/bin/$1-linux-musl-objdump" ]; then elif [ -x "$COSMO/.cosmocc/3.3.5/bin/$1-linux-musl-objdump" ]; then
OBJDUMP="$COSMO/.cosmocc/3.3.2/bin/$1-linux-musl-objdump" OBJDUMP="$COSMO/.cosmocc/3.3.5/bin/$1-linux-musl-objdump"
else else
echo "error: toolchain not found (try running 'cosmocc --update' or 'make' in the cosmo monorepo)" >&2 echo "error: toolchain not found (try running 'cosmocc --update' or 'make' in the cosmo monorepo)" >&2
exit 1 exit 1

View file

@@ -33,6 +33,7 @@
#include "libc/fmt/libgen.h" #include "libc/fmt/libgen.h"
#include "libc/fmt/magnumstrs.internal.h" #include "libc/fmt/magnumstrs.internal.h"
#include "libc/intrin/safemacros.internal.h" #include "libc/intrin/safemacros.internal.h"
#include "libc/intrin/x86.h"
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/log/appendresourcereport.internal.h" #include "libc/log/appendresourcereport.internal.h"
#include "libc/log/color.internal.h" #include "libc/log/color.internal.h"
@@ -1054,134 +1055,11 @@ int main(int argc, char *argv[]) {
#ifdef __x86_64__ #ifdef __x86_64__
} else if (!strcmp(argv[i], "-march=native")) { } else if (!strcmp(argv[i], "-march=native")) {
const struct X86ProcessorModel *model; const char *march;
if (X86_HAVE(XOP)) if ((march = __cpu_march(__cpu_model.__cpu_subtype))) {
AddArg("-mxop"); char *buf = malloc(7 + strlen(march) + 1);
if (X86_HAVE(SSE4A)) stpcpy(stpcpy(buf, "-march="), march);
AddArg("-msse4a"); AddArg(buf);
if (X86_HAVE(SSE3))
AddArg("-msse3");
if (X86_HAVE(SSSE3))
AddArg("-mssse3");
if (X86_HAVE(SSE4_1))
AddArg("-msse4.1");
if (X86_HAVE(SSE4_2))
AddArg("-msse4.2");
if (X86_HAVE(AVX))
AddArg("-mavx");
if (X86_HAVE(AVX2)) {
AddArg("-mavx2");
if (isgcc) {
AddArg("-msse2avx");
AddArg("-Wa,-msse2avx");
}
}
if (X86_HAVE(AVX512F))
AddArg("-mavx512f");
if (X86_HAVE(AVX512PF))
AddArg("-mavx512pf");
if (X86_HAVE(AVX512ER))
AddArg("-mavx512er");
if (X86_HAVE(AVX512CD))
AddArg("-mavx512cd");
if (X86_HAVE(AVX512VL))
AddArg("-mavx512vl");
if (X86_HAVE(AVX512BW))
AddArg("-mavx512bw");
if (X86_HAVE(AVX512DQ))
AddArg("-mavx512dq");
if (X86_HAVE(AVX512IFMA))
AddArg("-mavx512ifma");
if (X86_HAVE(AVX512VBMI))
AddArg("-mavx512vbmi");
if (X86_HAVE(SHA))
AddArg("-msha");
if (X86_HAVE(AES))
AddArg("-maes");
if (X86_HAVE(VAES))
AddArg("-mvaes");
if (X86_HAVE(PCLMUL))
AddArg("-mpclmul");
if (X86_HAVE(FSGSBASE))
AddArg("-mfsgsbase");
if (X86_HAVE(F16C))
AddArg("-mf16c");
if (X86_HAVE(FMA))
AddArg("-mfma");
if (X86_HAVE(POPCNT))
AddArg("-mpopcnt");
if (X86_HAVE(BMI))
AddArg("-mbmi");
if (X86_HAVE(BMI2))
AddArg("-mbmi2");
if (X86_HAVE(ADX))
AddArg("-madx");
if (X86_HAVE(FXSR))
AddArg("-mfxsr");
if ((model = getx86processormodel(kX86ProcessorModelKey))) {
switch (model->march) {
case X86_MARCH_CORE2:
AddArg("-march=core2");
break;
case X86_MARCH_NEHALEM:
AddArg("-march=nehalem");
break;
case X86_MARCH_WESTMERE:
AddArg("-march=westmere");
break;
case X86_MARCH_SANDYBRIDGE:
AddArg("-march=sandybridge");
break;
case X86_MARCH_IVYBRIDGE:
AddArg("-march=ivybridge");
break;
case X86_MARCH_HASWELL:
AddArg("-march=haswell");
break;
case X86_MARCH_BROADWELL:
AddArg("-march=broadwell");
break;
case X86_MARCH_SKYLAKE:
case X86_MARCH_KABYLAKE:
AddArg("-march=skylake");
break;
case X86_MARCH_CANNONLAKE:
AddArg("-march=cannonlake");
break;
case X86_MARCH_ICELAKE:
if (model->grade >= X86_GRADE_SERVER) {
AddArg("-march=icelake-server");
} else {
AddArg("-march=icelake-client");
}
break;
case X86_MARCH_TIGERLAKE:
AddArg("-march=tigerlake");
break;
case X86_MARCH_BONNELL:
case X86_MARCH_SALTWELL:
AddArg("-march=bonnell");
break;
case X86_MARCH_SILVERMONT:
case X86_MARCH_AIRMONT:
AddArg("-march=silvermont");
break;
case X86_MARCH_GOLDMONT:
AddArg("-march=goldmont");
break;
case X86_MARCH_GOLDMONTPLUS:
AddArg("-march=goldmont-plus");
break;
case X86_MARCH_TREMONT:
AddArg("-march=tremont");
break;
case X86_MARCH_KNIGHTSLANDING:
AddArg("-march=knl");
break;
case X86_MARCH_KNIGHTSMILL:
AddArg("-march=knm");
break;
}
} }
#endif /* __x86_64__ */ #endif /* __x86_64__ */

View file

@@ -17,8 +17,7 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/nexgen32e/x86feature.h" #include "libc/intrin/x86.h"
#include "libc/nexgen32e/x86info.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "third_party/getopt/getopt.internal.h" #include "third_party/getopt/getopt.internal.h"
@@ -37,8 +36,8 @@
*/ */
#define VERSION \ #define VERSION \
"-march=native flag printer v0.1\n" \ "-march=native flag printer v0.2\n" \
"copyright 2023 justine alexandra roberts tunney\n" "copyright 2024 justine alexandra roberts tunney\n"
#define USAGE \ #define USAGE \
"usage: march-native [-hvc]\n" \ "usage: march-native [-hvc]\n" \
@@ -75,135 +74,9 @@ static void Puts(const char *s) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
GetOpts(argc, argv); GetOpts(argc, argv);
#ifdef __x86_64__ #ifdef __x86_64__
struct X86ProcessorModel *model; const char *march;
if (X86_HAVE(XOP)) if ((march = __cpu_march(__cpu_model.__cpu_subtype)))
Puts("-mxop"); tinyprint(1, "-march=", march, "\n", NULL);
if (X86_HAVE(SSE4A))
Puts("-msse4a");
if (X86_HAVE(SSE3))
Puts("-msse3");
if (X86_HAVE(SSSE3))
Puts("-mssse3");
if (X86_HAVE(SSE4_1))
Puts("-msse4.1");
if (X86_HAVE(SSE4_2))
Puts("-msse4.2");
if (X86_HAVE(AVX))
Puts("-mavx");
if (X86_HAVE(AVX2)) {
Puts("-mavx2");
if (!isclang) {
Puts("-msse2avx");
Puts("-Wa,-msse2avx");
}
}
if (X86_HAVE(AVX512F))
Puts("-mavx512f");
if (X86_HAVE(AVX512PF))
Puts("-mavx512pf");
if (X86_HAVE(AVX512ER))
Puts("-mavx512er");
if (X86_HAVE(AVX512CD))
Puts("-mavx512cd");
if (X86_HAVE(AVX512VL))
Puts("-mavx512vl");
if (X86_HAVE(AVX512BW))
Puts("-mavx512bw");
if (X86_HAVE(AVX512DQ))
Puts("-mavx512dq");
if (X86_HAVE(AVX512IFMA))
Puts("-mavx512ifma");
if (X86_HAVE(AVX512VBMI))
Puts("-mavx512vbmi");
if (X86_HAVE(SHA))
Puts("-msha");
if (X86_HAVE(AES))
Puts("-maes");
if (X86_HAVE(VAES))
Puts("-mvaes");
if (X86_HAVE(PCLMUL))
Puts("-mpclmul");
if (X86_HAVE(FSGSBASE))
Puts("-mfsgsbase");
if (X86_HAVE(F16C))
Puts("-mf16c");
if (X86_HAVE(FMA))
Puts("-mfma");
if (X86_HAVE(POPCNT))
Puts("-mpopcnt");
if (X86_HAVE(BMI))
Puts("-mbmi");
if (X86_HAVE(BMI2))
Puts("-mbmi2");
if (X86_HAVE(ADX))
Puts("-madx");
if (X86_HAVE(FXSR))
Puts("-mfxsr");
if ((model = (void *)getx86processormodel(kX86ProcessorModelKey))) {
switch (model->march) {
case X86_MARCH_CORE2:
Puts("-march=core2");
break;
case X86_MARCH_NEHALEM:
Puts("-march=nehalem");
break;
case X86_MARCH_WESTMERE:
Puts("-march=westmere");
break;
case X86_MARCH_SANDYBRIDGE:
Puts("-march=sandybridge");
break;
case X86_MARCH_IVYBRIDGE:
Puts("-march=ivybridge");
break;
case X86_MARCH_HASWELL:
Puts("-march=haswell");
break;
case X86_MARCH_BROADWELL:
Puts("-march=broadwell");
break;
case X86_MARCH_SKYLAKE:
case X86_MARCH_KABYLAKE:
Puts("-march=skylake");
break;
case X86_MARCH_CANNONLAKE:
Puts("-march=cannonlake");
break;
case X86_MARCH_ICELAKE:
if (model->grade >= X86_GRADE_SERVER) {
Puts("-march=icelake-server");
} else {
Puts("-march=icelake-client");
}
break;
case X86_MARCH_TIGERLAKE:
Puts("-march=tigerlake");
break;
case X86_MARCH_BONNELL:
case X86_MARCH_SALTWELL:
Puts("-march=bonnell");
break;
case X86_MARCH_SILVERMONT:
case X86_MARCH_AIRMONT:
Puts("-march=silvermont");
break;
case X86_MARCH_GOLDMONT:
Puts("-march=goldmont");
break;
case X86_MARCH_GOLDMONTPLUS:
Puts("-march=goldmont-plus");
break;
case X86_MARCH_TREMONT:
Puts("-march=tremont");
break;
case X86_MARCH_KNIGHTSLANDING:
Puts("-march=knl");
break;
case X86_MARCH_KNIGHTSMILL:
Puts("-march=knm");
break;
}
}
#elif defined(__aarch64__) #elif defined(__aarch64__)
// TODO(jart): How can we determine CPU features on AARCH64? // TODO(jart): How can we determine CPU features on AARCH64?
#else #else

View file

@@ -80,6 +80,7 @@
"__VSX__" "__VSX__"
"__ADX__" "__ADX__"
"__PCLMUL__" "__PCLMUL__"
"__VPCLMULQDQ__"
"__POPCNT__" "__POPCNT__"
"__RDRND__" "__RDRND__"
"__RDSEED__" "__RDSEED__"