Improve performance of bitscanning intrinsics

This change helps spectre plan execution more intelligently by working
around false output dependencies, which impact ops like popcnt, bsr, and bsf.
This commit is contained in:
Justine Tunney 2021-06-15 06:24:46 -07:00
parent 29cb53881e
commit 87d7010495
13 changed files with 100 additions and 224 deletions

View file

@ -7,15 +7,15 @@ COSMOPOLITAN_C_START_
/* Out-of-line popcnt: takes an unsigned long and returns an unsigned long.
   The popcnt() macro calls it as (popcnt)(...) when X86_HAVE(POPCNT) is
   false. */
unsigned long popcnt(unsigned long) pureconst;
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define popcnt(X) \
(__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
unsigned long Res, Pop = (X); \
if (X86_HAVE(POPCNT)) { \
asm("popcnt\t%1,%0" : "=r"(Res) : "r"(Pop) : "cc"); \
} else { \
Res = (popcnt)(Pop); \
} \
Res; \
/*
 * popcnt(X) - macro form of popcnt() for GNU C (non-strict-ANSI) builds.
 *
 * - If X is a compile-time constant, the whole expression is
 *   __builtin_popcountll(X).
 * - Otherwise X is copied into the local PoP. When X86_HAVE(POPCNT) is
 *   true, the popcnt instruction runs with PoP as both source and
 *   destination (a single "+r" read-write operand, "%0,%0"); per the
 *   commit message this is intended to work around the instruction's
 *   false output dependency. The "cc" clobber tells the compiler the
 *   flags are modified.
 * - When X86_HAVE(POPCNT) is false, the out-of-line function is called;
 *   the parentheses in (popcnt)(PoP) prevent this macro from expanding
 *   again.
 *
 * The value of the statement expression is PoP (unsigned long).
 */
#define popcnt(X) \
(__builtin_constant_p(X) ? __builtin_popcountll(X) : ({ \
unsigned long PoP = (X); \
if (X86_HAVE(POPCNT)) { \
asm("popcnt\t%0,%0" : "+r"(PoP) : /* no inputs */ : "cc"); \
} else { \
PoP = (popcnt)(PoP); \
} \
PoP; \
}))
#endif /* GNUC && !ANSI */