Simplify TLS and reduce startup latency

This change simplifies the thread-local storage support code. On Windows
and Mac OS X, the startup latency of __enable_tls() has been reduced from
30ms to 1ms. On Windows, TLS memory accesses are now much faster thanks
to better self-modifying code that avoids a function call and acquires
our thread information block pointer in a single instruction.
This commit is contained in:
Justine Tunney 2022-07-18 03:33:32 -07:00
parent 38c3fa63fe
commit b1d9d11be1
15 changed files with 136 additions and 312 deletions

View file

@@ -314,7 +314,7 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt,
if (!__tls_enabled) {
x = __pid;
} else {
x = *(int *)(__get_tls_inline() + 0x38);
x = *(int *)(__get_tls_privileged() + 0x38);
}
} else {
x = 666;
@@ -395,8 +395,7 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt,
i = 0;
m = (1 << base) - 1;
if (hash && x) sign = hash;
do
z[i++ & 127] = abet[x & m];
do z[i++ & 127] = abet[x & m];
while ((x >>= base) || (pdot && i < prec));
goto EmitNumber;