mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-11 21:49:12 +00:00
Simplify TLS and reduce startup latency
This change simplifies the thread-local storage support code. On Windows and Mac OS X the startup latency of __enable_tls() has been reduced from 30ms to 1ms. On Windows, TLS memory accesses will now go much faster due to better self-modifying code that avoids a function call and acquires our thread information block pointer in a single instruction.
This commit is contained in:
parent
38c3fa63fe
commit
b1d9d11be1
15 changed files with 136 additions and 312 deletions
|
@ -28,5 +28,5 @@
|
|||
*/
|
||||
// Returns the address of the errno variable to use right now: the
// global __errno while __tls_enabled is false, otherwise the location
// 0x3c bytes past the pointer returned by the TLS getter.
//
// NOTE(review): the two consecutive return statements below look like
// the removed (__get_tls_inline) and added (__get_tls_privileged) sides
// of a diff hunk whose +/- markers were lost; read as literal C, the
// second one is unreachable. Confirm against the upstream file.
privileged nocallersavedregisters errno_t *(__errno_location)(void) {
|
||||
if (!__tls_enabled) return &__errno;
|
||||
return (errno_t *)(__get_tls_inline() + 0x3c);
|
||||
return (errno_t *)(__get_tls_privileged() + 0x3c);
|
||||
}
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/gettls.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
|
||||
/**
|
||||
* Returns address of thread information block.
|
||||
*
* This is an out-of-line entry point whose body only forwards to
* __get_tls_inline() and returns its result unchanged.
*
|
||||
* This function must not be called until TLS is initialized.
|
||||
*
|
||||
* @return whatever __get_tls_inline() returns, as a char pointer
* @see __get_tls_inline()
|
||||
* @see __install_tls()
|
||||
* @see _spawn()
|
||||
*/
|
||||
optimizespeed char *__get_tls(void) {
|
||||
return __get_tls_inline();
|
||||
}
|
|
@ -39,8 +39,6 @@ LIBC_SYSV_A_FILES := \
|
|||
libc/sysv/systemfive.S \
|
||||
libc/sysv/errno_location.greg.c \
|
||||
libc/sysv/errno.c \
|
||||
libc/sysv/gettls.greg.c \
|
||||
libc/sysv/tlspolyfill.S \
|
||||
libc/sysv/errfun.S \
|
||||
libc/sysv/strace.greg.c \
|
||||
libc/sysv/describeos.greg.c \
|
||||
|
|
|
@ -1,156 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Code morphing TLS polyfills for The New Technology.
|
||||
//
|
||||
// @note msvc generates this code so it's stable
|
||||
// @note func ordering follows x86 reg encoding
|
||||
// @note each function is exactly 18 bytes
|
||||
// @see __enable_threads()
|
||||
|
||||
// Sets %rax to the quadword at %gs:0x1480 + 8*__tls_index.
// Because %rax is the destination, %rcx is used as the scratch
// register for the index and is saved/restored around the load.
// The 32-bit load of __tls_index zero-extends into %rcx.
// The file header notes that every routine here is exactly 18 bytes
// and ordered by x86 register encoding; do not change the encoding.
// NOTE(review): this is the only mov routine marked globl,hidden;
// presumably the others are reached by offset from it -- confirm.
// NOTE(review): 0x1480 is presumably the TLS slot array offset in
// the Windows thread block addressed through %gs -- confirm.
__tls_mov_nt_rax:
|
||||
push %rcx
|
||||
mov __tls_index(%rip),%ecx
|
||||
mov %gs:0x1480(,%rcx,8),%rax
|
||||
pop %rcx
|
||||
ret
|
||||
.endfn __tls_mov_nt_rax,globl,hidden
|
||||
|
||||
// Sets %rcx to the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load and is
// saved/restored, so %rcx is the only register changed on return.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rcx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rcx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rcx
|
||||
|
||||
// Sets %rdx to the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load and is
// saved/restored, so %rdx is the only register changed on return.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rdx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rdx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rdx
|
||||
|
||||
// Sets %rbx to the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load and is
// saved/restored, so %rbx is the only register changed on return.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rbx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rbx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rbx
|
||||
|
||||
// Overwrites %rsp with the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load.
// NOTE(review): once %rsp has been replaced by the load, the pop and
// ret below read from the newly loaded stack address, not from the
// caller's stack, so they do not restore the %rax pushed here nor
// return through the original return address. Presumably this slot
// exists only to keep the table aligned with register encoding 4 and
// is never executed -- confirm before relying on it.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rsp:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rsp
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rsp
|
||||
|
||||
// Sets %rbp to the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load and is
// saved/restored, so %rbp is the only register changed on return.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rbp:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rbp
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rbp
|
||||
|
||||
// Sets %rsi to the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load and is
// saved/restored, so %rsi is the only register changed on return.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rsi:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rsi
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rsi
|
||||
|
||||
// Sets %rdi to the quadword at %gs:0x1480 + 8*__tls_index.
// %rax holds the zero-extended index during the load and is
// saved/restored, so %rdi is the only register changed on return.
// Must remain exactly 18 bytes per the note in the file header.
__tls_mov_nt_rdi:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rdi
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_mov_nt_rdi
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rax.
// Because %rax is the accumulator, %rcx is used as the scratch
// register for the index and is saved/restored around the add.
// The add updates the arithmetic flags; push/pop/ret leave them.
// The file header notes that every routine here is exactly 18 bytes
// and ordered by x86 register encoding; do not change the encoding.
// NOTE(review): this is the only add routine marked globl,hidden;
// presumably the others are reached by offset from it -- confirm.
__tls_add_nt_rax:
|
||||
push %rcx
|
||||
mov __tls_index(%rip),%ecx
|
||||
add %gs:0x1480(,%rcx,8),%rax
|
||||
pop %rcx
|
||||
ret
|
||||
.endfn __tls_add_nt_rax,globl,hidden
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rcx.
// %rax holds the zero-extended index during the add and is
// saved/restored; the add also updates the arithmetic flags.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rcx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rcx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rcx
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rdx.
// %rax holds the zero-extended index during the add and is
// saved/restored; the add also updates the arithmetic flags.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rdx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rdx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rdx
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rbx.
// %rax holds the zero-extended index during the add and is
// saved/restored; the add also updates the arithmetic flags.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rbx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rbx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rbx
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rsp.
// %rax holds the zero-extended index during the add.
// NOTE(review): after the add has moved %rsp, the pop and ret below
// read from the shifted stack address, so they do not restore the
// %rax pushed here nor return through the original return address.
// Presumably this slot exists only to keep the table aligned with
// register encoding 4 and is never executed -- confirm before
// relying on it.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rsp:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rsp
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rsp
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rbp.
// %rax holds the zero-extended index during the add and is
// saved/restored; the add also updates the arithmetic flags.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rbp:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rbp
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rbp
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rsi.
// %rax holds the zero-extended index during the add and is
// saved/restored; the add also updates the arithmetic flags.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rsi:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rsi
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rsi
|
||||
|
||||
// Adds the quadword at %gs:0x1480 + 8*__tls_index to %rdi.
// %rax holds the zero-extended index during the add and is
// saved/restored; the add also updates the arithmetic flags.
// Must remain exactly 18 bytes per the note in the file header.
__tls_add_nt_rdi:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
add %gs:0x1480(,%rax,8),%rdi
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __tls_add_nt_rdi
|
Loading…
Add table
Add a link
Reference in a new issue