mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-01 08:48:29 +00:00
Make mutex calling code 10x tinier
Calls to lock/unlock functions are now NOPs by default. The first time clone() is called, they get turned into CALL instructions. Doing this caused functions like fputc() to shrink from 85 bytes to 45+4 bytes. This matters because the ANSI solution of `(__threaded && lock())` inlines so much superfluous binary content into functions all over the place.
This commit is contained in:
parent
8cdec62f5b
commit
8b72490431
32 changed files with 494 additions and 210 deletions
|
@ -72,13 +72,33 @@ struct CloneArgs {
|
|||
void *arg;
|
||||
};
|
||||
|
||||
struct __tfork {
|
||||
void *tf_tcb;
|
||||
int32_t *tf_tid;
|
||||
void *tf_stack;
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// THREADING RUNTIME
|
||||
|
||||
static char tibdefault[64];
|
||||
extern int __threadcalls_end[];
|
||||
extern int __threadcalls_start[];
|
||||
|
||||
// Turns every placeholder NOP recorded in the __threadcalls table into a
// real CALL instruction.
//
// Each int between __threadcalls_start and __threadcalls_end is used as
// an offset from _base to a 7-byte `nopl func(%rip)`. Only the first
// three bytes are overwritten; the trailing four displacement bytes are
// left untouched, so they carry over into the patched CALL.
//
// clone() invokes this when CLONE_THREAD is requested and __threaded is
// still false.
static privileged dontinline void FixupThreadCalls(void) {
|
||||
/*
|
||||
* _NOPL("__threadcalls", func)
|
||||
*
|
||||
* we have this
|
||||
*
|
||||
* 0f 1f 05 b1 19 00 00 nopl func(%rip)
|
||||
*
|
||||
* we're going to turn it into this
|
||||
*
|
||||
* 67 67 e8 b1 19 00 00 addr32 addr32 call func
|
||||
*/
|
||||
// __morph_begin() blocks signals (non-Windows) and mprotects the image
// starting at _base as PROT_READ | PROT_WRITE so the stores below work.
__morph_begin();
|
||||
// Walk the table of call-site offsets.
for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) {
|
||||
// 0x67 0x67 are two address-size prefixes used purely as padding so
// the instruction keeps its original 7-byte length.
_base[*p + 0] = 0x67;
|
||||
_base[*p + 1] = 0x67;
|
||||
// 0xe8 is the CALL rel32 opcode; the next four bytes are reused as-is.
_base[*p + 2] = 0xe8;
|
||||
}
|
||||
// NOTE(review): __morph_end() is not shown here; presumably it undoes
// what __morph_begin() set up -- confirm.
__morph_end();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// THE NEW TECHNOLOGY
|
||||
|
@ -162,7 +182,9 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
|
|||
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
}
|
||||
*wt->ctid = tid;
|
||||
if (wt->ctid) {
|
||||
*wt->ctid = tid;
|
||||
}
|
||||
func(arg);
|
||||
// we no longer use the stack after this point
|
||||
// %rax = int bsdthread_terminate(%rdi = void *stackaddr,
|
||||
|
@ -266,6 +288,12 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// OPEN BESIYATA DISHMAYA
|
||||
|
||||
struct __tfork {
|
||||
void *tf_tcb;
|
||||
int32_t *tf_tid;
|
||||
void *tf_stack;
|
||||
};
|
||||
|
||||
int __tfork(struct __tfork *params, size_t psize, struct CloneArgs *wt);
|
||||
asm("__tfork:\n\t"
|
||||
"push\t$8\n\t"
|
||||
|
@ -515,6 +543,9 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
|
|||
struct CloneArgs *wt;
|
||||
|
||||
// transition program to threaded state
|
||||
if (!__threaded && (flags & CLONE_THREAD)) {
|
||||
FixupThreadCalls();
|
||||
}
|
||||
if ((flags & CLONE_SETTLS) && !__tls_enabled) {
|
||||
if (~flags & CLONE_THREAD) {
|
||||
STRACE("clone() tls w/o thread");
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_RUNTIME_CXAATEXIT_H_
|
||||
#define COSMOPOLITAN_LIBC_RUNTIME_CXAATEXIT_H_
|
||||
#include "libc/intrin/nopl.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
@ -23,8 +24,13 @@ void __cxa_lock(void);
|
|||
void __cxa_unlock(void);
|
||||
void __cxa_printexits(FILE *, void *);
|
||||
|
||||
// Call-site wrappers for the __cxa lock functions.
//
// On GCC (non-LLVM, non-strict-ANSI) builds the call is emitted through
// _NOPL0 and tagged with the "__threadcalls" name.
// NOTE(review): _NOPL0 comes from libc/intrin/nopl.h, which is not shown
// here; presumably it emits the patchable NOP that FixupThreadCalls()
// later rewrites into a CALL -- confirm.
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__STRICT_ANSI__)
|
||||
#define __cxa_lock() _NOPL0("__threadcalls", __cxa_lock)
|
||||
#define __cxa_unlock() _NOPL0("__threadcalls", __cxa_unlock)
|
||||
#else
|
||||
// Portable fallback: only call the real function once __threaded is set.
// NOTE(review): both functions are declared void, so `? f() : 0` mixes a
// void and an int operand; ISO C requires both to be void (compilers
// accept this as an extension). `(void)0` would be strictly conforming.
#define __cxa_lock() (__threaded ? __cxa_lock() : 0)
|
||||
#define __cxa_unlock() (__threaded ? __cxa_unlock() : 0)
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/bits/midpoint.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/nopl.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/nt/version.h"
|
||||
|
@ -67,8 +68,13 @@ void ReleaseMemoryNt(struct MemoryIntervals *, int, int) hidden;
|
|||
int UntrackMemoryIntervals(void *, size_t) hidden;
|
||||
size_t GetMemtrackSize(struct MemoryIntervals *);
|
||||
|
||||
// Call-site wrappers for the memory-interval (__mmi) lock functions.
//
// On GCC (non-LLVM, non-strict-ANSI) builds the call is emitted through
// _NOPL0 and tagged with the "__threadcalls" name.
// NOTE(review): _NOPL0 comes from libc/intrin/nopl.h, which is not shown
// here; presumably it emits a NOP that is patched into a CALL once the
// program becomes threaded -- confirm.
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__STRICT_ANSI__)
|
||||
#define __mmi_lock() _NOPL0("__threadcalls", __mmi_lock)
|
||||
#define __mmi_unlock() _NOPL0("__threadcalls", __mmi_unlock)
|
||||
#else
|
||||
// Portable fallback: only call the real function once __threaded is set.
#define __mmi_lock() (__threaded ? __mmi_lock() : 0)
|
||||
#define __mmi_unlock() (__threaded ? __mmi_unlock() : 0)
|
||||
#endif
|
||||
|
||||
// Evaluates to nonzero when p, converted to intptr_t, lies within
// [-0x800000000000, 0x7fffffffffff], i.e. the range of a sign-extended
// 48-bit value. Note that p is evaluated twice.
#define IsLegalPointer(p) \
|
||||
(-0x800000000000 <= (intptr_t)(p) && (intptr_t)(p) <= 0x7fffffffffff)
|
||||
|
|
|
@ -57,9 +57,8 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot,
|
|||
* @return 0 on success, or -1 w/ errno
|
||||
*/
|
||||
privileged void __morph_begin(void) {
|
||||
sigset_t ss;
|
||||
sigset_t ss = {{-1, -1}};
|
||||
if (!IsWindows()) {
|
||||
sigfillset(&ss);
|
||||
sys_sigprocmask(SIG_BLOCK, &ss, &oldss);
|
||||
}
|
||||
__morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue