Make mutex calling code 10x tinier

Calls to lock/unlock functions are now NOPs by default. The first time
clone() is called, they get turned into CALL instructions. Doing this
caused funcctions like fputc() to shrink from 85 bytes to 45+4 bytes.
Since the ANSI solution of `(__threaded && lock())` inlines os much
superfluous binary content into functions all over the place.
This commit is contained in:
Justine Tunney 2022-06-12 19:33:42 -07:00
parent 8cdec62f5b
commit 8b72490431
32 changed files with 494 additions and 210 deletions

View file

@ -1,5 +1,6 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_STATE_INTERNAL_H_
#define COSMOPOLITAN_LIBC_CALLS_STATE_INTERNAL_H_
#include "libc/intrin/nopl.h"
#include "libc/nexgen32e/threaded.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@ -15,10 +16,21 @@ void __fds_unlock(void);
void __sig_lock(void);
void __sig_unlock(void);
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__STRICT_ANSI__)
#define __fds_lock() _NOPL0("__threadcalls", __fds_lock)
#define __fds_unlock() _NOPL0("__threadcalls", __fds_unlock)
#else
#define __fds_lock() (__threaded ? __fds_lock() : 0)
#define __fds_unlock() (__threaded ? __fds_unlock() : 0)
#endif
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__STRICT_ANSI__)
#define __sig_lock() _NOPL0("__threadcalls", __sig_lock)
#define __sig_unlock() _NOPL0("__threadcalls", __sig_unlock)
#else
#define __sig_lock() (__threaded ? __sig_lock() : 0)
#define __sig_unlock() (__threaded ? __sig_unlock() : 0)
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */