Make mutex calling code 10x tinier

Calls to lock/unlock functions are now NOPs by default. The first time
clone() is called, they get turned into CALL instructions. Doing this
caused functions like fputc() to shrink from 85 bytes to 45+4 bytes,
since the ANSI solution of `(__threaded && lock())` inlines so much
superfluous binary content into functions all over the place.
This commit is contained in:
Justine Tunney 2022-06-12 19:33:42 -07:00
parent 8cdec62f5b
commit 8b72490431
32 changed files with 494 additions and 210 deletions

View file

@ -1607,9 +1607,7 @@ ape_pad_text:
.type ape_pad_privileged,@object
.hidden ape_pad_privileged
ape_pad_privileged:
#if !IsTiny()
.align 4096
#endif
.previous
.section .ape.pad.rodata,"a",@progbits

View file

@ -1,15 +0,0 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to this file,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
/* Thread-local variable that main() stores to. */
_Thread_local int foo;

/**
 * Stores 1 into the thread-local `foo` and exits successfully.
 *
 * @param argc is unused
 * @param argv is unused
 * @return 0 always
 */
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;
  foo = 1;
  return 0;
}

View file

@ -55,7 +55,7 @@
* @see strftime(), gettimeofday()
* @asyncsignalsafe
*/
noinstrument int clock_gettime(int clockid, struct timespec *ts) {
int clock_gettime(int clockid, struct timespec *ts) {
int rc;
char *buf;
if (IsAsan() && !__asan_is_valid_timespec(ts)) {

View file

@ -92,8 +92,8 @@ static textwindows struct Signal *__sig_remove(void) {
* @note called from main thread
* @return true if EINTR should be returned by caller
*/
static privileged bool __sig_deliver(bool restartable, int sig, int si_code,
ucontext_t *ctx) {
static bool __sig_deliver(bool restartable, int sig, int si_code,
ucontext_t *ctx) {
unsigned rva, flags;
siginfo_t info, *infop;
STRACE("delivering %G", sig);
@ -162,8 +162,7 @@ static textwindows bool __sig_isfatal(int sig) {
* @param restartable can be used to suppress true return if SA_RESTART
* @return true if signal was delivered
*/
privileged bool __sig_handle(bool restartable, int sig, int si_code,
ucontext_t *ctx) {
bool __sig_handle(bool restartable, int sig, int si_code, ucontext_t *ctx) {
bool delivered;
switch (__sighandrvas[sig]) {
case (intptr_t)SIG_DFL:

View file

@ -1,5 +1,6 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_STATE_INTERNAL_H_
#define COSMOPOLITAN_LIBC_CALLS_STATE_INTERNAL_H_
#include "libc/intrin/nopl.h"
#include "libc/nexgen32e/threaded.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
@ -15,10 +16,21 @@ void __fds_unlock(void);
void __sig_lock(void);
void __sig_unlock(void);
/*
 * Lock helpers for the file descriptor table and the signal state.
 *
 * Portable builds test `__threaded` at every call site. GNU C builds
 * (excluding LLVM and strict ANSI mode) instead go through _NOPL0()
 * using the "__threadcalls" fixup table.
 */
#if !defined(__GNUC__) || defined(__llvm__) || defined(__STRICT_ANSI__)
#define __fds_lock()   (__threaded ? __fds_lock() : 0)
#define __fds_unlock() (__threaded ? __fds_unlock() : 0)
#define __sig_lock()   (__threaded ? __sig_lock() : 0)
#define __sig_unlock() (__threaded ? __sig_unlock() : 0)
#else
#define __fds_lock()   _NOPL0("__threadcalls", __fds_lock)
#define __fds_unlock() _NOPL0("__threadcalls", __fds_unlock)
#define __sig_lock()   _NOPL0("__threadcalls", __sig_lock)
#define __sig_unlock() _NOPL0("__threadcalls", __sig_unlock)
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -1,63 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/weaken.h"
#include "libc/calls/calls.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/lockcmpxchgp.h"
#include "libc/intrin/spinlock.h"
#include "libc/log/log.h"
#include "libc/nexgen32e/rdtsc.h"
#include "libc/runtime/internal.h"
#include "libc/time/clockstonanos.internal.h"
/**
 * Acquires spinlock with diagnostics for re-entry and slow acquisition.
 *
 * The lock word holds 0 when free, otherwise the value that the owner
 * stored, which here is the caller's gettid().
 *
 * @param lock is the lock word
 * @param lockname is the text printed as the lock's name in messages
 * @param file is the source file name printed in messages
 * @param line is the source line number printed in messages
 * @param func is the function name printed in messages
 */
privileged void _spinlock_debug_4(int *lock, const char *lockname,
const char *file, int line,
const char *func) {
unsigned i;
int me, owner;
uint64_t ts1, ts2;
me = gettid();
owner = 0;
// fast path: try to swap 0 -> me
// NOTE(review): presumably _lockcmpxchgp() stores the observed value
// into `owner` on failure, since `owner` is inspected next; confirm
if (!_lockcmpxchgp(lock, &owner, me)) {
if (owner == me) {
// this thread already holds the lock: report it and terminate
kprintf("%s:%d: error: lock re-entry on %s in %s()\n", file, line,
lockname, func);
// __die is only called when it has been linked into the program
if (weaken(__die)) weaken(__die)();
__restorewintty();
_Exit(1);
}
i = 0;
ts1 = rdtsc();
for (;;) {
// the expected value must be reset to 0 before each attempt
owner = 0;
if (_lockcmpxchgp(lock, &owner, me)) break;
ts2 = rdtsc();
// warn when ClocksToNanos() exceeds 1e9 since the last report
if (ClocksToNanos(ts1, ts2) > 1000000000ul) {
ts1 = ts2;
kprintf("%s:%d: warning: slow lock on %s in %s()\n", file, line,
lockname, func);
}
// pause on seven of every eight attempts, yield on the eighth
if (++i & 7) {
__builtin_ia32_pause();
} else {
sched_yield();
}
}
}
}

View file

@ -16,7 +16,6 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#define ShouldUseMsabiAttribute() 1
#include "libc/calls/strace.internal.h"
#include "libc/dce.h"
#include "libc/nexgen32e/vendor.internal.h"
@ -30,13 +29,13 @@
* When running on bare metal, this function will reboot your computer
* by hosing the interrupt descriptors and triple faulting the system.
*
* @param exitcode is masked with 255
* @param exitcode is masked with 255 on unix (but not windows)
* @asyncsignalsafe
* @threadsafe
* @vforksafe
* @noreturn
*/
privileged wontreturn void _Exit(int exitcode) {
wontreturn void _Exit(int exitcode) {
int i;
STRACE("_Exit(%d)", exitcode);
if (!IsWindows() && !IsMetal()) {
@ -45,7 +44,7 @@ privileged wontreturn void _Exit(int exitcode) {
: "a"(__NR_exit_group), "D"(exitcode)
: "rcx", "r11", "memory");
} else if (IsWindows()) {
__imp_ExitProcess(exitcode & 0xff);
ExitProcess(exitcode);
}
asm("push\t$0\n\t"
"push\t$0\n\t"

View file

@ -85,6 +85,7 @@ o/$(MODE)/libc/intrin/_spinlock_debug_4.o: \
-mno-fentry \
-ffreestanding \
-fno-sanitize=all \
-mgeneral-regs-only \
-fno-stack-protector
o/$(MODE)/libc/intrin/tls.greg.o \

68
libc/intrin/nopl.h Normal file
View file

@ -0,0 +1,68 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
#define COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
* @fileoverview Turns CALLs into NOPs that are fixupable at runtime.
*
* Things like lock/unlock function calls can take on average 100ms.
* Libc needs to use internal locking pervasively in order to support
* threads. So there's a lot of cost everywhere, even though most progs
* don't use threads. In ANSI mode we dispatch via (__threaded && lock())
* to solve this problem, but if we write lock statements that way, it
* adds a lot of bloat to the functions that call locking routines. So
* what we do here is replace the CALL instruction with NOP, which keeps
* the code just as fast as inlining, while making code size 10x tinier.
*/
/*
 * Assembly text that opens SECTION's fixup table.
 *
 * Switches to the comdat section `.sort.rodata.SECTION.1` (group name
 * SECTION), aligns to four bytes, defines the global object symbol
 * `SECTION_start` at the current location, and then returns to the
 * previous section.
 */
#define _NOPL_PROLOGUE(SECTION)                                 \
  ".section \".sort.rodata." SECTION ".1\",\"aG\",@progbits,\"" \
  SECTION "\",comdat\n"                                         \
  "\t.align\t4\n"                                               \
  "\t.type\t\"" SECTION "_start\",@object\n"                    \
  "\t.globl\t\"" SECTION "_start\"\n"                           \
  "\t.equ\t\"" SECTION "_start\",.\n"                           \
  "\t.previous\n"                                               \
  "\t"
/*
 * Assembly text that closes SECTION's fixup table.
 *
 * Switches to the comdat section `.sort.rodata.SECTION.3` (group name
 * SECTION), aligns to four bytes, defines the global object symbol
 * `SECTION_end` at the current location, and then returns to the
 * previous section.
 *
 * The `.type` directive is separated from its operand by a tab, the
 * same way _NOPL_PROLOGUE() spells it, rather than depending on the
 * assembler tolerating a directive glued to a quoted symbol name.
 */
#define _NOPL_EPILOGUE(SECTION)                    \
  ".section \".sort.rodata." SECTION ".3"          \
  "\",\"aG\",@progbits,\"" SECTION "\",comdat\n\t" \
  ".align\t4\n\t"                                  \
  ".type\t\"" SECTION "_end\",@object\n\t"         \
  ".globl\t\"" SECTION "_end\"\n\t"                \
  ".equ\t\"" SECTION "_end\",.\n\t"                \
  ".previous\n\t"
/*
 * Emits a `nopl FUNC(%rip)` placeholder for a zero-argument call.
 *
 * The offset of the placeholder relative to IMAGE_BASE_VIRTUAL is
 * appended as a 32-bit entry to `.sort.rodata.SECTION.2`, so that the
 * runtime can later rewrite the instruction into `call FUNC`.
 *
 * Because the instruction may become a real CALL, everything a callee
 * is allowed to overwrite must be declared as clobbered. That includes
 * the scratch registers r10 and r11 and the flags, in addition to rax
 * and the argument registers.
 *
 * NOTE(review): vector registers are not listed as clobbered, so this
 * presumably relies on FUNC not touching them; confirm build flags.
 *
 * @param SECTION is a string literal naming the fixup table
 * @param FUNC is the function the placeholder may later call
 * @return 0 always, since the callee's return value is discarded
 */
#define _NOPL0(SECTION, FUNC)                                          \
  ({                                                                   \
    asm volatile(_NOPL_PROLOGUE(SECTION) /*                         */ \
                 _NOPL_EPILOGUE(SECTION) /*                         */ \
                 ".section \".sort.rodata." SECTION ".2\",\"a\",@progbits\n\t" \
                 ".align\t4\n\t"                                       \
                 ".long\t353f-%a1\n\t"                                 \
                 ".previous\n353:\t"                                   \
                 "nopl\t%a0"                                           \
                 : /* no outputs */                                    \
                 : "X"(FUNC), "X"(IMAGE_BASE_VIRTUAL)                  \
                 : "rax", "rdi", "rsi", "rdx", "rcx", "r8", "r9",      \
                   "r10", "r11", "memory", "cc");                      \
    0;                                                                 \
  })
/*
 * Emits a `nopl FUNC(%rip)` placeholder for a one-argument call.
 *
 * ARG is evaluated once and pinned to %rdi so it is already in place
 * if the runtime rewrites the placeholder into `call FUNC`. The offset
 * of the placeholder relative to IMAGE_BASE_VIRTUAL is appended as a
 * 32-bit entry to `.sort.rodata.SECTION.2`.
 *
 * Because the instruction may become a real CALL, everything a callee
 * is allowed to overwrite must be declared as clobbered. That includes
 * the scratch registers r10 and r11 and the flags. %rdi is covered by
 * the "+D" read-write operand rather than the clobber list.
 *
 * NOTE(review): vector registers are not listed as clobbered, so this
 * presumably relies on FUNC not touching them; confirm build flags.
 *
 * @param SECTION is a string literal naming the fixup table
 * @param FUNC is the function the placeholder may later call
 * @param ARG is the sole argument to pass
 * @return 0 always, since the callee's return value is discarded
 */
#define _NOPL1(SECTION, FUNC, ARG)                                     \
  ({                                                                   \
    register autotype(ARG) __arg asm("rdi") = ARG;                     \
    asm volatile(_NOPL_PROLOGUE(SECTION) /*                         */ \
                 _NOPL_EPILOGUE(SECTION) /*                         */ \
                 ".section \".sort.rodata." SECTION ".2\",\"a\",@progbits\n\t" \
                 ".align\t4\n\t"                                       \
                 ".long\t353f-%a2\n\t"                                 \
                 ".previous\n353:\t"                                   \
                 "nopl\t%a1"                                           \
                 : "+D"(__arg)                                         \
                 : "X"(FUNC), "X"(IMAGE_BASE_VIRTUAL)                  \
                 : "rax", "rsi", "rdx", "rcx", "r8", "r9", "r10",      \
                   "r11", "memory", "cc");                             \
    0;                                                                 \
  })
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_NOPL_H_ */

View file

@ -7,15 +7,15 @@ COSMOPOLITAN_C_START_
#define PTHREAD_ONCE_INIT 0
#define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_RECURSIVE
#define PTHREAD_MUTEX_NORMAL 0
#define PTHREAD_MUTEX_DEFAULT 0
#define PTHREAD_MUTEX_RECURSIVE 1
#define PTHREAD_MUTEX_ERRORCHECK 2
#define PTHREAD_MUTEX_STALLED 0
#define PTHREAD_MUTEX_ROBUST 1
/* clang-format off */
#define PTHREAD_MUTEX_INITIALIZER {0}
#define PTHREAD_MUTEX_INITIALIZER {PTHREAD_MUTEX_DEFAULT}
#define PTHREAD_RWLOCK_INITIALIZER {{{0}}}
#define PTHREAD_COND_INITIALIZER {{{0}}}
/* clang-format on */
@ -24,21 +24,22 @@ typedef unsigned long *pthread_t;
typedef int pthread_once_t;
typedef struct {
int attr;
int reent;
_Atomic(int) owner;
_Atomic(int) waits;
int reent;
} pthread_mutex_t;
typedef struct {
unsigned __attr;
int attr;
} pthread_mutexattr_t;
typedef struct {
unsigned __attr;
int attr;
} pthread_condattr_t;
typedef struct {
unsigned __attr[2];
int attr[2];
} pthread_rwlockattr_t;
typedef struct {
@ -69,6 +70,7 @@ wontreturn void pthread_exit(void *);
pureconst pthread_t pthread_self(void);
int pthread_create(pthread_t *, const pthread_attr_t *, void *(*)(void *),
void *);
int pthread_yield(void);
int pthread_detach(pthread_t);
int pthread_join(pthread_t, void **);
int pthread_equal(pthread_t, pthread_t);
@ -80,6 +82,10 @@ int pthread_mutex_trylock(pthread_mutex_t *);
int pthread_mutex_timedlock(pthread_mutex_t *, const struct timespec *);
int pthread_mutex_destroy(pthread_mutex_t *);
int pthread_mutex_consistent(pthread_mutex_t *);
int pthread_mutexattr_init(pthread_mutexattr_t *);
int pthread_mutexattr_destroy(pthread_mutexattr_t *);
int pthread_mutexattr_gettype(const pthread_mutexattr_t *, int *);
int pthread_mutexattr_settype(pthread_mutexattr_t *, int);
int pthread_cond_init(pthread_cond_t *, const pthread_condattr_t *);
int pthread_cond_destroy(pthread_cond_t *);
int pthread_cond_wait(pthread_cond_t *, pthread_mutex_t *);

View file

@ -1,5 +1,5 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
@ -16,9 +16,14 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.privileged
#include "libc/intrin/pthread.h"
#include "libc/str/str.h"
_spinlock_gettid:
jmp gettid
.endfn _spinlock_gettid,globl
/**
 * Destroys mutex.
 *
 * Every byte of the object is cleared.
 *
 * @param mutex is the mutex to tear down
 * @return 0 on success, or error number on failure
 */
int pthread_mutex_destroy(pthread_mutex_t *mutex) {
  bzero(mutex, sizeof *mutex);
  return 0;
}

View file

@ -0,0 +1,32 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pthread.h"
#include "libc/str/str.h"
/**
 * Initializes mutex.
 *
 * The object is cleared and then its type is taken from `attr`, or
 * from `PTHREAD_MUTEX_DEFAULT` when no attributes are supplied.
 *
 * @param mutex is the mutex to set up
 * @param attr may be NULL
 * @return 0 on success, or error number on failure
 */
int pthread_mutex_init(pthread_mutex_t *mutex,
                       const pthread_mutexattr_t *attr) {
  bzero(mutex, sizeof *mutex);
  if (attr) {
    mutex->attr = attr->attr;
  } else {
    mutex->attr = PTHREAD_MUTEX_DEFAULT;
  }
  return 0;
}

View file

@ -19,22 +19,29 @@
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h"
#include "libc/errno.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/gettls.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/sysv/consts/futex.h"
/**
* Acquires mutex.
* Locks mutex.
* @return 0 on success, or error number on failure
*/
int pthread_mutex_lock(pthread_mutex_t *mutex) {
int me, owner;
unsigned tries;
for (tries = 0, me = gettid();;) {
owner = 0;
if (atomic_compare_exchange_weak(&mutex->owner, &owner, me) ||
owner == me) {
if (atomic_compare_exchange_strong(&mutex->owner, &owner, me)) {
break;
} else if (owner == me) {
if (mutex->attr != PTHREAD_MUTEX_ERRORCHECK) {
break;
} else {
return EDEADLK;
}
}
atomic_fetch_add(&mutex->waits, +1);
if (!IsLinux() || futex((void *)&mutex->owner, FUTEX_WAIT, owner, 0, 0)) {

View file

@ -16,25 +16,25 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h"
#include "libc/errno.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/sysv/consts/futex.h"
/**
* Releases mutex.
* @return 0 on success or error number on failure
* @raises EPERM if in error check mode and not owned by caller
*/
int pthread_mutex_unlock(pthread_mutex_t *mutex) {
int owner;
bool shouldunlock;
assert(mutex->reent > 0);
shouldunlock = --mutex->reent <= 0;
assert(mutex->owner == gettid());
if (shouldunlock) {
if (mutex->attr == PTHREAD_MUTEX_ERRORCHECK && mutex->owner != gettid()) {
return EPERM;
}
if (!--mutex->reent) {
atomic_store_explicit(&mutex->owner, 0, memory_order_relaxed);
if (IsLinux() &&
atomic_load_explicit(&mutex->waits, memory_order_acquire)) {

View file

@ -0,0 +1,29 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pthread.h"
#include "libc/str/str.h"
/**
 * Destroys mutex attr.
 *
 * Every byte of the attribute object is cleared.
 *
 * @param attr is the attribute object to tear down
 * @return 0 on success, or error number on failure
 */
int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) {
  bzero(attr, sizeof *attr);
  return 0;
}

View file

@ -0,0 +1,35 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/intrin/pthread.h"
/**
 * Gets mutex type.
 *
 * @param attr is the attribute object to query
 * @param type receives the stored type, which is one of
 *     - `PTHREAD_MUTEX_NORMAL`
 *     - `PTHREAD_MUTEX_DEFAULT`
 *     - `PTHREAD_MUTEX_RECURSIVE`
 *     - `PTHREAD_MUTEX_ERRORCHECK`
 * @return 0 on success, or error on failure
 */
int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type) {
  const int kind = attr->attr;
  *type = kind;
  return 0;
}

View file

@ -0,0 +1,30 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pthread.h"
#include "libc/str/str.h"
/**
 * Initializes mutex attr.
 *
 * The object is cleared and its type is set to `PTHREAD_MUTEX_DEFAULT`.
 *
 * @param attr is the attribute object to set up
 * @return 0 on success, or error number on failure
 */
int pthread_mutexattr_init(pthread_mutexattr_t *attr) {
  bzero(attr, sizeof *attr);
  attr->attr = PTHREAD_MUTEX_DEFAULT;
  return 0;
}

View file

@ -0,0 +1,43 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/intrin/pthread.h"
/**
 * Sets mutex type.
 *
 * The attribute object is left untouched when `type` is rejected.
 *
 * @param attr is the attribute object to modify
 * @param type can be one of
 *     - `PTHREAD_MUTEX_NORMAL`
 *     - `PTHREAD_MUTEX_DEFAULT`
 *     - `PTHREAD_MUTEX_RECURSIVE`
 *     - `PTHREAD_MUTEX_ERRORCHECK`
 * @return 0 on success, or error on failure
 * @raises EINVAL if `type` is invalid
 */
int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) {
  if (type != PTHREAD_MUTEX_NORMAL &&     //
      type != PTHREAD_MUTEX_RECURSIVE &&  //
      type != PTHREAD_MUTEX_ERRORCHECK) {
    return EINVAL;
  }
  attr->attr = type;
  return 0;
}

View file

@ -34,7 +34,7 @@ const char kConsoleHandles[3] = {
/**
* Puts cmd.exe gui back the way it was.
*/
noasan void __restorewintty(void) {
void __restorewintty(void) {
int i;
if (!IsWindows()) return;
NTTRACE("__restorewintty()");

View file

@ -3,67 +3,40 @@
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § spinlocks
privileged unsophisticated locking subroutines */
fast tiny inline synchronization routines */
#if defined(MODE_DBG) && !defined(_SPINLOCK_DEBUG)
#define _SPINLOCK_DEBUG
#endif
#if defined(_SPINLOCK_DEBUG)
#define _spinlock(lock) _spinlock_ndebug(lock)
#define _spinlock_ndebug(lock) _spinlock_cooperative(lock)
#define _trylock(lock) _trylock_debug(lock)
#define _seizelock(lock) _seizelock_impl(lock, _spinlock_gettid())
#elif defined(TINY)
#define _spinlock(lock) _spinlock_tiny(lock)
#define _spinlock_ndebug(lock) _spinlock_tiny(lock)
#define _trylock(lock) _trylock_inline(lock)
#define _seizelock(lock) _seizelock_impl(lock, 1)
#ifdef TINY
#define _spinlock(lock) _spinlock_tiny(lock)
#else
#define _spinlock(lock) _spinlock_cooperative(lock)
#define _spinlock_ndebug(lock) _spinlock_cooperative(lock)
#define _trylock(lock) _trylock_inline(lock)
#define _seizelock(lock) _seizelock_impl(lock, 1)
#define _spinlock(lock) _spinlock_cooperative(lock)
#endif
#define _trylock_inline(lock) __atomic_test_and_set(lock, __ATOMIC_SEQ_CST)
#define _spunlock(lock) __atomic_store_n(lock, 0, __ATOMIC_RELAXED)
#define _trylock_debug(lock) \
_trylock_debug_4(lock, #lock, __FILE__, __LINE__, __FUNCTION__)
#define _spinlock_debug(lock) \
_spinlock_debug_4(lock, #lock, __FILE__, __LINE__, __FUNCTION__)
#define _spunlock(lock) \
do { \
autotype(lock) __lock = (lock); \
typeof(*__lock) __x = 0; \
__atomic_store(__lock, &__x, __ATOMIC_RELAXED); \
} while (0)
#define _seizelock_impl(lock, value) \
do { \
#define _seizelock(lock, value) \
({ \
autotype(lock) __lock = (lock); \
typeof(*__lock) __x = (value); \
__atomic_store(__lock, &__x, __ATOMIC_RELEASE); \
} while (0)
})
#define _spinlock_tiny(lock) \
do { \
autotype(lock) __lock = (lock); \
while (_trylock_inline(__lock)) { \
__builtin_ia32_pause(); \
} \
} while (0)
#define _spinlock_tiny(lock) \
({ \
autotype(lock) __lock = (lock); \
while (_trylock(__lock)) { \
__builtin_ia32_pause(); \
} \
0; \
})
#define _spinlock_cooperative(lock) \
do { \
({ \
autotype(lock) __lock = (lock); \
typeof(*__lock) __x; \
unsigned __tries = 0; \
for (;;) { \
__atomic_load(__lock, &__x, __ATOMIC_RELAXED); \
if (!__x && !_trylock_inline(__lock)) { \
if (!__x && !_trylock(__lock)) { \
break; \
} else if (++__tries & 7) { \
__builtin_ia32_pause(); \
@ -71,11 +44,11 @@
_spinlock_yield(); \
} \
} \
} while (0)
0; \
})
#define _trylock(lock) __atomic_test_and_set(lock, __ATOMIC_SEQ_CST)
int _spinlock_gettid(void);
int _trylock_debug_4(int *, const char *, const char *, int, const char *);
void _spinlock_debug_4(int *, const char *, const char *, int, const char *);
void _spinlock_yield(void);
#endif /* COSMOPOLITAN_LIBC_INTRIN_SPINLOCK_H_ */

View file

@ -59,6 +59,20 @@ privileged void *__initialize_tls(char tib[64]) {
/**
* Installs thread information block on main process.
*
* For example, to set up TLS correctly for the main thread, without
* creating any threads using `clone` (which does this automatically),
* it is sufficient to say:
*
* __attribute__((__constructor__)) static void InitTls(void) {
* static char tls[64];
* __initialize_tls(tls);
* __threaded = *(int *)(tls + 0x38) = gettid();
* *(int *)(tls + 0x3c) = __errno;
* __install_tls(tls);
* }
*
* Since that'll ensure it happens exactly once.
*/
privileged void __install_tls(char tib[64]) {
int ax, dx;

View file

@ -72,13 +72,33 @@ struct CloneArgs {
void *arg;
};
struct __tfork {
void *tf_tcb;
int32_t *tf_tid;
void *tf_stack;
};
////////////////////////////////////////////////////////////////////////////////
// THREADING RUNTIME
static char tibdefault[64];
extern int __threadcalls_end[];
extern int __threadcalls_start[];
// Rewrites every placeholder recorded in the "__threadcalls" table
// into a CALL. Each table entry between __threadcalls_start and
// __threadcalls_end is used as an index into _base, and the three
// bytes found there are overwritten.
static privileged dontinline void FixupThreadCalls(void) {
/*
* _NOPL0("__threadcalls", func)
*
* we have this
*
* 0f 1f 05 b1 19 00 00 nopl func(%rip)
*
* we're going to turn it into this
*
* 67 67 e8 b1 19 00 00 addr32 addr32 call func
*
* only the first three bytes change; the trailing four displacement
* bytes are left exactly as they were
*/
// the stores below are bracketed by __morph_begin() / __morph_end()
__morph_begin();
for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) {
_base[*p + 0] = 0x67;
_base[*p + 1] = 0x67;
_base[*p + 2] = 0xe8;
}
__morph_end();
}
////////////////////////////////////////////////////////////////////////////////
// THE NEW TECHNOLOGY
@ -162,7 +182,9 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
: "rcx", "r11", "memory", "cc");
}
*wt->ctid = tid;
if (wt->ctid) {
*wt->ctid = tid;
}
func(arg);
// we no longer use the stack after this point
// %rax = int bsdthread_terminate(%rdi = void *stackaddr,
@ -266,6 +288,12 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
////////////////////////////////////////////////////////////////////////////////
// OPEN BESIYATA DISHMAYA
// Parameter block for __tfork().
// NOTE(review): field meanings below are inferred from the names;
// confirm against the OpenBSD __tfork documentation.
struct __tfork {
void *tf_tcb;     // presumably the new thread's control block
int32_t *tf_tid;  // presumably where the new thread's id is stored
void *tf_stack;   // presumably the new thread's stack
};
int __tfork(struct __tfork *params, size_t psize, struct CloneArgs *wt);
asm("__tfork:\n\t"
"push\t$8\n\t"
@ -515,6 +543,9 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
struct CloneArgs *wt;
// transition program to threaded state
if (!__threaded && (flags & CLONE_THREAD)) {
FixupThreadCalls();
}
if ((flags & CLONE_SETTLS) && !__tls_enabled) {
if (~flags & CLONE_THREAD) {
STRACE("clone() tls w/o thread");

View file

@ -1,5 +1,6 @@
#ifndef COSMOPOLITAN_LIBC_RUNTIME_CXAATEXIT_H_
#define COSMOPOLITAN_LIBC_RUNTIME_CXAATEXIT_H_
#include "libc/intrin/nopl.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/stdio/stdio.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
@ -23,8 +24,13 @@ void __cxa_lock(void);
void __cxa_unlock(void);
void __cxa_printexits(FILE *, void *);
/*
 * Lock helpers for the __cxa_atexit() state.
 *
 * Portable builds test `__threaded` at every call site. GNU C builds
 * (excluding LLVM and strict ANSI mode) instead go through _NOPL0()
 * using the "__threadcalls" fixup table.
 */
#if !defined(__GNUC__) || defined(__llvm__) || defined(__STRICT_ANSI__)
#define __cxa_lock()   (__threaded ? __cxa_lock() : 0)
#define __cxa_unlock() (__threaded ? __cxa_unlock() : 0)
#else
#define __cxa_lock()   _NOPL0("__threadcalls", __cxa_lock)
#define __cxa_unlock() _NOPL0("__threadcalls", __cxa_unlock)
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -3,6 +3,7 @@
#include "libc/assert.h"
#include "libc/bits/midpoint.h"
#include "libc/dce.h"
#include "libc/intrin/nopl.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/nt/version.h"
@ -67,8 +68,13 @@ void ReleaseMemoryNt(struct MemoryIntervals *, int, int) hidden;
int UntrackMemoryIntervals(void *, size_t) hidden;
size_t GetMemtrackSize(struct MemoryIntervals *);
/*
 * Lock helpers for the memory interval tracking state.
 *
 * Portable builds test `__threaded` at every call site. GNU C builds
 * (excluding LLVM and strict ANSI mode) instead go through _NOPL0()
 * using the "__threadcalls" fixup table.
 */
#if !defined(__GNUC__) || defined(__llvm__) || defined(__STRICT_ANSI__)
#define __mmi_lock()   (__threaded ? __mmi_lock() : 0)
#define __mmi_unlock() (__threaded ? __mmi_unlock() : 0)
#else
#define __mmi_lock()   _NOPL0("__threadcalls", __mmi_lock)
#define __mmi_unlock() _NOPL0("__threadcalls", __mmi_unlock)
#endif
/*
 * Tells whether `p`, converted to intptr_t, lies in the closed range
 * [-0x800000000000, 0x7fffffffffff]. Note that `p` is evaluated twice.
 */
#define IsLegalPointer(p) \
  ((intptr_t)(p) >= -0x800000000000 && (intptr_t)(p) <= 0x7fffffffffff)

View file

@ -57,9 +57,8 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot,
* @return 0 on success, or -1 w/ errno
*/
privileged void __morph_begin(void) {
sigset_t ss;
sigset_t ss = {{-1, -1}};
if (!IsWindows()) {
sigfillset(&ss);
sys_sigprocmask(SIG_BLOCK, &ss, &oldss);
}
__morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE,

View file

@ -1,5 +1,6 @@
#ifndef COSMOPOLITAN_LIBC_STDIO_FFLUSH_H_
#define COSMOPOLITAN_LIBC_STDIO_FFLUSH_H_
#include "libc/intrin/nopl.h"
#include "libc/intrin/pthread.h"
#include "libc/stdio/stdio.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
@ -19,8 +20,14 @@ hidden extern struct StdioFlush __fflush;
void __fflush_lock(void);
void __fflush_unlock(void);
/*
 * Lock helpers for the fflush() state.
 *
 * Portable builds test `__threaded` at every call site. GNU C builds
 * (excluding LLVM and strict ANSI mode) instead go through _NOPL0()
 * using the "__threadcalls" fixup table.
 */
#if !defined(__GNUC__) || defined(__llvm__) || defined(__STRICT_ANSI__)
#define __fflush_lock()   (__threaded ? __fflush_lock() : 0)
#define __fflush_unlock() (__threaded ? __fflush_unlock() : 0)
#else
#define __fflush_lock()   _NOPL0("__threadcalls", __fflush_lock)
#define __fflush_unlock() _NOPL0("__threadcalls", __fflush_unlock)
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -1,6 +1,7 @@
#ifndef COSMOPOLITAN_LIBC_STDIO_STDIO_H_
#define COSMOPOLITAN_LIBC_STDIO_STDIO_H_
#include "libc/fmt/pflink.h"
#include "libc/intrin/nopl.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/symbolic.h"
@ -162,10 +163,6 @@ int vfprintf_unlocked(FILE *, const char *, va_list)
cosmopolitan § standard i/o » optimizations
*/
#define flockfile(f) (__threaded ? flockfile(f) : 0)
#define funlockfile(f) (__threaded ? funlockfile(f) : 0)
#define ftrylockfile(f) (__threaded ? ftrylockfile(f) : 0)
#define getc(f) fgetc(f)
#define getwc(f) fgetwc(f)
#define putc(c, f) fputc(c, f)
@ -176,6 +173,16 @@ int vfprintf_unlocked(FILE *, const char *, va_list)
#define putc_unlocked(c, f) fputc_unlocked(c, f)
#define putwc_unlocked(c, f) fputwc_unlocked(c, f)
/*
 * Stream locking fast paths.
 *
 * flockfile() and funlockfile() produce no result that callers use,
 * so GNU C builds (excluding LLVM and strict ANSI mode) route them
 * through _NOPL1() using the "__threadcalls" fixup table. Other builds
 * test `__threaded` at every call site.
 */
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__STRICT_ANSI__)
#define flockfile(f)   _NOPL1("__threadcalls", flockfile, f)
#define funlockfile(f) _NOPL1("__threadcalls", funlockfile, f)
#else
#define flockfile(f)   (__threaded ? flockfile(f) : 0)
#define funlockfile(f) (__threaded ? funlockfile(f) : 0)
#endif

/*
 * ftrylockfile() reports through its return value whether the lock was
 * obtained. _NOPL1() always evaluates to 0 and throws the callee's
 * result away, which would make every attempt look successful once
 * threads exist. It therefore always uses the plain conditional call.
 */
#define ftrylockfile(f) (__threaded ? ftrylockfile(f) : 0)
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
/* clang-format off */
#define printf(FMT, ...) (printf)(PFLINK(FMT), ##__VA_ARGS__)

View file

@ -4,6 +4,7 @@
#define LOCALTIME_IMPLEMENTATION
#include "libc/bits/bits.h"
#include "libc/calls/calls.h"
#include "libc/intrin/nopl.h"
#include "libc/intrin/pthread.h"
#include "libc/intrin/spinlock.h"
#include "libc/mem/mem.h"
@ -46,17 +47,22 @@ STATIC_YOINK("usr/share/zoneinfo/UTC");
static pthread_mutex_t locallock;
static int localtime_lock(void) {
int localtime_lock(void) {
pthread_mutex_lock(&locallock);
return 0;
}
static void localtime_unlock(void) {
void localtime_unlock(void) {
pthread_mutex_unlock(&locallock);
}
/*
 * Lock helpers for the localtime state.
 *
 * Portable builds test `__threaded` at every call site. GNU C builds
 * (excluding LLVM and strict ANSI mode) instead go through _NOPL0()
 * using the "__threadcalls" fixup table.
 */
#if !defined(__GNUC__) || defined(__llvm__) || defined(__STRICT_ANSI__)
#define localtime_lock()   (__threaded ? localtime_lock() : 0)
#define localtime_unlock() (__threaded ? localtime_unlock() : 0)
#else
#define localtime_lock()   _NOPL0("__threadcalls", localtime_lock)
#define localtime_unlock() _NOPL0("__threadcalls", localtime_unlock)
#endif
#ifndef TZ_ABBR_MAX_LEN
#define TZ_ABBR_MAX_LEN 16
@ -1743,7 +1749,7 @@ localtime_timesub(const time_t *timep, int_fast32_t offset,
** Normalize logic courtesy Paul Eggert.
*/
static inline bool
forceinline bool
increment_overflow(int *ip, int j)
{
#if defined(__GNUC__) && __GNUC__ >= 6
@ -1766,7 +1772,7 @@ increment_overflow(int *ip, int j)
#endif
}
static inline bool
forceinline bool
increment_overflow32(int_fast32_t *const lp, int const m)
{
#if defined(__GNUC__) && __GNUC__ >= 6
@ -1783,7 +1789,7 @@ increment_overflow32(int_fast32_t *const lp, int const m)
#endif
}
static inline bool
forceinline bool
increment_overflow_time(time_t *tp, int_fast32_t j)
{
#if defined(__GNUC__) && __GNUC__ >= 6

View file

@ -16,28 +16,88 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/calls/state.internal.h"
#include "libc/errno.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/pthread.h"
#include "libc/intrin/spinlock.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
#include "libc/thread/thread.h"
static void *DoNothing(void *arg) {
static char tls[64];
/**
 * Thread entry point that does nothing.
 *
 * @param p is ignored
 * @return 0 always
 */
int PutProcessInThreadingMode(void *p) {
  (void)p;
  return 0;
}
static void MakeSureThreadingModeIsEnabled(void) {
void *exitcode;
cthread_t thread;
ASSERT_EQ(0, cthread_create(&thread, 0, DoNothing, 0));
ASSERT_EQ(0, cthread_join(thread, 0));
// Test fixture: spawns a thread running PutProcessInThreadingMode() on
// a freshly mapped stack of GetStackSize() bytes, with the static
// `tls` buffer passed through __initialize_tls() as its TLS block.
// NOTE(review): the results of mmap() and clone() are not checked, and
// the mapping is never released here; confirm that is intended.
void SetUp(void) {
clone(PutProcessInThreadingMode,
mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0),
GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS,
0, 0, __initialize_tls(tls), sizeof(tls), 0);
}
void SetUp(void) {
MakeSureThreadingModeIsEnabled();
// A mutex initialized without attributes can be locked and unlocked
// twice in a row and then destroyed, with every call returning 0.
TEST(pthread_mutex_lock, normal) {
pthread_mutex_t lock;
ASSERT_EQ(0, pthread_mutex_init(&lock, 0));
ASSERT_EQ(0, pthread_mutex_lock(&lock));
ASSERT_EQ(0, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_lock(&lock));
ASSERT_EQ(0, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_destroy(&lock));
}
// A PTHREAD_MUTEX_RECURSIVE mutex may be locked again by its holder.
// The sequence takes the lock three times in total and releases it
// three times, and every call must return 0.
TEST(pthread_mutex_lock, recursive) {
pthread_mutex_t lock;
pthread_mutexattr_t attr;
ASSERT_EQ(0, pthread_mutexattr_init(&attr));
ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
ASSERT_EQ(0, pthread_mutex_lock(&lock));
ASSERT_EQ(0, pthread_mutex_lock(&lock));
ASSERT_EQ(0, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_lock(&lock));
ASSERT_EQ(0, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_destroy(&lock));
ASSERT_EQ(0, pthread_mutexattr_destroy(&attr));
}
// A PTHREAD_MUTEX_ERRORCHECK mutex reports misuse instead of allowing
// it: unlocking while not held yields EPERM, and locking again while
// already held by the caller yields EDEADLK.
TEST(pthread_mutex_lock, errorcheck) {
pthread_mutex_t lock;
pthread_mutexattr_t attr;
ASSERT_EQ(0, pthread_mutexattr_init(&attr));
ASSERT_EQ(0, pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
ASSERT_EQ(0, pthread_mutex_init(&lock, &attr));
// unlock before any lock
ASSERT_EQ(EPERM, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_lock(&lock));
// second lock by the holder
ASSERT_EQ(EDEADLK, pthread_mutex_lock(&lock));
ASSERT_EQ(0, pthread_mutex_unlock(&lock));
// unlock after it was already released
ASSERT_EQ(EPERM, pthread_mutex_unlock(&lock));
ASSERT_EQ(0, pthread_mutex_destroy(&lock));
ASSERT_EQ(0, pthread_mutexattr_destroy(&attr));
}
// Benchmarks an uncontended lock/unlock pair for each locking
// primitive: pthread mutex, the __fds_lock() macro pair, and the
// three spinlock macros operating on a local char.
BENCH(pthread_mutex_lock, bench) {
char schar = 0;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
EZBENCH2("pthread_mutex_lock", donothing,
(pthread_mutex_lock(&lock), pthread_mutex_unlock(&lock)));
EZBENCH2("__fds_lock", donothing, (__fds_lock(), __fds_unlock()));
EZBENCH2("_spinlock", donothing, (_spinlock(&schar), _spunlock(&schar)));
EZBENCH2("_spinlock_tiny", donothing,
(_spinlock_tiny(&schar), _spunlock(&schar)));
EZBENCH2("_spinlock_coop", donothing,
(_spinlock_cooperative(&schar), _spunlock(&schar)));
}

View file

@ -16,27 +16,18 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/weaken.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/lockcmpxchgp.h"
#include "libc/intrin/spinlock.h"
#include "libc/log/log.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/intrin/pthread.h"
privileged int _trylock_debug_4(int *lock, const char *lockname,
const char *file, int line, const char *func) {
int owner = 0;
int me = _spinlock_gettid();
if (_lockcmpxchgp(lock, &owner, me)) {
/**
* Yields current thread's remaining timeslice to operating system.
* @return 0 on success, or error number on failure
*/
int pthread_yield(void) {
if (sched_yield() != -1) {
return 0;
} else if (owner != me) {
return owner;
} else {
kprintf("%s:%d: error: lock re-entry on %s in %s()\n", file, line, lockname,
func);
if (weaken(__die)) weaken(__die)();
__restorewintty();
_Exit(1);
return errno;
}
}

View file

@ -36,7 +36,6 @@ TEST_LIBC_INTRIN_DIRECTDEPS = \
LIBC_STUBS \
LIBC_SYSV \
LIBC_TESTLIB \
LIBC_THREAD \
LIBC_TINYMATH \
LIBC_UNICODE \
LIBC_X \

View file

@ -25,7 +25,6 @@
#include "libc/mem/mem.h"
#include "libc/nexgen32e/gettls.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/symbols.internal.h"
@ -103,7 +102,7 @@ int CloneTest1(void *arg) {
TEST(clone, test1) {
int ptid = 0;
*childetid = -1;
_seizelock(childetid);
_seizelock(childetid, -1);
ASSERT_NE(-1, (tid = clone(CloneTest1, stack, GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_PARENT_SETTID |