cosmopolitan/libc/intrin/nopl.internal.h
2022-09-10 12:03:36 -07:00

71 lines
4 KiB
C

#ifndef COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
#define COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0) && defined(__GNUC__) && \
!defined(__llvm__) && !defined(__chibicc__) && !defined(__STRICT_ANSI__)
/**
* @fileoverview Turns CALLs into NOPs that are fixupable at runtime.
*
* Things like lock/unlock function calls can take on average 100ms.
* Libc needs to use internal locking pervasively in order to support
* threads. So there's a lot of cost everywhere, even though most progs
* don't use threads. In ANSI mode we dispatching (__threaded && lock())
* to solve this problem, but if we write lock statements that way, it
* adds a lot of bloat to the functions that call locking routines. So
* what we do here is replace the CALL instruction with NOP, which keeps
* the code just as fast as inlining, while making code size 10x tinier.
*/
#define _NOPL_PROLOGUE(SECTION) \
".section \".sort.rodata." SECTION ".1" \
"\",\"aG\",@progbits,\"" SECTION "\",comdat\n\t" \
".align\t4\n\t" \
".type\t\"" SECTION "_start\",@object\n\t" \
".globl\t\"" SECTION "_start\"\n\t" \
".equ\t\"" SECTION "_start\",.\n\t" \
".previous\n\t"
#define _NOPL_EPILOGUE(SECTION) \
".section \".sort.rodata." SECTION ".3" \
"\",\"aG\",@progbits,\"" SECTION "\",comdat\n\t" \
".align\t4\n\t" \
".type\"" SECTION "_end\",@object\n\t" \
".globl\t\"" SECTION "_end\"\n\t" \
".equ\t\"" SECTION "_end\",.\n\t" \
".previous\n\t"
#define _NOPL0(SECTION, FUNC) \
({ \
asm volatile(_NOPL_PROLOGUE(SECTION) /* */ \
_NOPL_EPILOGUE(SECTION) /* */ \
".section \".sort.rodata." SECTION ".2\",\"a\",@progbits\n\t" \
".align\t4\n\t" \
".long\t353f-%a1\n\t" \
".previous\n353:\t" \
"nopl\t%a0" \
: /* no inputs */ \
: "X"(FUNC), "X"(IMAGE_BASE_VIRTUAL) \
: "rax", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", \
"r11", "memory", "cc"); \
(void)0; \
})
#define _NOPL1(SECTION, FUNC, ARG) \
({ \
register autotype(ARG) __arg asm("rdi") = ARG; \
asm volatile(_NOPL_PROLOGUE(SECTION) /* */ \
_NOPL_EPILOGUE(SECTION) /* */ \
".section \".sort.rodata." SECTION ".2\",\"a\",@progbits\n\t" \
".align\t4\n\t" \
".long\t353f-%a2\n\t" \
".previous\n353:\t" \
"nopl\t%a1" \
: "+D"(__arg) \
: "X"(FUNC), "X"(IMAGE_BASE_VIRTUAL) \
: "rax", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", \
"memory", "cc"); \
(void)0; \
})
#endif /* !ASSEMBLER && !LINKER && GNUC && !CHIBICC && !LLVM && !ANSI */
#endif /* COSMOPOLITAN_LIBC_INTRIN_NOPL_H_ */