mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
71 lines
4 KiB
C
71 lines
4 KiB
C
#ifndef COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
|
|
#define COSMOPOLITAN_LIBC_INTRIN_NOPL_H_
|
|
#if !(__ASSEMBLER__ + __LINKER__ + 0) && defined(__GNUC__) && \
|
|
!defined(__llvm__) && !defined(__chibicc__) && !defined(__STRICT_ANSI__)
|
|
|
|
/**
|
|
* @fileoverview Turns CALLs into NOPs that are fixupable at runtime.
|
|
*
|
|
* Things like lock/unlock function calls can take on average 100ms.
|
|
* Libc needs to use internal locking pervasively in order to support
|
|
* threads. So there's a lot of cost everywhere, even though most progs
|
|
* don't use threads. In ANSI mode we dispatching (__threaded && lock())
|
|
* to solve this problem, but if we write lock statements that way, it
|
|
* adds a lot of bloat to the functions that call locking routines. So
|
|
* what we do here is replace the CALL instruction with NOP, which keeps
|
|
* the code just as fast as inlining, while making code size 10x tinier.
|
|
*/
|
|
|
|
#define _NOPL_PROLOGUE(SECTION) \
|
|
".section \".sort.rodata." SECTION ".1" \
|
|
"\",\"aG\",@progbits,\"" SECTION "\",comdat\n\t" \
|
|
".align\t4\n\t" \
|
|
".type\t\"" SECTION "_start\",@object\n\t" \
|
|
".globl\t\"" SECTION "_start\"\n\t" \
|
|
".equ\t\"" SECTION "_start\",.\n\t" \
|
|
".previous\n\t"
|
|
|
|
#define _NOPL_EPILOGUE(SECTION) \
|
|
".section \".sort.rodata." SECTION ".3" \
|
|
"\",\"aG\",@progbits,\"" SECTION "\",comdat\n\t" \
|
|
".align\t4\n\t" \
|
|
".type\"" SECTION "_end\",@object\n\t" \
|
|
".globl\t\"" SECTION "_end\"\n\t" \
|
|
".equ\t\"" SECTION "_end\",.\n\t" \
|
|
".previous\n\t"
|
|
|
|
#define _NOPL0(SECTION, FUNC) \
|
|
({ \
|
|
asm volatile(_NOPL_PROLOGUE(SECTION) /* */ \
|
|
_NOPL_EPILOGUE(SECTION) /* */ \
|
|
".section \".sort.rodata." SECTION ".2\",\"a\",@progbits\n\t" \
|
|
".align\t4\n\t" \
|
|
".long\t353f-%a1\n\t" \
|
|
".previous\n353:\t" \
|
|
"nopl\t%a0" \
|
|
: /* no inputs */ \
|
|
: "X"(FUNC), "X"(IMAGE_BASE_VIRTUAL) \
|
|
: "rax", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "r10", \
|
|
"r11", "memory", "cc"); \
|
|
(void)0; \
|
|
})
|
|
|
|
#define _NOPL1(SECTION, FUNC, ARG) \
|
|
({ \
|
|
register autotype(ARG) __arg asm("rdi") = ARG; \
|
|
asm volatile(_NOPL_PROLOGUE(SECTION) /* */ \
|
|
_NOPL_EPILOGUE(SECTION) /* */ \
|
|
".section \".sort.rodata." SECTION ".2\",\"a\",@progbits\n\t" \
|
|
".align\t4\n\t" \
|
|
".long\t353f-%a2\n\t" \
|
|
".previous\n353:\t" \
|
|
"nopl\t%a1" \
|
|
: "+D"(__arg) \
|
|
: "X"(FUNC), "X"(IMAGE_BASE_VIRTUAL) \
|
|
: "rax", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", \
|
|
"memory", "cc"); \
|
|
(void)0; \
|
|
})
|
|
|
|
#endif /* !ASSEMBLER && !LINKER && GNUC && !CHIBICC && !LLVM && !ANSI */
|
|
#endif /* COSMOPOLITAN_LIBC_INTRIN_NOPL_H_ */
|