Fix breakages in Linux-only build modes

- compile.com now polyfills -march=native, which gcc/clang removed
- Guarantee zero Windows code is linked into non-Windows binaries
- MODE=tinylinux binaries are once again as tiny as ~4kb
- Improve the runtime's stack allocation / alignment hack
- GitHub Actions now tests the Linux-only build modes so breakages like these are caught
This commit is contained in:
Justine Tunney 2023-07-09 19:47:46 -07:00
parent 0e4c828a8e
commit 3dc86ce154
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
32 changed files with 283 additions and 104 deletions

View file

@ -55,8 +55,10 @@ static errno_t sys_clock_nanosleep(int clock, int flags,
rc = sys_clock_nanosleep_xnu(clock, flags, req, rem);
} else if (IsOpenbsd()) {
rc = sys_clock_nanosleep_openbsd(clock, flags, req, rem);
} else {
} else if (IsWindows()) {
rc = sys_clock_nanosleep_nt(clock, flags, req, rem);
} else {
rc = enosys();
}
if (rc == -1) {
rc = errno;

View file

@ -25,6 +25,7 @@
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/limits.h"
#include "libc/sysv/errfuns.h"
/**
* Returns nice value of thing.
@ -72,8 +73,10 @@ int getpriority(int which, unsigned who) {
errno = rc;
rc = -1;
}
} else {
} else if (IsWindows()) {
rc = sys_getpriority_nt(which, who);
} else {
rc = enosys();
}
#else
rc = sys_getpriority(which, who);

View file

@ -71,14 +71,15 @@ _start:
// align stack to GetStackSize() so GetStackAddr() is fast
.weak ape_stack_memsz
mov $ape_stack_memsz,%r9d
mov $ape_stack_align,%r8d
cmp $_HOSTLINUX,%cl
cmove %r9d,%r8d
mov $16,%r8d
test %r9d,%r9d
cmovnz %r9,%r8
neg %r8
and %r8,%rsp
xor %ebp,%ebp
// bofram 9f
#if SupportsWindows()
// make win32 imps noop
.weak ape_idata_iat
.weak ape_idata_iatend
@ -88,6 +89,7 @@ _start:
sub %rdi,%rcx
shr $3,%ecx
rep stosq
#endif
// scan through environment varis
// find start of auxiliary values

View file

@ -9,7 +9,7 @@ COSMOPOLITAN_C_START_
*/
#if defined(__GNUC__) && defined(__x86_64__) && defined(__MNO_RED_ZONE__) && \
!defined(__STRICT_ANSI__)
!defined(__STRICT_ANSI__) && !defined(__cplusplus)
#define errno \
(*({ \
errno_t *_ep; \

View file

@ -80,7 +80,7 @@
#define FRAMESIZE 0x10000
#define PAGESIZE 0x1000 /* i386+ */
#else
#define APE_STACKSIZE 8388608 /* default 8mb stack */
#define APE_STACKSIZE 4194304 /* default 4mb stack */
#endif
#define APE_PAGESIZE 0x10000 /* i386+ */
#define APE_GUARDSIZE 0x4000 /* b/c apple m1 */

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/nt/runtime.h"
#include "libc/str/str.h"
@ -26,5 +27,6 @@
* @return 0 on success, or error code
*/
int strerror_r(int err, char *buf, size_t size) {
return strerror_wr(err, GetLastError(), buf, size);
int winerr = IsWindows() ? GetLastError() : 0;
return strerror_wr(err, winerr, buf, size);
}

View file

@ -222,7 +222,9 @@ textstartup void __enable_tls(void) {
#ifdef __x86_64__
// rewrite the executable tls opcodes in memory
__morph_tls();
if (IsWindows() || IsXnu()) {
__morph_tls();
}
#endif
// we are now allowed to use tls

View file

@ -76,7 +76,7 @@ _init_check_rdi_rsi:
jne 1b
3: .endfn _init_check_rdi_rsi
#endif
_woot: leave
leave
#elif defined(__aarch64__)
ldp x29,x30,[sp],#16
#endif

View file

@ -45,77 +45,75 @@ privileged void __morph_tls(void) {
// We check `_tls_content` which is generated by the linker script
// since it lets us determine ahead of time if _Thread_local vars
// have actually been linked into this program.
if (IsWindows() || IsXnu()) {
int n;
uint64_t w;
sigset_t mask;
unsigned m, dis;
unsigned char *p;
__morph_begin(&mask);
int n;
uint64_t w;
sigset_t mask;
unsigned m, dis;
unsigned char *p;
__morph_begin(&mask);
if (IsXnu()) {
// Apple is quite straightforward to patch. We basically
// just change the segment register, and the linear slot
// address 0x30 was promised to us, according to Go team
// https://github.com/golang/go/issues/23617
dis = 0x30;
} else {
// MSVC __declspec(thread) generates binary code for this
// %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
// times we should be good.
dis = 0x1480 + __tls_index * 8;
}
// iterate over modifiable code looking for 9 byte instruction
// this would take 30 ms using xed to enable tls on python.com
for (p = _ereal; p + 9 <= __privileged_start; p += n) {
// use sse to zoom zoom to fs register prefixes
// that way it'll take 1 ms to morph python.com
while (p + 9 + 16 <= __privileged_start) {
if ((m = __builtin_ia32_pmovmskb128(
*(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144,
0144, 0144, 0144, 0144, 0144, 0144,
0144, 0144, 0144, 0144}))) {
m = __builtin_ctzll(m);
p += m;
break;
} else {
p += 16;
}
}
// we're checking for the following expression:
// 0144 == p[0] && // %fs
// 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r)
// (0213 == p[2] || // mov reg/mem → reg (word-sized)
// 0003 == p[2]) && // add reg/mem → reg (word-sized)
// 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
// 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
// 0000 == p[5] && // displacement (von Neumann endian)
// 0000 == p[6] && // displacement
// 0000 == p[7] && // displacement
// 0000 == p[8] // displacement
w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
w == READ64LE("\144\110\003\004\045\000\000\000")) &&
!p[8]) {
// now change the code
p[0] = 0145; // change %fs to %gs
p[5] = (dis & 0x000000ff) >> 000; // displacement
p[6] = (dis & 0x0000ff00) >> 010; // displacement
p[7] = (dis & 0x00ff0000) >> 020; // displacement
p[8] = (dis & 0xff000000) >> 030; // displacement
// advance to the next instruction
n = 9;
} else {
n = 1;
}
}
__morph_end(&mask);
if (IsXnu()) {
// Apple is quite straightforward to patch. We basically
// just change the segment register, and the linear slot
// address 0x30 was promised to us, according to Go team
// https://github.com/golang/go/issues/23617
dis = 0x30;
} else {
// MSVC __declspec(thread) generates binary code for this
// %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
// times we should be good.
dis = 0x1480 + __tls_index * 8;
}
// iterate over modifiable code looking for 9 byte instruction
// this would take 30 ms using xed to enable tls on python.com
for (p = _ereal; p + 9 <= __privileged_start; p += n) {
// use sse to zoom zoom to fs register prefixes
// that way it'll take 1 ms to morph python.com
while (p + 9 + 16 <= __privileged_start) {
if ((m = __builtin_ia32_pmovmskb128(
*(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144, 0144,
0144, 0144, 0144, 0144, 0144, 0144, 0144,
0144, 0144}))) {
m = __builtin_ctzll(m);
p += m;
break;
} else {
p += 16;
}
}
// we're checking for the following expression:
// 0144 == p[0] && // %fs
// 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r)
// (0213 == p[2] || // mov reg/mem → reg (word-sized)
// 0003 == p[2]) && // add reg/mem → reg (word-sized)
// 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
// 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
// 0000 == p[5] && // displacement (von Neumann endian)
// 0000 == p[6] && // displacement
// 0000 == p[7] && // displacement
// 0000 == p[8] // displacement
w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
w == READ64LE("\144\110\003\004\045\000\000\000")) &&
!p[8]) {
// now change the code
p[0] = 0145; // change %fs to %gs
p[5] = (dis & 0x000000ff) >> 000; // displacement
p[6] = (dis & 0x0000ff00) >> 010; // displacement
p[7] = (dis & 0x00ff0000) >> 020; // displacement
p[8] = (dis & 0xff000000) >> 030; // displacement
// advance to the next instruction
n = 9;
} else {
n = 1;
}
}
__morph_end(&mask);
#endif
}

View file

@ -1410,6 +1410,7 @@ err:
return -1;
}
#if SupportsWindows()
textwindows int sys_close_epoll_nt(int fd) {
struct PortState *port_state;
struct TsTreeNode *tree_node;
@ -1427,6 +1428,7 @@ err:
err_check_handle(g_fds.p[fd].handle);
return -1;
}
#endif
/**
* Creates new epoll instance.

View file

@ -223,7 +223,9 @@ void testlib_runtestcases(testfn_t *start, testfn_t *end, testfn_t warmup) {
if (_weaken(testlib_enable_tmp_setup_teardown)) SetupTmpDir();
if (_weaken(SetUp)) _weaken(SetUp)();
errno = 0;
SetLastError(0);
if (IsWindows()) {
SetLastError(0);
}
if (!IsWindows()) sys_getpid();
if (warmup) warmup();
testlib_clearxmmregisters();