diff --git a/build/config.mk b/build/config.mk index c35a55a01..fcee06d5f 100644 --- a/build/config.mk +++ b/build/config.mk @@ -130,6 +130,12 @@ TARGET_ARCH ?= -msse3 OVERRIDE_CCFLAGS += -fno-pie endif +ifeq ($(MODE), aarch64-dbg) +CONFIG_CPPFLAGS += -DMODE_DBG +CONFIG_CCFLAGS += $(BACKTRACES) $(FTRACE) -DSYSDEBUG -O -fno-inline +CONFIG_COPTS += -fsanitize=undefined +endif + # System Five Mode # # - `make MODE=sysv` diff --git a/build/rules.mk b/build/rules.mk index efdf3ff68..2ad706a96 100644 --- a/build/rules.mk +++ b/build/rules.mk @@ -88,21 +88,21 @@ o/$(MODE)/%.pkg: $(file >$(TMPSAFE).args,$(filter %.o,$^)) @$(COMPILE) -APACKAGE -wT$@ $(PKG) $(OUTPUT_OPTION) $(addprefix -d,$(filter %.pkg,$^)) @$(TMPSAFE).args -o/$(MODE)/%.o: %.py o/$(MODE)/third_party/python/pyobj.com - @$(COMPILE) -wAPYOBJ o/$(MODE)/third_party/python/pyobj.com $(PYFLAGS) -o $@ $< +o/$(MODE)/%.o: %.py o/$(MODE)/third_party/python/pyobj.com $(VM) + @$(COMPILE) -wAPYOBJ $(VM) o/$(MODE)/third_party/python/pyobj.com $(PYFLAGS) -o $@ $< -o/$(MODE)/%.pyc: %.py o/$(MODE)/third_party/python/pycomp.com - @$(COMPILE) -wAPYCOMP o/$(MODE)/third_party/python/pycomp.com $(PYCFLAGS) -o $@ $< +o/$(MODE)/%.pyc: %.py o/$(MODE)/third_party/python/pycomp.com $(VM) + @$(COMPILE) -wAPYCOMP $(VM) o/$(MODE)/third_party/python/pycomp.com $(PYCFLAGS) -o $@ $< -o/$(MODE)/%.lua: %.lua o/$(MODE)/third_party/lua/luac.com - @$(COMPILE) -wALUAC o/$(MODE)/third_party/lua/luac.com -s -o $@ $< +o/$(MODE)/%.lua: %.lua o/$(MODE)/third_party/lua/luac.com $(VM) + @$(COMPILE) -wALUAC $(VM) o/$(MODE)/third_party/lua/luac.com -s -o $@ $< -o/$(MODE)/%.lua.runs: %.lua o/$(MODE)/tool/net/redbean.com - @$(COMPILE) -wALUA -tT$@ o/$(MODE)/tool/net/redbean.com $(LUAFLAGS) -i $< +o/$(MODE)/%.lua.runs: %.lua o/$(MODE)/tool/net/redbean.com $(VM) + @$(COMPILE) -wALUA -tT$@ $(VM) o/$(MODE)/tool/net/redbean.com $(LUAFLAGS) -i $< -o/$(MODE)/%: o/$(MODE)/%.com o/$(MODE)/tool/build/cp.com o/$(MODE)/tool/build/assimilate.com - @$(COMPILE) -wACP -T$@ o/$(MODE)/tool/build/cp.com $< $@ - @$(COMPILE) -wAASSIMILATE -T$@ o/$(MODE)/tool/build/assimilate.com $@ +o/$(MODE)/%: o/$(MODE)/%.com o/$(MODE)/tool/build/cp.com o/$(MODE)/tool/build/assimilate.com $(VM) + @$(COMPILE) -wACP -T$@ $(VM) o/$(MODE)/tool/build/cp.com $< $@ + @$(COMPILE) -wAASSIMILATE -T$@ $(VM) o/$(MODE)/tool/build/assimilate.com $@ ################################################################################ # LOCAL UNIT TESTS @@ -205,12 +205,14 @@ MAKE_OBJCOPY = \ MAKE_SYMTAB_CREATE = \ $(COMPILE) -wASYMTAB \ + $(VM) \ o/$(MODE)/tool/build/symtab.com \ -o $(TMPSAFE)/.symtab \ $< MAKE_SYMTAB_ZIP = \ $(COMPILE) -AZIP -T$@ \ + $(VM) \ o/$(MODE)/third_party/zip/zip.com \ -b$(TMPDIR) \ -9qj \ diff --git a/dsp/tty/tty.mk b/dsp/tty/tty.mk index e7a04f583..968e7d092 100644 --- a/dsp/tty/tty.mk +++ b/dsp/tty/tty.mk @@ -54,9 +54,16 @@ $(DSP_TTY_A).pkg: \ $(foreach x,$(DSP_TTY_A_DIRECTDEPS),$($(x)_A).pkg) o/$(MODE)/dsp/tty/ttyraster.o: private \ - OVERRIDE_CFLAGS += \ + OVERRIDE_CFLAGS += \ $(MATHEMATICAL) +ifeq ($(ARCH), aarch64) +# takes 14 seconds to compile with aarch64 gcc +o/$(MODE)/dsp/tty/ttyraster.o: private \ + OVERRIDE_CFLAGS += \ + -O1 +endif + DSP_TTY_LIBS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x))) DSP_TTY_SRCS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)_SRCS)) DSP_TTY_HDRS = $(foreach x,$(DSP_TTY_ARTIFACTS),$($(x)_HDRS)) diff --git a/libc/runtime/__utmpxname.S b/dsp/tty/windex.c similarity index 68% rename from libc/runtime/__utmpxname.S rename to dsp/tty/windex.c index e8c30b239..afdefe8ff 100644 --- a/libc/runtime/__utmpxname.S +++ b/dsp/tty/windex.c @@ -1,7 +1,7 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,13 +16,26 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" +#include "dsp/tty/windex.h" +#include "dsp/tty/tty.h" +#include "libc/nexgen32e/x86feature.h" -__utmpxname: - .errno - mov ENOTSUP(%rip),%edx - mov %edx,(%rax) - ret - .endfn __utmpxname,globl - .alias __utmpxname,utmpname - .alias __utmpxname,utmpxname +extern unsigned windex_avx2(uint16_t *, size_t); +extern unsigned windex_sse4(uint16_t *, size_t); +extern unsigned windex_k8(uint16_t *, size_t); + +unsigned (*windex)(uint16_t *, size_t); + +__attribute__((__constructor__)) static void init_windex(void) { +#ifdef __x86_64__ + if (X86_HAVE(AVX2)) { + windex = windex_avx2; + } else if (X86_HAVE(SSE4_2)) { + windex = windex_sse4; + } else { + windex = windex_k8; + } +#else + windex = windex_k8; +#endif +} diff --git a/dsp/tty/windex.h b/dsp/tty/windex.h index 95b26cd36..0effa18bd 100644 --- a/dsp/tty/windex.h +++ b/dsp/tty/windex.h @@ -3,7 +3,7 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -extern unsigned (*const windex)(uint16_t *, size_t); +extern unsigned (*windex)(uint16_t *, size_t); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/examples/auto-launch-gdb-on-crash.c b/examples/auto-launch-gdb-on-crash.c index c982b888e..efd2070ed 100644 --- a/examples/auto-launch-gdb-on-crash.c +++ b/examples/auto-launch-gdb-on-crash.c @@ -41,6 +41,6 @@ int main(int argc, char *argv[]) { ShowCrashReports(); - asm("int3"); /* cf. __die(), perror("msg"), abort(), exit(1), _Exit(1) */ + __builtin_trap(); return 0; } diff --git a/examples/breakpoint.c b/examples/breakpoint.c index c4de23644..199345366 100644 --- a/examples/breakpoint.c +++ b/examples/breakpoint.c @@ -23,13 +23,7 @@ int main(int argc, char *argv[]) { kprintf("try running: o//tool/build/strace.com %s%n", argv[0]); } - asm volatile("mov\t%4,%%r10\n\t" - "mov\t%5,%%r8\n\t" - "mov\t%6,%%r9\n\t" - "int3" - : /* no outputs */ - : "a"(0), "D"(1), "S"(2), "d"(3), "g"(4), "g"(5), "g"(6) - : "r8", "r9", "r10"); + __builtin_trap(); printf("recovered from SIGTRAP without handler\r\n"); return 0; diff --git a/examples/package/lib/build.mk b/examples/package/lib/build.mk index 32b74fd40..33ee311e8 100644 --- a/examples/package/lib/build.mk +++ b/examples/package/lib/build.mk @@ -96,6 +96,10 @@ $(EXAMPLES_PACKAGE_LIB_A).pkg: \ # Invalidates objects in package when makefile is edited. $(EXAMPLES_PACKAGE_LIB_A_OBJS): examples/package/lib/build.mk +# let these assembly objects build on aarch64 +o/$(MODE)/examples/package/lib/myasm.o: examples/package/lib/myasm.S + @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< + EXAMPLES_PACKAGE_LIB_LIBS = $(foreach x,$(EXAMPLES_PACKAGE_LIB_ARTIFACTS),$($(x))) EXAMPLES_PACKAGE_LIB_SRCS = $(foreach x,$(EXAMPLES_PACKAGE_LIB_ARTIFACTS),$($(x)_SRCS)) EXAMPLES_PACKAGE_LIB_HDRS = $(foreach x,$(EXAMPLES_PACKAGE_LIB_ARTIFACTS),$($(x)_HDRS)) diff --git a/examples/package/lib/myasm.S b/examples/package/lib/myasm.S index 1711187fc..acb21b98e 100644 --- a/examples/package/lib/myasm.S +++ b/examples/package/lib/myasm.S @@ -8,9 +8,18 @@ // somehow they usually make code faster // it's convention for keeping stack 16-byte aligned // cpus still devote much to pushing & popping b/c i386 -MyAsm: push %rbp +MyAsm: + +#ifdef __x86_64__ + push %rbp mov %rsp,%rbp call MyPrint2 pop %rbp +#elif defined(__aarch64__) + bl MyPrint2 +#else +#error "unsupported architecture" +#endif + ret .endfn MyAsm,globl diff --git a/examples/ucontext-sigfpe-recovery.c b/examples/ucontext-sigfpe-recovery.c index ccfbad46d..745dcbb2e 100644 --- a/examples/ucontext-sigfpe-recovery.c +++ b/examples/ucontext-sigfpe-recovery.c @@ -16,6 +16,8 @@ #include "libc/sysv/consts/sig.h" #include "third_party/xed/x86.h" +#ifdef __x86_64__ + /** * @fileoverview How to change CPU state on signal delivery * @@ -43,3 +45,5 @@ int main(int argc, char *argv[]) { printf("123/0 = %ld\n", 123 / x); return 0; } + +#endif /* __x86_64__ */ diff --git a/examples/vga.c b/examples/vga.c index 21f68017e..f26d4a6a1 100644 --- a/examples/vga.c +++ b/examples/vga.c @@ -15,6 +15,8 @@ #include "libc/str/str.h" #include "libc/sysv/consts/termios.h" +#ifdef __x86_64__ + /** * @fileoverview Bare Metal VGA TTY demo. * @@ -58,3 +60,5 @@ int main(int argc, char *argv[]) { } } } + +#endif /* __x86_64__ */ diff --git a/examples/vga2.c b/examples/vga2.c index ab846106d..859b14364 100644 --- a/examples/vga2.c +++ b/examples/vga2.c @@ -15,6 +15,8 @@ #include "libc/str/str.h" #include "libc/sysv/consts/termios.h" +#ifdef __x86_64__ + /** * @fileoverview Demo of program crash reporting with Bare Metal VGA TTY. * @@ -35,5 +37,8 @@ int main(int argc, char *argv[]) { printf("argv[%d] = \"%s\"\n", i, argv[i]); } printf("\e[92;44mHello World!\e[0m %d\n", 1 / (x + y - 3)); - for (;;); + for (;;) + ; } + +#endif /* __x86_64__ */ diff --git a/examples/wall.c b/examples/wall.c index 1e115960b..95287f59c 100644 --- a/examples/wall.c +++ b/examples/wall.c @@ -132,12 +132,12 @@ int main(int argc, char *argv[]) { appends(&msg, "\r\n\e[K\e[0m\e8"); // restore // try to send message to all pseudoteletypewriters - for (int i = 0;; ++i) { + for (int i = 0; i < 100; ++i) { int fd; char pts[32]; snprintf(pts, sizeof(pts), "/dev/pts/%d", i); if ((fd = open(pts, O_WRONLY | O_NOCTTY)) == -1) { - if (errno == ENOENT) break; + if (errno == ENOENT) continue; if (g_verbose) perror(pts); } write(fd, msg, appendz(msg).i); diff --git a/libc/calls/openat.c b/libc/calls/openat.c index fe73792f2..34a1e4549 100644 --- a/libc/calls/openat.c +++ b/libc/calls/openat.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/calls/cp.internal.h" #include "libc/calls/internal.h" diff --git a/libc/calls/pledge-linux.c b/libc/calls/pledge-linux.c index f3929309f..b4a3d0b90 100644 --- a/libc/calls/pledge-linux.c +++ b/libc/calls/pledge-linux.c @@ -1254,23 +1254,21 @@ static privileged int HasSyscall(struct Pledges *p, uint16_t n) { static privileged void OnSigSys(int sig, siginfo_t *si, void *vctx) { bool found; - char ord[17], rip[17]; + char ord[17]; int i, ok, mode = si->si_errno; ucontext_t *ctx = vctx; ctx->uc_mcontext.MCONTEXT_SYSCALL_RESULT_REGISTER = -Eperm; FixCpy(ord, si->si_syscall, 12); - HexCpy(rip, ctx->uc_mcontext.MCONTEXT_INSTRUCTION_POINTER); for (found = i = 0; i < ARRAYLEN(kPledge); ++i) { if (HasSyscall(kPledge + i, si->si_syscall)) { Log("error: pledge ", kPledge[i].name, " for ", - GetSyscallName(si->si_syscall), " (ord=", ord, " rip=", rip, ")\n", - NULL); + GetSyscallName(si->si_syscall), " (ord=", ord, ")\n", NULL); found = true; } } if (!found) { Log("error: bad syscall (", GetSyscallName(si->si_syscall), " ord=", ord, - " rip=", rip, ")\n", NULL); + ")\n", NULL); } switch (mode & PLEDGE_PENALTY_MASK) { case PLEDGE_PENALTY_KILL_PROCESS: diff --git a/libc/calls/prctl.c b/libc/calls/prctl.c index c8f90d67e..97f84d64b 100644 --- a/libc/calls/prctl.c +++ b/libc/calls/prctl.c @@ -42,8 +42,8 @@ privileged int prctl(int operation, ...) { d = va_arg(va, intptr_t); va_end(va); -#ifdef __x86_64__ if (IsLinux()) { +#ifdef __x86_64__ asm volatile("mov\t%5,%%r10\n\t" "mov\t%6,%%r8\n\t" "syscall" @@ -51,25 +51,25 @@ privileged int prctl(int operation, ...) { : "0"(157), "D"(operation), "S"(a), "d"(b), "g"(c), "g"(d) : "rcx", "r8", "r10", "r11", "memory"); if (rc > -4096u) errno = -rc, rc = -1; - } else { - rc = enosys(); - } #elif defined(__aarch64__) - register long r0 asm("x0") = (long)operation; - register long r1 asm("x1") = (long)a; - register long r2 asm("x2") = (long)b; - register long r3 asm("x3") = (long)c; - register long r4 asm("x4") = (long)d; - register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" - : "=r"(res_x0) - : "i"(167), "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4) - : "x8", "memory"); - rc = _sysret(res_x0); + register long r0 asm("x0") = (long)operation; + register long r1 asm("x1") = (long)a; + register long r2 asm("x2") = (long)b; + register long r3 asm("x3") = (long)c; + register long r4 asm("x4") = (long)d; + register long res_x0 asm("x0"); + asm volatile("mov\tx8,%1\n\t" + "svc\t0" + : "=r"(res_x0) + : "i"(167), "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4) + : "x8", "memory"); + rc = _sysret(res_x0); #else #error "arch unsupported" #endif + } else { + rc = enosys(); + } #ifdef SYSDEBUG if (operation == PR_CAPBSET_READ || operation == PR_CAPBSET_DROP) { diff --git a/libc/calls/seccomp.c b/libc/calls/seccomp.c index 5b845be35..65ce5ad8f 100644 --- a/libc/calls/seccomp.c +++ b/libc/calls/seccomp.c @@ -37,8 +37,8 @@ */ privileged int seccomp(unsigned operation, unsigned flags, void *args) { int rc; -#ifdef __x86_64__ if (IsLinux()) { +#ifdef __x86_64__ asm volatile("syscall" : "=a"(rc) : "0"(317), "D"(operation), "S"(flags), "d"(args) @@ -61,23 +61,23 @@ privileged int seccomp(unsigned operation, unsigned flags, void *args) { errno = -rc; rc = -1; } - } else { - rc = enosys(); - } #elif defined(__aarch64__) - register long r0 asm("x0") = (long)operation; - register long r1 asm("x1") = (long)flags; - register long r2 asm("x2") = (long)args; - register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" - : "=r"(res_x0) - : "i"(211), "r"(r0), "r"(r1), "r"(r2) - : "x8", "memory"); - rc = _sysret(res_x0); + register long r0 asm("x0") = (long)operation; + register long r1 asm("x1") = (long)flags; + register long r2 asm("x2") = (long)args; + register long res_x0 asm("x0"); + asm volatile("mov\tx8,%1\n\t" + "svc\t0" + : "=r"(res_x0) + : "i"(211), "r"(r0), "r"(r1), "r"(r2) + : "x8", "memory"); + rc = _sysret(res_x0); #else #error "arch unsupported" #endif + } else { + rc = enosys(); + } STRACE("seccomp(%s, %#x, %p) → %d% m", DescribeSeccompOperation(operation), flags, args, rc); return rc; diff --git a/libc/calls/unveil.c b/libc/calls/unveil.c index 05817da7b..5b0334de3 100644 --- a/libc/calls/unveil.c +++ b/libc/calls/unveil.c @@ -49,6 +49,12 @@ #include "libc/thread/tls.h" #ifdef __x86_64__ +#define ARCHITECTURE AUDIT_ARCH_X86_64 +#elif defined(__aarch64__) +#define ARCHITECTURE AUDIT_ARCH_AARCH64 +#else +#error "unsupported architecture" +#endif #define OFF(f) offsetof(struct seccomp_data, f) @@ -70,7 +76,7 @@ static const struct sock_filter kUnveilBlacklistAbiVersionBelow3[] = { BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)), - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ARCHITECTURE, 1, 0), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_truncate, 1, 0), @@ -81,7 +87,7 @@ static const struct sock_filter kUnveilBlacklistAbiVersionBelow3[] = { static const struct sock_filter kUnveilBlacklistLatestAbi[] = { BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)), - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ARCHITECTURE, 1, 0), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_setxattr, 0, 1), @@ -402,5 +408,3 @@ int unveil(const char *path, const char *permissions) { STRACE("unveil(%#s, %#s) → %d% m", path, permissions, rc); return rc; } - -#endif /* __x86_64__ */ diff --git a/libc/fmt/magnumstrs.internal.h b/libc/fmt/magnumstrs.internal.h index 8d20edebf..256ed829b 100644 --- a/libc/fmt/magnumstrs.internal.h +++ b/libc/fmt/magnumstrs.internal.h @@ -26,6 +26,7 @@ _Hide extern const struct MagnumStr kRlimitNames[]; _Hide extern const struct MagnumStr kSignalNames[]; _Hide extern const struct MagnumStr kSockOptnames[]; _Hide extern const struct MagnumStr kTcpOptnames[]; +_Hide extern const struct MagnumStr kPollNames[]; char *GetMagnumStr(const struct MagnumStr *, int); char *DescribeMagnum(char *, const struct MagnumStr *, const char *, int); diff --git a/libc/intrin/asan.c b/libc/intrin/asan.c index 3131fb230..ce54a162b 100644 --- a/libc/intrin/asan.c +++ b/libc/intrin/asan.c @@ -47,10 +47,9 @@ #include "libc/sysv/errfuns.h" #include "libc/thread/tls.h" #include "third_party/dlmalloc/dlmalloc.h" - #ifdef __x86_64__ + STATIC_YOINK("_init_asan"); -#endif #if IsModeDbg() // MODE=dbg @@ -1505,3 +1504,5 @@ void __asan_init(int argc, char **argv, char **envp, intptr_t *auxv) { STRACE("/_/ \\_\\____/_/ \\_\\_| \\_|"); STRACE("cosmopolitan memory safety module initialized"); } + +#endif /* __x86_64__ */ diff --git a/libc/intrin/asanthunk.c b/libc/intrin/asanthunk.c index 4caf5be7c..93b397f26 100644 --- a/libc/intrin/asanthunk.c +++ b/libc/intrin/asanthunk.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#ifdef __x86_64__ void __asan_report_load(void *, int); void __asan_report_store(void *, int); @@ -171,3 +172,5 @@ void __asan_store16() { void __asan_store32() { __builtin_trap(); } + +#endif /* __x86_64__ */ diff --git a/libc/intrin/intrin.mk b/libc/intrin/intrin.mk index b46291faa..57d9ebf50 100644 --- a/libc/intrin/intrin.mk +++ b/libc/intrin/intrin.mk @@ -78,6 +78,14 @@ o/$(MODE)/libc/intrin/asan.o: private \ -finline \ -finline-functions +o/$(MODE)/libc/intrin/asanthunk.o: private \ + OVERRIDE_CFLAGS += \ + -x-no-pg \ + $(MNO_FENTRY) \ + -ffreestanding \ + -fno-sanitize=all \ + -fno-stack-protector + # we can't use compiler magic because: # kprintf() is mission critical to error reporting o/$(MODE)/libc/intrin/getmagnumstr.greg.o \ diff --git a/dsp/tty/windex.S b/libc/intrin/kpollnames.S similarity index 77% rename from dsp/tty/windex.S rename to libc/intrin/kpollnames.S index df112ec85..d65af6894 100644 --- a/dsp/tty/windex.S +++ b/libc/intrin/kpollnames.S @@ -16,26 +16,31 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nexgen32e/x86feature.h" +#include "libc/fmt/magnumstrs.internal.h" #include "libc/macros.internal.h" -// Dispatches to fastest windex() implementation. - .initbss 300,_init_windex -windex: .quad 0 - .endobj windex,globl + .macro .e e s + .long \e - kPollNames + .long .L\@ - kPollNames + .rodata.str1.1 +.L\@: .string "\s" .previous + .endm - .init.start 300,_init_windex - ezlea windex_avx2,ax -#if !X86_NEED(AVX2) - ezlea windex_sse4,dx - testb X86_HAVE(AVX2)+kCpuids(%rip) - cmovz %rdx,%rax -#endif /* AVX2 */ -#if !X86_NEED(SSE4_2) - ezlea windex_k8,dx - testb X86_HAVE(SSE4_2)+kCpuids(%rip) - cmovz %rdx,%rax -#endif /* SSE4 */ - stosq - .init.end 300,_init_windex + .section .rodata,"a",@progbits + .balign 4 + .underrun +kPollNames: + .e POLLNVAL "POLLNVAL" + .e POLLWRNORM "POLLWRNORM" + .e POLLWRBAND "POLLWRBAND" + .e POLLRDNORM "POLLRDNORM" + .e POLLRDHUP "POLLRDHUP" + .e POLLRDBAND "POLLRDBAND" + .e POLLHUP "POLLHUP" + .e POLLERR "POLLERR" + .e POLLPRI "POLLPRI" + .e POLLOUT "POLLOUT" + .e POLLIN "POLLIN" + .endobj kPollNames,globl,hidden + .overrun diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index 2a412dba5..9728686b8 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -48,6 +48,7 @@ #include "libc/runtime/internal.h" #include "libc/runtime/memtrack.internal.h" #include "libc/runtime/runtime.h" +#include "libc/runtime/stack.h" #include "libc/runtime/symbols.internal.h" #include "libc/str/str.h" #include "libc/str/tab.internal.h" @@ -173,12 +174,16 @@ privileged bool kisdangerous(const void *p) { int frame; if (kisimagepointer(p)) return false; if (kiskernelpointer(p)) return false; + if (IsOldStack(p)) return false; if (IsLegalPointer(p)) { - frame = (intptr_t)p >> 16; + frame = (uintptr_t)p >> 16; if (IsStackFrame(frame)) return false; - if (IsOldStackFrame(frame)) return false; if (kismapped(frame)) return false; } + if (GetStackAddr() + GUARDSIZE <= (uintptr_t)p && + (uintptr_t)p < GetStackAddr() + GetStackSize()) { + return false; + } return true; } @@ -219,12 +224,12 @@ privileged static void klog(const char *b, size_t n) { register long r0 asm("x0") = (long)2; register long r1 asm("x1") = (long)b; register long r2 asm("x2") = (long)n; + register long r8 asm("x8") = (long)__NR_write; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(64), "r"(r0), "r"(r1), "r"(r2) - : "x8", "memory"); + : "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); #else #error "unsupported architecture" #endif diff --git a/third_party/compiler_rt/lshrti3.c b/libc/intrin/lshrti3.c similarity index 96% rename from third_party/compiler_rt/lshrti3.c rename to libc/intrin/lshrti3.c index 7deb749e3..aee794749 100644 --- a/third_party/compiler_rt/lshrti3.c +++ b/libc/intrin/lshrti3.c @@ -13,8 +13,6 @@ * ===----------------------------------------------------------------------=== */ -STATIC_YOINK("huge_compiler_rt_license"); - #include "third_party/compiler_rt/int_lib.h" #ifdef CRT_HAS_128BIT diff --git a/libc/log/libfatal.internal.h b/libc/log/libfatal.internal.h index 608e94c8d..0367110d7 100644 --- a/libc/log/libfatal.internal.h +++ b/libc/log/libfatal.internal.h @@ -19,12 +19,9 @@ forceinline long __sysv_exit(long rc) { : "memory", "cc"); #elif defined(__aarch64__) register long r0 asm("x0") = rc; + register long r8 asm("x8") = __NR_exit_group; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" - : "=r"(res_x0) - : "i"(94), "r"(r0) - : "x8", "memory"); + asm volatile("svc\t0" : "=r"(res_x0) : "r"(r0), "r"(r8) : "memory"); ax = res_x0; #else ax = syscall(__NR_exit_group, rc); @@ -41,12 +38,13 @@ forceinline int __sysv_close(long fd) { : "rdx", "memory", "cc"); #elif defined(__aarch64__) register long r0 asm("x0") = fd; + register long r8 asm("x8") = __NR_close; register long res_x0 asm("x0"); asm volatile("mov\tx8,%1\n\t" "svc\t0" : "=r"(res_x0) - : "i"(57), "r"(r0) - : "x8", "memory"); + : "r"(r0), "r"(r8) + : "memory"); ax = res_x0; #else ax = syscall(__NR_close, fd); @@ -66,12 +64,12 @@ forceinline int __sysv_open(const char *path, long flags, long mode) { register long r1 asm("x1") = (long)path; register long r2 asm("x2") = (long)flags; register long r3 asm("x3") = (long)mode; + register long r8 asm("x8") = (long)__NR_open; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(56), "r"(r0), "r"(r1), "r"(r2), "r"(r3) - : "x8", "memory"); + : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r8) + : "memory"); ax = res_x0; #else ax = syscall(__NR_open, path, flags, mode); @@ -90,12 +88,12 @@ forceinline long __sysv_read(long fd, void *data, unsigned long size) { register long r0 asm("x0") = (long)fd; register long r1 asm("x1") = (long)data; register long r2 asm("x2") = (long)size; + register long r8 asm("x8") = (long)__NR_read; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(63), "r"(r0), "r"(r1), "r"(r2) - : "x8", "memory"); + : "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); ax = res_x0; #else ax = syscall(__NR_read, fd, data, size); @@ -114,12 +112,12 @@ forceinline long __sysv_write(long fd, const void *data, unsigned long size) { register long r0 asm("x0") = (long)fd; register long r1 asm("x1") = (long)data; register long r2 asm("x2") = (long)size; + register long r8 asm("x8") = (long)__NR_write; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(64), "r"(r0), "r"(r1), "r"(r2) - : "x8", "memory"); + : "i"(64), "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); ax = res_x0; #else ax = syscall(__NR_write, fd, data, size); @@ -138,12 +136,12 @@ forceinline long __sysv_mprotect(void *addr, size_t size, long prot) { register long r0 asm("x0") = (long)addr; register long r1 asm("x1") = (long)size; register long r2 asm("x2") = (long)prot; + register long r8 asm("x8") = (long)__NR_mprotect; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(226), "r"(r0), "r"(r1), "r"(r2) - : "x8", "memory"); + : "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); ax = res_x0; #else ax = syscall(__NR_mprotect, addr, size, prot); @@ -159,12 +157,9 @@ forceinline int __sysv_getpid(void) { : "0"(__NR_getpid) : "rdx", "memory", "cc"); #elif defined(__aarch64__) + register long r8 asm("x8") = (long)__NR_getpid; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" - : "=r"(res_x0) - : "i"(172) - : "x8", "memory"); + asm volatile("svc\t0" : "=r"(res_x0) : "r"(r8) : "memory"); ax = res_x0; #else ax = syscall(__NR_getpid); diff --git a/libc/nexgen32e/checkstackalign.S b/libc/nexgen32e/checkstackalign.S index d7f4173ae..60bd5cce8 100644 --- a/libc/nexgen32e/checkstackalign.S +++ b/libc/nexgen32e/checkstackalign.S @@ -22,6 +22,8 @@ // // This function crashes if called with a misaligned stack. CheckStackIsAligned: +#ifdef __x86_64__ + push %rbp mov %rsp,%rbp @@ -35,4 +37,14 @@ CheckStackIsAligned: leave ret + +#elif defined(__aarch64__) + +// TODO: support me + mov x0,#1 + ret + +#else +#error "unsupported architecture" +#endif .endfn CheckStackIsAligned,globl diff --git a/libc/nexgen32e/gclongjmp.S b/libc/nexgen32e/gclongjmp.S index fc98ca623..1112164e2 100644 --- a/libc/nexgen32e/gclongjmp.S +++ b/libc/nexgen32e/gclongjmp.S @@ -31,6 +31,7 @@ // @threadsafe // @noreturn _gclongjmp: +#ifdef __x86_64__ push %rbp mov %rsp,%rbp .profilable @@ -59,4 +60,9 @@ _gclongjmp: 2: pop %rsi pop %rdi jmp 0b +#elif defined(__aarch64__) + b longjmp +#else +#error "unsupported architecture" +#endif /* __x86_64__ */ .endfn _gclongjmp,globl diff --git a/libc/nexgen32e/mcount.S b/libc/nexgen32e/mcount.S index f198434b4..26cdeb4a9 100644 --- a/libc/nexgen32e/mcount.S +++ b/libc/nexgen32e/mcount.S @@ -17,10 +17,10 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/macros.internal.h" -.real // Function Profiling Hook. // cc -pg adds this to the start of global functions. mcount: ret - .endfn mcount,weak + .endfn mcount,globl,weak + .alias mcount,_mcount // aarch64 weirdness? .alias mcount,.mcount // freebsd weirdness? diff --git a/libc/nexgen32e/nexgen32e.mk b/libc/nexgen32e/nexgen32e.mk index f2d265377..579efa8aa 100644 --- a/libc/nexgen32e/nexgen32e.mk +++ b/libc/nexgen32e/nexgen32e.mk @@ -42,10 +42,22 @@ $(LIBC_NEXGEN32E_A).pkg: \ $(LIBC_NEXGEN32E_A_OBJS) \ $(foreach x,$(LIBC_NEXGEN32E_A_DIRECTDEPS),$($(x)_A).pkg) +o/$(MODE)/libc/nexgen32e/argc2.o \ +o/$(MODE)/libc/nexgen32e/argv2.o \ +o/$(MODE)/libc/nexgen32e/auxv2.o \ +o/$(MODE)/libc/nexgen32e/cescapec.o \ +o/$(MODE)/libc/nexgen32e/crc32init.o \ +o/$(MODE)/libc/nexgen32e/environ2.o \ +o/$(MODE)/libc/nexgen32e/envp2.o \ +o/$(MODE)/libc/nexgen32e/kbase36.o \ +o/$(MODE)/libc/nexgen32e/ktens.o \ +o/$(MODE)/libc/nexgen32e/ktolower.o \ +o/$(MODE)/libc/nexgen32e/ktoupper.o \ +o/$(MODE)/libc/nexgen32e/pid.o \ +o/$(MODE)/libc/nexgen32e/program_invocation_name2.o \ o/$(MODE)/libc/nexgen32e/threaded.o: private \ OVERRIDE_CFLAGS += \ - $(NO_MAGIC) \ - -fno-sanitize=all + $(NO_MAGIC) # these assembly files are safe to build on aarch64 o/$(MODE)/libc/nexgen32e/zip.o: libc/nexgen32e/zip.S @@ -70,6 +82,10 @@ o/$(MODE)/libc/nexgen32e/missingno.o: libc/nexgen32e/missingno.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< o/$(MODE)/libc/nexgen32e/khalfcache3.o: libc/nexgen32e/khalfcache3.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< +o/$(MODE)/libc/nexgen32e/gclongjmp.o: libc/nexgen32e/gclongjmp.S + @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< +o/$(MODE)/libc/nexgen32e/checkstackalign.o: libc/nexgen32e/checkstackalign.S + @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< LIBC_NEXGEN32E_LIBS = $(foreach x,$(LIBC_NEXGEN32E_ARTIFACTS),$($(x))) LIBC_NEXGEN32E_SRCS = $(foreach x,$(LIBC_NEXGEN32E_ARTIFACTS),$($(x)_SRCS)) diff --git a/libc/nexgen32e/program_invocation_name2.c b/libc/nexgen32e/program_invocation_name2.c index 9ef984c5b..c0451d948 100644 --- a/libc/nexgen32e/program_invocation_name2.c +++ b/libc/nexgen32e/program_invocation_name2.c @@ -18,4 +18,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/runtime/runtime.h" +#ifndef __x86_64__ + char *program_invocation_name; + +#endif /* __x86_64__ */ diff --git a/libc/runtime/cosmo2.c b/libc/runtime/cosmo2.c index ddd518d0b..b51823a24 100644 --- a/libc/runtime/cosmo2.c +++ b/libc/runtime/cosmo2.c @@ -31,19 +31,6 @@ int main(int, char **, char **) __attribute__((__weak__)); -#if 0 -static inline long sys_set_tid_address(int *t) { - register long res asm("x0"); - register long arg asm("x0") = (long)t; - asm volatile("mov\tx8,%1\n\t" - "svc\t0" - : "=r"(res) - : "i"(96), "r"(arg) - : "x8", "memory"); - return res; -} -#endif - typedef int init_f(int argc, char **argv, char **envp, unsigned long *auxv); extern init_f __strace_init; @@ -77,16 +64,19 @@ textstartup void cosmo(long *sp) { _mmi.n = ARRAYLEN(_mmi.s); _mmi.p = _mmi.s; __mmi_lock_obj._type = PTHREAD_MUTEX_RECURSIVE; - InitializeFileDescriptors(); #ifdef SYSDEBUG // initialize --strace functionality argc = __strace_init(argc, argv, envp, auxv); #endif +#if 0 #if IsAsan() __asan_init(argc, argv, envp, auxv); #endif +#endif + + InitializeFileDescriptors(); // set helpful globals __argc = argc; diff --git a/libc/runtime/endutxent.S b/libc/runtime/endutxent.S deleted file mode 100644 index 9fcc3697a..000000000 --- a/libc/runtime/endutxent.S +++ /dev/null @@ -1,26 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Closes user accounting database. -// @note unsupported -endutxent: - xor %eax,%eax - ret - .endfn endutxent,globl diff --git a/libc/runtime/fork-sysv.c b/libc/runtime/fork-sysv.c index c60fc420d..5d5b8ba88 100644 --- a/libc/runtime/fork-sysv.c +++ b/libc/runtime/fork-sysv.c @@ -19,6 +19,7 @@ #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" #include "libc/sysv/consts/sig.h" +#include "libc/sysv/errfuns.h" int sys_fork(void) { #ifdef __x86_64__ @@ -37,23 +38,27 @@ int sys_fork(void) { #elif defined(__aarch64__) - int flags = 17; // SIGCHLD; - void *child_stack = 0; - void *parent_tidptr = 0; - void *newtls = 0; - void *child_tidptr = 0; - register long r0 asm("x0") = (long)flags; - register long r1 asm("x1") = (long)child_stack; - register long r2 asm("x2") = (long)parent_tidptr; - register long r3 asm("x3") = (long)newtls; - register long r4 asm("x4") = (long)child_tidptr; - register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" - : "=r"(res_x0) - : "i"(220), "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4) - : "x8", "memory"); - return _sysret(res_x0); + if (IsLinux()) { + int flags = 17; // SIGCHLD; + void *child_stack = 0; + void *parent_tidptr = 0; + void *newtls = 0; + void *child_tidptr = 0; + register long r0 asm("x0") = (long)flags; + register long r1 asm("x1") = (long)child_stack; + register long r2 asm("x2") = (long)parent_tidptr; + register long r3 asm("x3") = (long)newtls; + register long r4 asm("x4") = (long)child_tidptr; + register long res_x0 asm("x0"); + asm volatile("mov\tx8,%1\n\t" + "svc\t0" + : "=r"(res_x0) + : "i"(220), "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4) + : "x8", "memory"); + return _sysret(res_x0); + } else { + return enosys(); + } #endif } diff --git a/libc/runtime/getutent.S b/libc/runtime/getutent.S deleted file mode 100644 index 01975567a..000000000 --- a/libc/runtime/getutent.S +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -getutent: - xor %eax,%eax - ret - .endfn getutent,globl diff --git a/libc/runtime/getutid.S b/libc/runtime/getutid.S deleted file mode 100644 index ccb471dec..000000000 --- a/libc/runtime/getutid.S +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -getutid: - xor %eax,%eax - ret - .endfn getutid,globl diff --git a/libc/runtime/getutxent.S b/libc/runtime/getutxent.S deleted file mode 100644 index 0258e492f..000000000 --- a/libc/runtime/getutxent.S +++ /dev/null @@ -1,26 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Reads next entry in user accounting database. -// @note unsupported -getutxent: - xor %eax,%eax - ret - .endfn getutxent,globl diff --git a/libc/runtime/getutxid.S b/libc/runtime/getutxid.S deleted file mode 100644 index 448ff9b81..000000000 --- a/libc/runtime/getutxid.S +++ /dev/null @@ -1,26 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Searches forward in the user accounting database. -// @note unsupported -getutxid: - xor %eax,%eax - ret - .endfn getutxid,globl diff --git a/libc/runtime/getutxline.S b/libc/runtime/getutxline.S deleted file mode 100644 index 22f99027c..000000000 --- a/libc/runtime/getutxline.S +++ /dev/null @@ -1,27 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Searches forward in the user accounting database. -// @note unsupported -getutxline: - xor %eax,%eax - ret - .endfn getutxline,globl - .alias getutxline,getutline diff --git a/libc/runtime/memtrack.internal.h b/libc/runtime/memtrack.internal.h index 8858d1cbf..1d1da1c20 100644 --- a/libc/runtime/memtrack.internal.h +++ b/libc/runtime/memtrack.internal.h @@ -122,10 +122,18 @@ forceinline pureconst bool IsStackFrame(int x) { x <= (int)((stack + (GetStackSize() - FRAMESIZE)) >> 16); } -forceinline pureconst bool IsOldStackFrame(int x) { +forceinline pureconst bool IsOldStack(const void *x) { /* openbsd uses 4mb stack by default */ /* freebsd uses 512mb stack by default */ /* most systems use 8mb stack by default */ + size_t foss_stack_size = 4ul * 1024 * 1024; + uintptr_t top = ROUNDUP(__oldstack, FRAMESIZE); + uintptr_t bot = top - foss_stack_size; + uintptr_t old = ROUNDDOWN(__oldstack, foss_stack_size); + return bot <= (uintptr_t)x && (uintptr_t)x < top; +} + +forceinline pureconst bool IsOldStackFrame(int x) { size_t foss_stack_size = 4ul * 1024 * 1024; uintptr_t top = ROUNDUP(__oldstack, FRAMESIZE); uintptr_t bot = top - foss_stack_size; diff --git a/libc/runtime/morph.greg.c b/libc/runtime/morph.greg.c index 3ee0e76e9..3bba19d60 100644 --- a/libc/runtime/morph.greg.c +++ b/libc/runtime/morph.greg.c @@ -45,12 +45,12 @@ static inline int __morph_rt_sigprocmask(int h, const sigset_t *s, sigset_t *o, register long r1 asm("x1") = (long)s; register long r2 asm("x2") = (long)o; register long r3 asm("x3") = (long)c; + register long r8 asm("x8") = (long)__NR_sigprocmask; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(135), "r"(r0), "r"(r1), "r"(r2), "r"(r3) - : "x8", "memory"); + : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r8) + : "memory"); return res_x0; #else return 0; @@ -89,12 +89,12 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot, register long r0 asm("x0") = (long)addr; register long r1 asm("x1") = (long)size; register long r2 asm("x2") = (long)prot; + register long r8 asm("x8") = (long)__NR_mprotect; register long res_x0 asm("x0"); - asm volatile("mov\tx8,%1\n\t" - "svc\t0" + asm volatile("svc\t0" : "=r"(res_x0) - : "i"(226), "r"(r0), "r"(r1), "r"(r2) - : "x8", "memory"); + : "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); _npassert(!res_x0); #endif } diff --git a/libc/runtime/printargs.c b/libc/runtime/printargs.c index 4123e771c..43bc933c1 100644 --- a/libc/runtime/printargs.c +++ b/libc/runtime/printargs.c @@ -63,8 +63,6 @@ #include "tool/decode/lib/idname.h" #include "tool/decode/lib/x86idnames.h" -#ifdef __x86_64__ - STATIC_YOINK("strerror"); // for kprintf() STATIC_YOINK("strsignal"); // for kprintf() @@ -203,6 +201,7 @@ textstartup void __printargs(const char *prologue) { PRINT(""); PRINT("MICROPROCESSOR"); kprintf(prologue); +#ifdef __x86_64__ kprintf(" %.*s%.*s%.*s", 4, &KCPUIDS(0H, EBX), 4, &KCPUIDS(0H, EDX), 4, &KCPUIDS(0H, ECX)); if (getx86processormodel(kX86ProcessorModelKey)) { @@ -272,6 +271,9 @@ textstartup void __printargs(const char *prologue) { if (X86_HAVE(RDPID)) kprintf(" RDPID"); if (X86_HAVE(LA57)) kprintf(" LA57"); if (X86_HAVE(FSGSBASE)) kprintf(" FSGSBASE"); +#elif defined(__aarch64__) + PRINT(" AARCH64\n"); +#endif kprintf("\n"); PRINT(""); @@ -424,8 +426,10 @@ textstartup void __printargs(const char *prologue) { PRINT(" ☼ %s = %d", "getgid()", getgid()); PRINT(" ☼ %s = %d", "getegid()", getegid()); PRINT(" ☼ %s = %#s", "kTmpPath", kTmpPath); +#ifdef __x86_64__ PRINT(" ☼ %s = %#s", "kNtSystemDirectory", kNtSystemDirectory); PRINT(" ☼ %s = %#s", "kNtWindowsDirectory", kNtWindowsDirectory); +#endif PRINT(" ☼ %s = %#s", "GetProgramExecutableName", GetProgramExecutableName()); PRINT(" ☼ %s = %#s", "GetInterpreterExecutableName", GetInterpreterExecutableName(u.path, sizeof(u.path))); @@ -713,5 +717,3 @@ textstartup void __printargs(const char *prologue) { ftrace_enabled(+1); errno = e; } - -#endif /* __x86_64__ */ diff --git a/libc/runtime/setutent.S b/libc/runtime/setutent.S deleted file mode 100644 index f46d0aea9..000000000 --- a/libc/runtime/setutent.S +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -setutent: - xor %eax,%eax - ret - .endfn setutent,globl diff --git a/libc/runtime/setutxent.S b/libc/runtime/setutxent.S deleted file mode 100644 index 49d2429e9..000000000 --- a/libc/runtime/setutxent.S +++ /dev/null @@ -1,26 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Rewinds the user accounting database. -// @note unsupported -setutxent: - xor %eax,%eax - ret - .endfn setutxent,globl diff --git a/libc/runtime/updwtmp.S b/libc/runtime/updwtmp.S deleted file mode 100644 index 2af33f04d..000000000 --- a/libc/runtime/updwtmp.S +++ /dev/null @@ -1,24 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -updwtmp: - xor %eax,%eax - ret - .endfn updwtmp,globl diff --git a/libc/runtime/updwtmpx.S b/libc/runtime/updwtmpx.S deleted file mode 100644 index e20d729a3..000000000 --- a/libc/runtime/updwtmpx.S +++ /dev/null @@ -1,26 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Does something to the user accounting database. -// @note unsupported -updwtmpx: - xor %eax,%eax - ret - .endfn updwtmpx,globl diff --git a/libc/runtime/endutent.S b/libc/runtime/utmp.c similarity index 62% rename from libc/runtime/endutent.S rename to libc/runtime/utmp.c index 0f154bb5c..819c103d3 100644 --- a/libc/runtime/endutent.S +++ b/libc/runtime/utmp.c @@ -1,7 +1,7 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,9 +16,57 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" +#include "libc/runtime/utmp.h" +#include "libc/errno.h" +#include "libc/runtime/utmpx.h" -endutent: - xor %eax,%eax - ret - .endfn endutent,globl +void setutent(void) { +} + +void endutent(void) { +} + +void endutxent(void) { +} + +struct utmp *getutent(void) { + return 0; +} + +void updwtmp(const char *x, const struct utmp *y) { +} + +void updwtmpx(const char *x, const struct utmpx *y) { +} + +void setutxent(void) { +} + +struct utmp *getutid(const struct utmp *x) { + return 0; +} + +struct utmpx *getutxent(void) { + return 0; +} + +struct utmpx *getutxid(const struct utmpx *x) { + return 0; +} + +struct utmpx *getutxline(const struct utmpx *x) { + return 0; +} + +int __utmpxname() { + errno = ENOTSUP; + return -1; +} + +int utmpname(const char *x) { + return __utmpxname(); +} + +int utmpxname(const char *x) { + return __utmpxname(); +} diff --git a/libc/sock/asanmsghdr.c b/libc/sock/asanmsghdr.c index 0b993f093..bf4cff9dc 100644 --- a/libc/sock/asanmsghdr.c +++ b/libc/sock/asanmsghdr.c @@ -16,8 +16,10 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" #include "libc/intrin/asan.internal.h" #include "libc/sock/struct/msghdr.h" +#if IsAsan() bool __asan_is_valid_msghdr(const struct msghdr *msg) { if (!__asan_is_valid(msg, sizeof(struct msghdr))) return false; @@ -29,3 +31,5 @@ bool __asan_is_valid_msghdr(const struct msghdr *msg) { } return __asan_is_valid_iov(msg->msg_iov, msg->msg_iovlen); } + +#endif diff --git a/libc/str/djbsort.c b/libc/str/djbsort.c index 17b0e303e..578784f8d 100644 --- a/libc/str/djbsort.c +++ b/libc/str/djbsort.c @@ -33,10 +33,14 @@ void djbsort(int32_t *a, size_t n) { __asan_verify(a, m); } if (n > 1) { +#ifdef __x86_64__ if (X86_HAVE(AVX2)) { djbsort_avx2(a, n); } else { _intsort(a, n); } +#else + _intsort(a, n); +#endif /* __x86_64__ */ } } diff --git a/libc/sysv/calls/sys_access.S b/libc/sysv/calls/sys_access.S deleted file mode 100644 index ac9340ce1..000000000 --- a/libc/sysv/calls/sys_access.S +++ /dev/null @@ -1,2 +0,0 @@ -#include "libc/sysv/macros.internal.h" -.scall sys_access,0x0210210212021015,0xfff,globl,hidden diff --git a/libc/sysv/consts/nrlinux.h b/libc/sysv/consts/nrlinux.h index 3932177bb..f3fe460a5 100644 --- a/libc/sysv/consts/nrlinux.h +++ b/libc/sysv/consts/nrlinux.h @@ -360,6 +360,7 @@ #define __NR_linux_stat 0x004f #define __NR_linux_fstat 0x0050 #define __NR_linux_ppoll 0x0049 +#define __NR_linux_brk 0x00d6 #define __NR_linux_sigreturn 0x008b #define __NR_linux_lseek 0x003e #define __NR_linux_mmap 0x00de diff --git a/libc/sysv/errfun.S b/libc/sysv/errfun.S index 92194bebc..ca7c85a60 100644 --- a/libc/sysv/errfun.S +++ b/libc/sysv/errfun.S @@ -33,6 +33,7 @@ __errfun: str w19,[x0] mov x0,#-1 ldp x19,x30,[sp],#16 + ret #else #error "unsupported architecture" #endif diff --git a/libc/sysv/errno.c b/libc/sysv/errno.c index 267e1e48a..53c1f9778 100644 --- a/libc/sysv/errno.c +++ b/libc/sysv/errno.c @@ -18,11 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" -asm(".weak\t__asan_init"); -asm(".weak\t__asan_register_globals"); -asm(".weak\t__asan_unregister_globals"); -asm(".weak\t__asan_version_mismatch_check_v8"); - /** * Global variable for last error. * diff --git a/libc/sysv/syscalls.sh b/libc/sysv/syscalls.sh index 32eb62140..770f27311 100755 --- a/libc/sysv/syscalls.sh +++ b/libc/sysv/syscalls.sh @@ -56,7 +56,6 @@ scall sys_pread 0x8ad8ad9db2899811 0x043 globl hidden # a.k.a. pread64; netbsd+ scall sys_pwrite 0x8ae8ae9dc289a812 0x044 globl hidden # a.k.a. pwrite64; netbsd+openbsd:pad scall sys_readv 0x8788788782878813 0x041 globl hidden scall sys_writev 0x8798798792879814 0x042 globl hidden -scall sys_access 0x0210210212021015 0xfff globl hidden scall __sys_pipe 0x02a10721e202a016 0x03b globl hidden # NOTE: pipe2() on FreeBSD and Linux Aarch64; XNU is pipe(void)→eax:edx scall sys_select 0x9a184785d285d817 0xfff globl hidden scall sys_pselect 0x9b486ea0a298a90e 0x048 globl hidden # pselect6() on gnu/systemd diff --git a/libc/sysv/syscount.S b/libc/sysv/syscount.S index a013d5b6c..6923fdeff 100644 --- a/libc/sysv/syscount.S +++ b/libc/sysv/syscount.S @@ -24,6 +24,7 @@ // for the purpose of counting non-Windows system calls. Please // note wrappers may still short circuit calls sometimes, which // wouldn't impact this counter. + .bss .balign 8 __syscount: @@ -31,6 +32,8 @@ __syscount: .endobj __syscount,globl .previous +#ifdef __x86_64__ + .initbss 701,_init___syscount __syscount_next: .quad 0 @@ -49,3 +52,5 @@ syscount: ezlea syscount,ax mov %rax,__systemfive(%rip) .init.end 701,_init___syscount + +#endif /* __x86_64__ */ diff --git a/libc/sysv/sysv.mk b/libc/sysv/sysv.mk index 416cdce9a..eae66df98 100644 --- a/libc/sysv/sysv.mk +++ b/libc/sysv/sysv.mk @@ -70,6 +70,11 @@ $(LIBC_SYSV_A).pkg: \ $(LIBC_SYSV_A_OBJS) \ $(foreach x,$(LIBC_SYSV_A_DIRECTDEPS),$($(x)_A).pkg) +o/$(MODE)/libc/sysv/errno.o \ +o/$(MODE)/libc/sysv/sysret.o: private \ + OVERRIDE_CFLAGS += \ + $(NO_MAGIC) + #─────────────────────────────────────────────────────────────────────────────── LIBC_SYSV_CALLS = \ @@ -129,6 +134,8 @@ $(LIBC_SYSV_MACHCALLS_A).pkg: \ # let aarch64 compile these o/$(MODE)/libc/sysv/errfun.o: libc/sysv/errfun.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) $< +o/$(MODE)/libc/sysv/syscount.o: libc/sysv/syscount.S + @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) $< o/$(MODE)/libc/sysv/restorert.o: libc/sysv/restorert.S @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) $< o/$(MODE)/libc/sysv/calls/%.o: libc/sysv/calls/%.S diff --git a/libc/zipos/open.c b/libc/zipos/open.c index 61b71d1ec..48ed21c7c 100644 --- a/libc/zipos/open.c +++ b/libc/zipos/open.c @@ -210,14 +210,18 @@ int __zipos_open(const struct ZiposUri *name, unsigned flags, int mode) { if ((zipos = __zipos_get())) { if ((cf = __zipos_find(zipos, name)) != -1) { rc = __zipos_load(zipos, cf, flags, mode); + assert(rc != 0); } else { rc = enoent(); + assert(rc != 0); } } else { rc = enoexec(); + assert(rc != 0); } } else { rc = einval(); + assert(rc != 0); } ALLOW_SIGNALS; return rc; diff --git a/net/https/https.mk b/net/https/https.mk index 68162f68a..e98b11a11 100644 --- a/net/https/https.mk +++ b/net/https/https.mk @@ -35,6 +35,7 @@ NET_HTTPS_A_DIRECTDEPS = \ LIBC_TIME \ LIBC_X \ LIBC_ZIPOS \ + THIRD_PARTY_COMPILER_RT \ THIRD_PARTY_MBEDTLS NET_HTTPS_A_DEPS := \ diff --git a/test/libc/calls/diagnose_syscall_test.c b/test/libc/calls/diagnose_syscall_test.c index 657e26e0d..fb2a79926 100644 --- a/test/libc/calls/diagnose_syscall_test.c +++ b/test/libc/calls/diagnose_syscall_test.c @@ -27,6 +27,8 @@ #include "libc/sysv/consts/nr.h" #include "libc/testlib/testlib.h" +#ifdef __x86_64__ + #define Z 0x5555555555555555 #define FLAGS_cf 0 @@ -137,3 +139,5 @@ TEST(diagnose_syscall, testWriteFailed) { ASSERT_STREQ("rax rcx r11", _gc(DiffContexts(&x, &y))); } } + +#endif /* __x86_64__ */ diff --git a/test/libc/calls/getcontext_test.c b/test/libc/calls/getcontext_test.c index 2308d62a9..1e94533f7 100644 --- a/test/libc/calls/getcontext_test.c +++ b/test/libc/calls/getcontext_test.c @@ -22,6 +22,8 @@ #include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" +#ifdef __x86_64__ + int x; bool ok1; bool ok2; @@ -62,3 +64,5 @@ void SetGetContext(void) { BENCH(getcontext, bench) { EZBENCH2("get/setcontext", donothing, SetGetContext()); } + +#endif /* __x86_64__ */ diff --git a/test/libc/calls/poll_test.c b/test/libc/calls/poll_test.c index 72471202c..953bc7a03 100644 --- a/test/libc/calls/poll_test.c +++ b/test/libc/calls/poll_test.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/sysv/consts/poll.h" #include "libc/calls/calls.h" #include "libc/calls/pledge.h" #include "libc/calls/struct/sigaction.h" @@ -32,7 +33,6 @@ #include "libc/sysv/consts/af.h" #include "libc/sysv/consts/inaddr.h" #include "libc/sysv/consts/ipproto.h" -#include "libc/sysv/consts/poll.h" #include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sock.h" #include "libc/testlib/testlib.h" @@ -40,7 +40,6 @@ #include "libc/x/xasprintf.h" #include "third_party/chibicc/test/test.h" #include "tool/decode/lib/flagger.h" -#include "tool/decode/lib/pollnames.h" bool gotsig; @@ -60,8 +59,7 @@ void OnSig(int sig) { dontdiscard char *FormatPollFd(struct pollfd p[2]) { return xasprintf("fd:%d revents:%s\n" "fd:%d revents:%s\n", - p[0].fd, _gc(RecreateFlags(kPollNames, p[0].revents)), - p[1].fd, _gc(RecreateFlags(kPollNames, p[1].revents))); + p[0].fd, "", p[1].fd, ""); } TEST(poll, allZero_doesNothingPrettyMuch) { diff --git a/test/libc/calls/read_test.c b/test/libc/calls/read_test.c index 26e462da1..fd965a9c0 100644 --- a/test/libc/calls/read_test.c +++ b/test/libc/calls/read_test.c @@ -16,11 +16,13 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/iovec.h" #include "libc/calls/struct/iovec.internal.h" #include "libc/calls/syscall-sysv.internal.h" +#include "libc/dce.h" #include "libc/sock/internal.h" #include "libc/sysv/consts/nr.h" #include "libc/sysv/consts/o.h" @@ -44,12 +46,25 @@ TEST(read, eof) { //////////////////////////////////////////////////////////////////////////////// static long Read(long fd, void *buf, unsigned long size) { +#ifdef __x86_64__ long ax, di, si, dx; asm volatile("syscall" : "=a"(ax), "=D"(di), "=S"(si), "=d"(dx) : "0"(__NR_read), "1"(fd), "2"(buf), "3"(size) : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"); return ax; +#elif defined(__aarch64__) + register long r0 asm("x0") = (long)fd; + register long r1 asm("x1") = (long)buf; + register long r2 asm("x2") = (long)size; + register long r8 asm("x8") = (long)__NR_read; + register long res_x0 asm("x0"); + asm volatile("svc\t0" + : "=r"(res_x0) + : "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); + return res_x0; +#endif } BENCH(read, bench) { diff --git a/test/libc/calls/sigaction_test.c b/test/libc/calls/sigaction_test.c index 549e3a8b0..f1965fb8f 100644 --- a/test/libc/calls/sigaction_test.c +++ b/test/libc/calls/sigaction_test.c @@ -16,9 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/struct/sigaction.h" #include "libc/calls/calls.h" #include "libc/calls/struct/rusage.h" -#include "libc/calls/struct/sigaction.h" #include "libc/calls/struct/siginfo.h" #include "libc/calls/struct/sigset.h" #include "libc/calls/struct/sigset.internal.h" @@ -112,6 +112,8 @@ TEST(sigaction, testPingPongParentChildWithSigint) { EXPECT_SYS(0, 0, sigprocmask(SIG_BLOCK, &oldmask, 0)); } +#ifdef __x86_64__ + //////////////////////////////////////////////////////////////////////////////// // test int3 crash // we expect this to be recoverable by default @@ -164,6 +166,8 @@ TEST(sigaction, sigFpe_handlerCanEditProcessStateAndRecoverExecution) { ubsanTrumpsSystemsEngineering(); } +#endif /* __x86_64__ */ + static unsigned OnSignalCnt = 0; void OnSignal(int sig, siginfo_t *si, void *ctx) { OnSignalCnt++; diff --git a/test/libc/calls/signal_test.c b/test/libc/calls/signal_test.c index 2b7aaa21d..7381d54c9 100644 --- a/test/libc/calls/signal_test.c +++ b/test/libc/calls/signal_test.c @@ -48,7 +48,7 @@ void OnSigTrap(int sig, struct siginfo *si, void *ctx) { void TrapBench(int n) { for (int i = 0; i < n; ++i) { - asm("int3"); + __builtin_trap(); } } @@ -72,6 +72,8 @@ BENCH(signal, trapBenchSiginfo) { sigaction(SIGTRAP, &old, 0); } +#ifdef __x86_64__ + void OnSigHlt(int sig, struct siginfo *si, void *vctx) { struct ucontext *ctx = vctx; ctx->uc_mcontext.rip += 1; @@ -94,3 +96,5 @@ BENCH(signal, hltBenchSiginfo) { sigaction(SIGSEGV, old + 0, 0); sigaction(SIGBUS, old + 1, 0); } + +#endif /* __x86_64__ */ diff --git a/test/libc/calls/stat_test.c b/test/libc/calls/stat_test.c index 4420d86b7..20fdfa14c 100644 --- a/test/libc/calls/stat_test.c +++ b/test/libc/calls/stat_test.c @@ -16,10 +16,11 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/struct/stat.h" +#include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/metastat.internal.h" -#include "libc/calls/struct/stat.h" #include "libc/dce.h" #include "libc/errno.h" #include "libc/mem/gc.internal.h" @@ -71,16 +72,8 @@ TEST(stat, zipos) { EXPECT_SYS(0, 0, stat("/zip/.python/", &st)); } -static long Stat(const char *path, struct stat *st) { - long ax, di, si, dx; - asm volatile("syscall" - : "=a"(ax), "=D"(di), "=S"(si), "=d"(dx) - : "0"(__NR_stat), "1"(path), "2"(st) - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"); - return ax; -} - static long Fstatat(const char *path, struct stat *st) { +#ifdef __x86_64__ long ax, di, si, dx; register long r10 asm("r10") = 0; asm volatile("syscall" @@ -88,6 +81,21 @@ static long Fstatat(const char *path, struct stat *st) { : "0"(__NR_fstatat), "1"(AT_FDCWD), "2"(path), "3"(st) : "rcx", "r8", "r9", "r11", "memory", "cc"); return ax; +#elif defined(__aarch64__) + register long r0 asm("x0") = (long)AT_FDCWD; + register long r1 asm("x1") = (long)path; + register long r2 asm("x2") = (long)st; + register long r3 asm("x3") = (long)0; + register long r8 asm("x8") = (long)__NR_fstatat; + register long res_x0 asm("x0"); + asm volatile("svc\t0" + : "=r"(res_x0) + : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r8) + : "memory"); + return res_x0; +#else +#error "unsupported architecture" +#endif } BENCH(stat, bench) { @@ -100,10 +108,6 @@ BENCH(stat, bench) { "tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt", 0644)); if (!IsWindows() && !IsFreebsd()) { - EZBENCH2("stat syscall", donothing, - Stat(".python/test/" - "tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt", - &st)); EZBENCH2("fstatat syscall", donothing, Fstatat(".python/test/" "tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt", diff --git a/test/libc/calls/test.mk b/test/libc/calls/test.mk index ac129d8ef..e9f52bbb3 100644 --- a/test/libc/calls/test.mk +++ b/test/libc/calls/test.mk @@ -52,6 +52,7 @@ TEST_LIBC_CALLS_DIRECTDEPS = \ LIBC_X \ LIBC_ZIPOS \ TOOL_DECODE_LIB \ + THIRD_PARTY_COMPILER_RT \ THIRD_PARTY_XED TEST_LIBC_CALLS_DEPS := \ diff --git a/test/libc/calls/write_test.c b/test/libc/calls/write_test.c index 56cda5e29..ad6b9fca7 100644 --- a/test/libc/calls/write_test.c +++ b/test/libc/calls/write_test.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/struct/iovec.h" @@ -96,12 +97,27 @@ TEST(write, rlimitFsizeExceeded_raisesEfbig) { } static long Write(long fd, const void *data, unsigned long size) { +#ifdef __x86_64__ long ax, di, si, dx; asm volatile("syscall" : "=a"(ax), "=D"(di), "=S"(si), "=d"(dx) : "0"(__NR_write), "1"(fd), "2"(data), "3"(size) : "rcx", "r8", "r9", "r10", "r11", "memory", "cc"); return ax; +#elif defined(__aarch64__) + register long r0 asm("x0") = (long)fd; + register long r1 asm("x1") = (long)data; + register long r2 asm("x2") = (long)size; + register long r8 asm("x8") = (long)__NR_write; + register long res_x0 asm("x0"); + asm volatile("svc\t0" + : "=r"(res_x0) + : "r"(r0), "r"(r1), "r"(r2), "r"(r8) + : "memory"); + return res_x0; +#else +#error "unsupported architecture" +#endif } BENCH(write, bench) { diff --git a/test/libc/mem/djbsort_test.c b/test/libc/mem/djbsort_test.c index cc5995273..3f65c09f8 100644 --- a/test/libc/mem/djbsort_test.c +++ b/test/libc/mem/djbsort_test.c @@ -50,6 +50,7 @@ void insertionsort(int32_t *a, size_t n) { } } +#ifdef __x86_64__ TEST(djbsort, test4) { static const int kA[] = {4, 3, 2, 1}; n = ARRAYLEN(kA); @@ -62,6 +63,7 @@ TEST(djbsort, test4) { ASSERT_EQ(0, memcmp(a, b, n * 4)); ASSERT_EQ(0, memcmp(a, c, n * 4)); } +#endif /* __x86_64__ */ TEST(djbsort, test64) { static const int kA[64] = { @@ -86,10 +88,12 @@ TEST(djbsort, test64) { insertionsort(a, n); djbsort(c, n); ASSERT_EQ(0, memcmp(a, c, n * 4)); +#ifdef __x86_64__ if (X86_HAVE(AVX2)) { djbsort_avx2(b, n); ASSERT_EQ(0, memcmp(a, b, n * 4)); } +#endif /* __x86_64__ */ } static int CompareInt(const void *a, const void *b) { diff --git a/test/libc/mem/test.mk b/test/libc/mem/test.mk index 389bc918f..135ce3001 100644 --- a/test/libc/mem/test.mk +++ b/test/libc/mem/test.mk @@ -87,13 +87,15 @@ o/$(MODE)/test/libc/mem/prog/life.com.dbg: \ o/$(MODE)/test/libc/mem/prog/life.elf: \ o/$(MODE)/tool/build/assimilate.com \ - o/$(MODE)/test/libc/mem/prog/life.com + o/$(MODE)/test/libc/mem/prog/life.com \ + $(VM) @$(COMPILE) -wACP -T$@ \ build/bootstrap/cp.com \ o/$(MODE)/test/libc/mem/prog/life.com \ o/$(MODE)/test/libc/mem/prog/life.elf @$(COMPILE) -wAASSIMILATE -T$@ \ - o/$(MODE)/tool/build/assimilate.com \ + $(VM) \ + o/$(MODE)/tool/build/assimilate.com -f \ o/$(MODE)/test/libc/mem/prog/life.elf o/$(MODE)/test/libc/mem/prog/life.elf.zip.o: private \ @@ -112,13 +114,15 @@ o/$(MODE)/test/libc/mem/prog/sock.com.dbg: \ o/$(MODE)/test/libc/mem/prog/sock.elf: \ o/$(MODE)/tool/build/assimilate.com \ - o/$(MODE)/test/libc/mem/prog/sock.com + o/$(MODE)/test/libc/mem/prog/sock.com \ + $(VM) @$(COMPILE) -wACP -T$@ \ build/bootstrap/cp.com \ o/$(MODE)/test/libc/mem/prog/sock.com \ o/$(MODE)/test/libc/mem/prog/sock.elf @$(COMPILE) -wAASSIMILATE -T$@ \ - o/$(MODE)/tool/build/assimilate.com \ + $(VM) \ + o/$(MODE)/tool/build/assimilate.com -f \ o/$(MODE)/test/libc/mem/prog/sock.elf o/$(MODE)/test/libc/mem/prog/sock.elf.zip.o: private \ diff --git a/third_party/bzip2/bz_internal_error.c b/third_party/bzip2/bz_internal_error.c index 4b0ed204c..e49b583c4 100644 --- a/third_party/bzip2/bz_internal_error.c +++ b/third_party/bzip2/bz_internal_error.c @@ -18,5 +18,5 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ void bz_internal_error(int err) { - asm("hlt"); + __builtin_trap(); } diff --git a/third_party/bzip2/crctable.c b/third_party/bzip2/crctable.c index c4ee9a00e..0ff0185d2 100644 --- a/third_party/bzip2/crctable.c +++ b/third_party/bzip2/crctable.c @@ -34,7 +34,7 @@ static textstartup void BZ2_crc32Table_init() { BZ2_crc32Table[i] = u; } if (BZ2_crc32Table[0] || BZ2_crc32Table[255] != 0xb1f740b4) { - asm("hlt"); + __builtin_trap(); } } diff --git a/third_party/chibicc/chibicc.mk b/third_party/chibicc/chibicc.mk index e38ab6a7c..fa2688610 100644 --- a/third_party/chibicc/chibicc.mk +++ b/third_party/chibicc/chibicc.mk @@ -10,6 +10,8 @@ # This makefile compiles and runs each test twice. The first with # GCC-built chibicc, and a second time with chibicc-built chibicc +ifeq ($(ARCH), x86_64) + CHIBICC = o/$(MODE)/third_party/chibicc/chibicc.com CHIBICC_FLAGS = \ -fno-common \ @@ -111,6 +113,8 @@ THIRD_PARTY_CHIBICC_CHECKS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x) THIRD_PARTY_CHIBICC_OBJS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_OBJS)) $(THIRD_PARTY_CHIBICC_OBJS): $(BUILD_FILES) third_party/chibicc/chibicc.mk +endif + .PHONY: o/$(MODE)/third_party/chibicc o/$(MODE)/third_party/chibicc: \ o/$(MODE)/third_party/chibicc/test \ diff --git a/third_party/chibicc/test/test.mk b/third_party/chibicc/test/test.mk index f6a451c88..19dffa25f 100644 --- a/third_party/chibicc/test/test.mk +++ b/third_party/chibicc/test/test.mk @@ -10,6 +10,8 @@ # This makefile compiles and runs each test twice. The first with # GCC-built chibicc, and a second time with chibicc-built chibicc +ifeq ($(ARCH), x86_64) + PKGS += THIRD_PARTY_CHIBICC_TEST THIRD_PARTY_CHIBICC_TEST_A = o/$(MODE)/third_party/chibicc/test/test.a @@ -75,6 +77,8 @@ o/$(MODE)/third_party/chibicc/test/%.o: \ o/$(MODE)/third_party/chibicc/test/int128_test.o: private QUOTA = -M1024m +endif + .PHONY: o/$(MODE)/third_party/chibicc/test o/$(MODE)/third_party/chibicc/test: \ $(THIRD_PARTY_CHIBICC_TEST_BINS) \ diff --git a/third_party/gcc/libexec/gcc/aarch64-linux-musl/9.2.0/collect-ld.sym b/third_party/gcc/libexec/gcc/aarch64-linux-musl/9.2.0/collect-ld.sym new file mode 100644 index 000000000..d186e533d --- /dev/null +++ b/third_party/gcc/libexec/gcc/aarch64-linux-musl/9.2.0/collect-ld.sym @@ -0,0 +1 @@ +../../../../x86_64-linux-musl/bin/ld.bfd diff --git a/third_party/sed/process.c b/third_party/sed/process.c index 8e31114a6..e5d2dac9e 100644 --- a/third_party/sed/process.c +++ b/third_party/sed/process.c @@ -68,7 +68,7 @@ static __inline int regexec_e(regex_t *, const char *, int, int, size_t); static void regsub(SPACE *, char *, char *); static int substitute(struct s_command *); -struct s_appends *appends; /* Array of pointers to strings to append. */ +struct s_appends *appends_; /* Array of pointers to strings to append. */ static size_t appendx; /* Index into appends array. */ size_t appendnum; /* Size of appends array. */ @@ -111,12 +111,12 @@ redirect: goto redirect; case 'a': if (appendx >= appendnum) - appends = xrealloc(appends, + appends_ = xrealloc(appends_, sizeof(struct s_appends) * (appendnum *= 2)); - appends[appendx].type = AP_STRING; - appends[appendx].s = cp->t; - appends[appendx].len = strlen(cp->t); + appends_[appendx].type = AP_STRING; + appends_[appendx].s = cp->t; + appends_[appendx].len = strlen(cp->t); appendx++; break; case 'b': @@ -204,12 +204,12 @@ redirect: exit(0); case 'r': if (appendx >= appendnum) - appends = xrealloc(appends, + appends_ = xrealloc(appends_, sizeof(struct s_appends) * (appendnum *= 2)); - appends[appendx].type = AP_FILE; - appends[appendx].s = cp->t; - appends[appendx].len = strlen(cp->t); + appends_[appendx].type = AP_FILE; + appends_[appendx].s = cp->t; + appends_[appendx].len = strlen(cp->t); appendx++; break; case 's': @@ -541,9 +541,9 @@ flush_appends(void) char *buf = gc(malloc(8 * 1024)); for (i = 0; i < appendx; i++) - switch (appends[i].type) { + switch (appends_[i].type) { case AP_STRING: - fwrite(appends[i].s, sizeof(char), appends[i].len, + fwrite(appends_[i].s, sizeof(char), appends_[i].len, outfile); break; case AP_FILE: @@ -555,7 +555,7 @@ flush_appends(void) * would be truly bizarre, but possible. It's probably * not that big a performance win, anyhow. */ - if ((f = fopen(appends[i].s, "r")) == NULL) + if ((f = fopen(appends_[i].s, "r")) == NULL) break; while ((count = fread(buf, sizeof(char), sizeof(buf), f))) (void)fwrite(buf, sizeof(char), count, outfile); diff --git a/third_party/sqlite3/sqlite3.mk b/third_party/sqlite3/sqlite3.mk index 88f7b79d7..9bff4f494 100644 --- a/third_party/sqlite3/sqlite3.mk +++ b/third_party/sqlite3/sqlite3.mk @@ -58,6 +58,7 @@ THIRD_PARTY_SQLITE3_A_DIRECTDEPS = \ LIBC_TIME \ LIBC_TINYMATH \ LIBC_ZIPOS \ + THIRD_PARTY_COMPILER_RT \ THIRD_PARTY_GDTOA \ THIRD_PARTY_LINENOISE \ THIRD_PARTY_MUSL \ @@ -79,7 +80,8 @@ o/$(MODE)/third_party/sqlite3/sqlite3.com.dbg: \ o/$(MODE)/third_party/sqlite3/sqlite3.com: \ o/$(MODE)/third_party/sqlite3/sqlite3.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ - o/$(MODE)/tool/build/symtab.com + o/$(MODE)/tool/build/symtab.com \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) diff --git a/third_party/stb/idct-sse.S b/third_party/stb/idct-sse.S deleted file mode 100644 index 0ab2c1a1b..000000000 --- a/third_party/stb/idct-sse.S +++ /dev/null @@ -1,426 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Computes inverse discrete cosine transform. -// -// @note used to decode jpeg - .p2align 4 -stbi__idct_simd$sse: - push %rbp - mov %rsp,%rbp - movslq %esi,%rsi - lea (%rdi,%rsi),%rax - sub $96,%rsp - movdqa 32(%rdx),%xmm0 - movdqa 112(%rdx),%xmm9 - movdqa 48(%rdx),%xmm1 - movdqa 80(%rdx),%xmm7 - movdqa %xmm0,%xmm2 - punpcklwd 96(%rdx),%xmm2 - punpckhwd 96(%rdx),%xmm0 - movdqa %xmm9,%xmm8 - movdqa 16(%rdx),%xmm5 - movdqa %xmm2,%xmm3 - movdqa %xmm2,%xmm6 - movdqa %xmm0,%xmm2 - pmaddwd .LC1(%rip),%xmm3 - movdqa %xmm0,%xmm4 - pmaddwd .LC1(%rip),%xmm2 - pmaddwd .LC0(%rip),%xmm4 - punpckhwd %xmm1,%xmm8 - pmaddwd .LC0(%rip),%xmm6 - movaps %xmm3,-48(%rbp) - movdqa (%rdx),%xmm3 - movaps %xmm2,-64(%rbp) - movdqa 64(%rdx),%xmm2 - movdqa %xmm3,%xmm0 - movaps %xmm4,-32(%rbp) - paddw %xmm2,%xmm0 - psubw %xmm2,%xmm3 - movaps %xmm6,-16(%rbp) - movdqa %xmm0,%xmm4 - pxor %xmm0,%xmm0 - movdqa %xmm0,%xmm11 - movdqa %xmm0,%xmm12 - movdqa %xmm0,%xmm2 - punpcklwd %xmm4,%xmm11 - punpckhwd %xmm3,%xmm12 - punpcklwd %xmm3,%xmm2 - movdqa %xmm11,%xmm13 - movdqa %xmm0,%xmm11 - movdqa %xmm12,%xmm3 - punpckhwd %xmm4,%xmm11 - movdqa %xmm8,%xmm12 - movdqa %xmm8,%xmm4 - movdqa %xmm11,%xmm14 - movdqa %xmm7,%xmm8 - movdqa %xmm9,%xmm11 - punpckhwd %xmm5,%xmm8 - psrad $4,%xmm3 - punpcklwd %xmm1,%xmm11 - psrad $4,%xmm13 - psrad $4,%xmm14 - movdqa %xmm11,%xmm15 - movaps %xmm13,-80(%rbp) - movdqa %xmm8,%xmm6 - paddw %xmm7,%xmm1 - pmaddwd .LC3(%rip),%xmm15 - movaps %xmm14,-96(%rbp) - movdqa %xmm8,%xmm14 - movdqa %xmm5,%xmm8 - pmaddwd .LC2(%rip),%xmm11 - pmaddwd .LC2(%rip),%xmm12 - paddw %xmm9,%xmm8 - psrad $4,%xmm2 - pmaddwd .LC3(%rip),%xmm4 - pmaddwd .LC5(%rip),%xmm6 - pmaddwd .LC4(%rip),%xmm14 - movdqa %xmm4,%xmm10 - movdqa %xmm7,%xmm4 - movdqa %xmm8,%xmm7 - punpcklwd %xmm5,%xmm4 - punpcklwd %xmm1,%xmm7 - punpckhwd %xmm1,%xmm8 - movdqa %xmm4,%xmm13 - movdqa %xmm7,%xmm9 - pmaddwd .LC5(%rip),%xmm4 - pmaddwd .LC6(%rip),%xmm9 - movdqa %xmm8,%xmm5 - movdqa %xmm7,%xmm1 - pmaddwd .LC7(%rip),%xmm8 - pmaddwd .LC6(%rip),%xmm5 - movdqa %xmm15,%xmm7 - paddd %xmm9,%xmm11 - paddd %xmm9,%xmm4 - movdqa .LC8(%rip),%xmm9 - paddd %xmm8,%xmm14 - paddd %xmm10,%xmm8 - movdqa -96(%rbp),%xmm10 - paddd -64(%rbp),%xmm10 - pmaddwd .LC7(%rip),%xmm1 - pmaddwd .LC4(%rip),%xmm13 - paddd %xmm5,%xmm12 - paddd %xmm5,%xmm6 - paddd %xmm9,%xmm10 - movdqa -80(%rbp),%xmm5 - paddd -48(%rbp),%xmm5 - paddd %xmm1,%xmm13 - paddd %xmm1,%xmm7 - movdqa %xmm10,%xmm1 - psubd %xmm6,%xmm10 - paddd %xmm9,%xmm5 - paddd %xmm6,%xmm1 - psrad $10,%xmm10 - movdqa -16(%rbp),%xmm6 - movdqa %xmm1,%xmm15 - movdqa %xmm5,%xmm1 - psubd %xmm4,%xmm5 - psrad $10,%xmm5 - paddd %xmm4,%xmm1 - paddd %xmm2,%xmm6 - packssdw %xmm10,%xmm5 - movdqa -32(%rbp),%xmm10 - paddd %xmm9,%xmm6 - paddd %xmm9,%xmm2 - psrad $10,%xmm15 - psrad $10,%xmm1 - psubd -16(%rbp),%xmm2 - paddd %xmm3,%xmm10 - paddd %xmm9,%xmm3 - packssdw %xmm15,%xmm1 - paddd %xmm9,%xmm10 - psubd -32(%rbp),%xmm3 - movdqa %xmm10,%xmm4 - psubd %xmm8,%xmm10 - paddd %xmm8,%xmm4 - psrad $10,%xmm10 - movdqa %xmm4,%xmm15 - movdqa %xmm6,%xmm4 - psubd %xmm7,%xmm6 - psrad $10,%xmm6 - psrad $10,%xmm15 - paddd %xmm7,%xmm4 - movdqa %xmm3,%xmm7 - psubd %xmm14,%xmm3 - packssdw %xmm10,%xmm6 - psrad $10,%xmm3 - psrad $10,%xmm4 - paddd %xmm14,%xmm7 - movdqa %xmm7,%xmm8 - movdqa %xmm2,%xmm7 - psubd %xmm13,%xmm2 - paddd %xmm13,%xmm7 - psrad $10,%xmm8 - packssdw %xmm15,%xmm4 - psrad $10,%xmm7 - psrad $10,%xmm2 - packssdw %xmm8,%xmm7 - movdqa -80(%rbp),%xmm8 - packssdw %xmm3,%xmm2 - paddd %xmm9,%xmm8 - paddd -96(%rbp),%xmm9 - psubd -48(%rbp),%xmm8 - psubd -64(%rbp),%xmm9 - movdqa %xmm8,%xmm3 - movdqa %xmm9,%xmm10 - psubd %xmm11,%xmm8 - paddd %xmm12,%xmm10 - paddd %xmm11,%xmm3 - psrad $10,%xmm8 - psrad $10,%xmm10 - psrad $10,%xmm3 - psubd %xmm12,%xmm9 - psrad $10,%xmm9 - packssdw %xmm10,%xmm3 - movdqa %xmm1,%xmm10 - packssdw %xmm9,%xmm8 - movdqa %xmm7,%xmm9 - punpckhwd %xmm6,%xmm7 - punpcklwd %xmm6,%xmm9 - punpcklwd %xmm8,%xmm10 - punpckhwd %xmm8,%xmm1 - movdqa %xmm3,%xmm6 - movdqa %xmm4,%xmm8 - punpckhwd %xmm5,%xmm3 - punpcklwd %xmm5,%xmm6 - punpcklwd %xmm2,%xmm8 - movdqa %xmm3,%xmm5 - punpckhwd %xmm2,%xmm4 - movdqa %xmm8,%xmm3 - movdqa %xmm10,%xmm2 - punpckhwd %xmm6,%xmm8 - punpcklwd %xmm6,%xmm3 - punpcklwd %xmm9,%xmm2 - movdqa %xmm8,%xmm6 - movdqa %xmm4,%xmm8 - punpckhwd %xmm9,%xmm10 - punpcklwd %xmm5,%xmm8 - punpckhwd %xmm5,%xmm4 - movdqa %xmm2,%xmm5 - punpcklwd %xmm3,%xmm5 - punpckhwd %xmm3,%xmm2 - movdqa %xmm1,%xmm15 - movdqa %xmm10,%xmm3 - punpckhwd %xmm7,%xmm1 - punpckhwd %xmm6,%xmm10 - punpcklwd %xmm6,%xmm3 - movdqa %xmm1,%xmm6 - punpckhwd %xmm4,%xmm1 - punpcklwd %xmm4,%xmm6 - movdqa %xmm3,%xmm4 - punpcklwd %xmm7,%xmm15 - punpcklwd %xmm6,%xmm4 - punpckhwd %xmm6,%xmm3 - movdqa %xmm15,%xmm7 - movdqa %xmm4,%xmm6 - punpcklwd %xmm8,%xmm7 - movdqa %xmm3,%xmm11 - movdqa %xmm4,%xmm12 - movdqa %xmm3,%xmm4 - movdqa %xmm5,%xmm3 - paddw %xmm7,%xmm3 - movdqa %xmm1,%xmm9 - punpckhwd %xmm8,%xmm15 - punpcklwd %xmm10,%xmm9 - psubw %xmm7,%xmm5 - movdqa %xmm15,%xmm7 - movdqa %xmm9,%xmm14 - punpcklwd %xmm2,%xmm7 - movdqa %xmm1,%xmm8 - pmaddwd .LC0(%rip),%xmm6 - punpckhwd %xmm10,%xmm8 - paddw %xmm15,%xmm10 - movaps %xmm6,-16(%rbp) - pmaddwd .LC1(%rip),%xmm4 - movdqa %xmm0,%xmm6 - pmaddwd .LC0(%rip),%xmm11 - pmaddwd .LC2(%rip),%xmm14 - pmaddwd .LC1(%rip),%xmm12 - pmaddwd .LC3(%rip),%xmm9 - movaps %xmm4,-64(%rbp) - movdqa %xmm3,%xmm4 - movdqa %xmm0,%xmm3 - punpckhwd %xmm4,%xmm6 - punpcklwd %xmm4,%xmm3 - movdqa %xmm0,%xmm4 - movaps %xmm11,-32(%rbp) - movdqa %xmm6,%xmm13 - movdqa %xmm15,%xmm6 - punpcklwd %xmm5,%xmm4 - movaps %xmm12,-48(%rbp) - punpckhwd %xmm2,%xmm6 - paddw %xmm1,%xmm2 - punpckhwd %xmm5,%xmm0 - movdqa %xmm14,%xmm11 - movdqa %xmm2,%xmm5 - movdqa %xmm7,%xmm14 - punpckhwd %xmm10,%xmm2 - psrad $4,%xmm13 - punpcklwd %xmm10,%xmm5 - movaps %xmm13,-80(%rbp) - movdqa %xmm8,%xmm12 - movdqa %xmm5,%xmm10 - pmaddwd .LC4(%rip),%xmm14 - pmaddwd .LC6(%rip),%xmm10 - movdqa %xmm2,%xmm15 - pmaddwd .LC7(%rip),%xmm5 - pmaddwd .LC3(%rip),%xmm8 - pmaddwd .LC5(%rip),%xmm7 - movdqa %xmm14,%xmm13 - movdqa %xmm6,%xmm14 - paddd %xmm5,%xmm13 - paddd %xmm5,%xmm9 - pmaddwd .LC5(%rip),%xmm6 - psrad $4,%xmm3 - pmaddwd .LC6(%rip),%xmm15 - paddd %xmm10,%xmm7 - paddd %xmm10,%xmm11 - psrad $4,%xmm4 - pmaddwd .LC2(%rip),%xmm12 - psrad $4,%xmm0 - pmaddwd .LC4(%rip),%xmm14 - pmaddwd .LC7(%rip),%xmm2 - movdqa -80(%rbp),%xmm5 - paddd %xmm15,%xmm12 - paddd -64(%rbp),%xmm5 - paddd %xmm2,%xmm14 - paddd %xmm8,%xmm2 - movdqa -48(%rbp),%xmm8 - paddd %xmm6,%xmm15 - movdqa .LC9(%rip),%xmm6 - paddd %xmm3,%xmm8 - paddd %xmm6,%xmm8 - paddd %xmm6,%xmm5 - movdqa %xmm5,%xmm10 - movdqa %xmm8,%xmm1 - psubd %xmm15,%xmm5 - psubd %xmm7,%xmm8 - psrad $17,%xmm5 - paddd %xmm7,%xmm1 - movdqa -32(%rbp),%xmm7 - psrad $17,%xmm8 - paddd %xmm15,%xmm10 - paddd %xmm6,%xmm3 - packssdw %xmm5,%xmm8 - movdqa -16(%rbp),%xmm5 - paddd %xmm0,%xmm7 - paddd %xmm6,%xmm0 - paddd %xmm6,%xmm7 - psrad $17,%xmm10 - psubd -32(%rbp),%xmm0 - paddd %xmm4,%xmm5 - psrad $17,%xmm1 - movdqa %xmm7,%xmm15 - paddd %xmm6,%xmm5 - packssdw %xmm10,%xmm1 - psubd %xmm2,%xmm7 - movdqa %xmm5,%xmm10 - paddd %xmm6,%xmm4 - psubd %xmm9,%xmm5 - psubd -16(%rbp),%xmm4 - psrad $17,%xmm7 - paddd %xmm2,%xmm15 - psrad $17,%xmm5 - psubd -48(%rbp),%xmm3 - paddd -80(%rbp),%xmm6 - packssdw %xmm7,%xmm5 - movdqa %xmm4,%xmm2 - movdqa %xmm0,%xmm7 - psubd -64(%rbp),%xmm6 - paddd %xmm14,%xmm7 - psrad $17,%xmm15 - paddd %xmm13,%xmm2 - psubd %xmm14,%xmm0 - psrad $17,%xmm7 - psubd %xmm13,%xmm4 - psrad $17,%xmm0 - paddd %xmm9,%xmm10 - psrad $17,%xmm2 - psrad $17,%xmm4 - packuswb %xmm8,%xmm5 - packssdw %xmm0,%xmm4 - packssdw %xmm7,%xmm2 - movdqa %xmm3,%xmm0 - movdqa %xmm6,%xmm7 - psrad $17,%xmm10 - paddd %xmm11,%xmm0 - paddd %xmm12,%xmm7 - psubd %xmm12,%xmm6 - packssdw %xmm15,%xmm10 - psubd %xmm11,%xmm3 - psrad $17,%xmm7 - packuswb %xmm10,%xmm1 - psrad $17,%xmm0 - psrad $17,%xmm6 - psrad $17,%xmm3 - packssdw %xmm7,%xmm0 - packssdw %xmm6,%xmm3 - packuswb %xmm0,%xmm2 - movdqa %xmm1,%xmm0 - packuswb %xmm4,%xmm3 - movdqa %xmm2,%xmm4 - punpckhbw %xmm5,%xmm2 - punpcklbw %xmm3,%xmm0 - punpcklbw %xmm5,%xmm4 - punpckhbw %xmm3,%xmm1 - movdqa %xmm2,%xmm3 - movdqa %xmm0,%xmm2 - movdqa %xmm1,%xmm5 - punpcklbw %xmm4,%xmm2 - punpckhbw %xmm4,%xmm0 - punpcklbw %xmm3,%xmm5 - movdqa %xmm2,%xmm4 - punpckhbw %xmm5,%xmm2 - punpckhbw %xmm3,%xmm1 - punpcklbw %xmm5,%xmm4 - movdqa %xmm0,%xmm3 - punpckhbw %xmm1,%xmm0 - movq %xmm4,(%rdi) - pshufd $78,%xmm4,%xmm4 - punpcklbw %xmm1,%xmm3 - movq %xmm4,(%rax) - add %rsi,%rax - movq %xmm2,(%rax) - add %rsi,%rax - pshufd $78,%xmm2,%xmm2 - movq %xmm2,(%rax) - add %rsi,%rax - movq %xmm3,(%rax) - add %rsi,%rax - pshufd $78,%xmm3,%xmm3 - movq %xmm3,(%rax) - movq %xmm0,(%rax,%rsi) - pshufd $78,%xmm0,%xmm0 - movq %xmm0,(%rax,%rsi,2) - leave - ret - .endfn stbi__idct_simd$sse,globl - - .rodata.cst16 -.LC0: .value 2217,-5350,2217,-5350,2217,-5350,2217,-5350 -.LC1: .value 5352,2217,5352,2217,5352,2217,5352,2217 -.LC2: .value -6811,-8034,-6811,-8034,-6811,-8034,-6811,-8034 -.LC3: .value -8034,4552,-8034,4552,-8034,4552,-8034,4552 -.LC4: .value 6813,-1597,6813,-1597,6813,-1597,6813,-1597 -.LC5: .value -1597,4552,-1597,4552,-1597,4552,-1597,4552 -.LC6: .value 1131,4816,1131,4816,1131,4816,1131,4816 -.LC7: .value 4816,-5681,4816,-5681,4816,-5681,4816,-5681 -.LC8: .long 0x200,0x200,0x200,0x200 -.LC9: .long 0x1010000,0x1010000,0x1010000,0x1010000 diff --git a/third_party/stb/internal.h b/third_party/stb/internal.h deleted file mode 100644 index 42d1ee1ed..000000000 --- a/third_party/stb/internal.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_STB_INTERNAL_H_ -#define COSMOPOLITAN_THIRD_PARTY_STB_INTERNAL_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -void stbi__YCbCr_to_RGB_row(unsigned char *, const unsigned char *, - const unsigned char *, const unsigned char *, - unsigned, unsigned) _Hide; -int stbi__YCbCr_to_RGB_row$sse2(unsigned char *, const unsigned char *, - const unsigned char *, const unsigned char *, - unsigned) _Hide; -void stbi__idct_simd$sse(unsigned char *out, int out_stride, - short data[64]) _Hide; -void stbi__idct_simd$avx(unsigned char *out, int out_stride, - short data[64]) _Hide; - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_STB_INTERNAL_H_ */ diff --git a/third_party/stb/stb_image.c b/third_party/stb/stb_image.c index 9005b8df0..9a476035e 100644 --- a/third_party/stb/stb_image.c +++ b/third_party/stb/stb_image.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "third_party/stb/stb_image.h" #include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/fmt/conv.h" @@ -31,20 +32,33 @@ #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/x/x.h" -#include "third_party/stb/internal.h" -#include "third_party/stb/stb_image.h" - -#define ROL(w, k) ((w) << (k) | CheckUnsigned(w) >> (sizeof(w) * 8 - (k))) +#include "third_party/aarch64/arm_neon.h" +#include "third_party/intel/ammintrin.internal.h" asm(".ident\t\"\\n\\n\ stb_image (Public Domain)\\n\ Credit: Sean Barrett, et al.\\n\ http://nothings.org/stb\""); +#ifdef __x86_64__ +#define STBI_SSE2 +#define idct_block_kernel stbi__idct_simd +#elif defined(__aarch64__) +#define STBI_NEON +#define idct_block_kernel stbi__idct_simd +#else +#define idct_block_kernel stbi__idct_block +#endif + +#define ROL(w, k) ((w) << (k) | CheckUnsigned(w) >> (sizeof(w) * 8 - (k))) + #ifndef STBI_REALLOC_SIZED #define STBI_REALLOC_SIZED(p, oldsz, newsz) realloc(p, newsz) #endif +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + // stbi__context structure is our basic context used by all images, so it // contains all the IO context, plus some basic image information typedef struct { @@ -1219,6 +1233,556 @@ forceinline unsigned char stbi__clamp(int x) { return (unsigned char)x; } +#define stbi__f2f(x) ((int)(((x)*4096 + 0.5))) +#define stbi__fsh(x) ((x)*4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7) \ + int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2 + p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3 * stbi__f2f(-1.847759065f); \ + t3 = p1 + p2 * stbi__f2f(0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2 + p3); \ + t1 = stbi__fsh(p2 - p3); \ + x0 = t0 + t3; \ + x3 = t0 - t3; \ + x1 = t1 + t2; \ + x2 = t1 - t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0 + t2; \ + p4 = t1 + t3; \ + p1 = t0 + t3; \ + p2 = t1 + t2; \ + p5 = (p3 + p4) * stbi__f2f(1.175875602f); \ + t0 = t0 * stbi__f2f(0.298631336f); \ + t1 = t1 * stbi__f2f(2.053119869f); \ + t2 = t2 * stbi__f2f(3.072711026f); \ + t3 = t3 * stbi__f2f(1.501321110f); \ + p1 = p5 + p1 * stbi__f2f(-0.899976223f); \ + p2 = p5 + p2 * stbi__f2f(-2.562915447f); \ + p3 = p3 * stbi__f2f(-1.961570560f); \ + p4 = p4 * stbi__f2f(-0.390180644f); \ + t3 += p1 + p4; \ + t2 += p2 + p3; \ + t1 += p2 + p4; \ + t0 += p1 + p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) { + int i, val[64], *v = val; + stbi_uc *o; + short *d = data; + + // columns + for (i = 0; i < 8; ++i, ++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && + d[48] == 0 && d[56] == 0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * 4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; + x1 += 512; + x2 += 512; + x3 += 512; + v[0] = (x0 + t3) >> 10; + v[56] = (x0 - t3) >> 10; + v[8] = (x1 + t2) >> 10; + v[48] = (x1 - t2) >> 10; + v[16] = (x2 + t1) >> 10; + v[40] = (x2 - t1) >> 10; + v[24] = (x3 + t0) >> 10; + v[32] = (x3 - t0) >> 10; + } + } + + for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) { + // no fast case since the first 1D IDCT spread components out + STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128 << 17); + x1 += 65536 + (128 << 17); + x2 += 65536 + (128 << 17); + x3 += 65536 + (128 << 17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0 + t3) >> 17); + o[7] = stbi__clamp((x0 - t3) >> 17); + o[1] = stbi__clamp((x1 + t2) >> 17); + o[6] = stbi__clamp((x1 - t2) >> 17); + o[2] = stbi__clamp((x2 + t1) >> 17); + o[5] = stbi__clamp((x2 - t1) >> 17); + o[3] = stbi__clamp((x3 + t0) >> 17); + o[4] = stbi__clamp((x3 - t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { + // This is constructed to match our regular (generic) integer IDCT exactly. + __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + +// dot product constant: even elems=x, odd elems=y +#define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y)) + +// out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) +// out(1) = c1[even]*x + c1[odd]*y +#define dct_rot(out0, out1, x, y, c0, c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x), (y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x), (y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + +// out = in << 12 (in 16-bit, out 32-bit) +#define dct_widen(out, in) \ + __m128i out##_l = \ + _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = \ + _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + +// wide add +#define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + +// butterfly a/b, add bias, then shift by "s" and pack +#define dct_bfly32o(out0, out1, a, b, bias, s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = \ + _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = \ + _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + +// 8-bit interleave step (for transposes) +#define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + +// 16-bit interleave step (for transposes) +#define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + +#define dct_pass(bias, shift) \ + { \ + /* even part */ \ + dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1); \ + dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0, row7, x0, x7, bias, shift); \ + dct_bfly32o(row1, row6, x1, x6, bias, shift); \ + dct_bfly32o(row2, row5, x2, x5, bias, shift); \ + dct_bfly32o(row3, row4, x3, x4, bias, shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), + stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), + stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), + stbi__f2f(1.175875602f)); + __m128i rot1_1 = + dct_const(stbi__f2f(1.175875602f), + stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), + stbi__f2f(-1.961570560f)); + __m128i rot2_1 = + dct_const(stbi__f2f(-1.961570560f), + stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), + stbi__f2f(-0.390180644f)); + __m128i rot3_1 = + dct_const(stbi__f2f(-0.390180644f), + stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. + __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17)); + + // load + row0 = _mm_load_si128((const __m128i *)(data + 0 * 8)); + row1 = _mm_load_si128((const __m128i *)(data + 1 * 8)); + row2 = _mm_load_si128((const __m128i *)(data + 2 * 8)); + row3 = _mm_load_si128((const __m128i *)(data + 3 * 8)); + row4 = _mm_load_si128((const __m128i *)(data + 4 * 8)); + row5 = _mm_load_si128((const __m128i *)(data + 5 * 8)); + row6 = _mm_load_si128((const __m128i *)(data + 6 * 8)); + row7 = _mm_load_si128((const __m128i *)(data + 7 * 8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... + + // store + _mm_storel_epi64((__m128i *)out, p0); + out += out_stride; + _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p0, 0x4e)); + out += out_stride; + _mm_storel_epi64((__m128i *)out, p2); + out += out_stride; + _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p2, 0x4e)); + out += out_stride; + _mm_storel_epi64((__m128i *)out, p1); + out += out_stride; + _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p1, 0x4e)); + out += out_stride; + _mm_storel_epi64((__m128i *)out, p3); + out += out_stride; + _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0, out1, a, b, shiftop, s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0, row7, x0, x7, shiftop, shift); \ + dct_bfly32o(row1, row6, x1, x6, shiftop, shift); \ + dct_bfly32o(row2, row5, x2, x5, shiftop, shift); \ + dct_bfly32o(row3, row4, x3, x4, shiftop, shift); \ + } + + // load + row0 = vld1q_s16(data + 0 * 8); + row1 = vld1q_s16(data + 1 * 8); + row2 = vld1q_s16(data + 2 * 8); + row3 = vld1q_s16(data + 3 * 8); + row4 = vld1q_s16(data + 4 * 8); + row5 = vld1q_s16(data + 5 * 8); + row6 = vld1q_s16(data + 6 * 8); + row7 = vld1q_s16(data + 7 * 8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) \ + { \ + int16x8x2_t t = vtrnq_s16(x, y); \ + x = t.val[0]; \ + y = t.val[1]; \ + } +#define dct_trn32(x, y) \ + { \ + int32x4x2_t t = \ + vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); \ + x = vreinterpretq_s16_s32(t.val[0]); \ + y = vreinterpretq_s16_s32(t.val[1]); \ + } +#define dct_trn64(x, y) \ + { \ + int16x8_t x0 = x; \ + int16x8_t y0 = y; \ + x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); \ + y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); \ + } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. + dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) \ + { \ + uint8x8x2_t t = vtrn_u8(x, y); \ + x = t.val[0]; \ + y = t.val[1]; \ + } +#define dct_trn8_16(x, y) \ + { \ + uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); \ + x = vreinterpret_u8_u16(t.val[0]); \ + y = vreinterpret_u8_u16(t.val[1]); \ + } +#define dct_trn8_32(x, y) \ + { \ + uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); \ + x = vreinterpret_u8_u32(t.val[0]); \ + y = vreinterpret_u8_u32(t.val[1]); \ + } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! + + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); + out += out_stride; + vst1_u8(out, p1); + out += out_stride; + vst1_u8(out, p2); + out += out_stride; + vst1_u8(out, p3); + out += out_stride; + vst1_u8(out, p4); + out += out_stride; + vst1_u8(out, p5); + out += out_stride; + vst1_u8(out, p6); + out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + #define STBI__MARKER_none 0xff // if there's a pending marker from the entropy stream, return that // otherwise, fetch from the stream and get a marker. if there's no @@ -1275,7 +1839,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z) { z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; - stbi__idct_simd$sse( + idct_block_kernel( z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data); // every data block is an MCU, so countdown the restart interval @@ -1309,7 +1873,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z) { z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; - stbi__idct_simd$sse( + idct_block_kernel( z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data); } @@ -1411,7 +1975,7 @@ static void stbi__jpeg_finish(stbi__jpeg *z) { short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); - stbi__idct_simd$sse( + idct_block_kernel( z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data); } @@ -1905,6 +2469,203 @@ static unsigned char *stbi__resample_row_nearest(unsigned char *out, return out; } +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, + const stbi_uc *pcb, const stbi_uc *pcr, + int count, int step) { + int i; + for (i = 0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1 << 19); // rounding + int r, g, b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr * stbi__float2fixed(1.40200f); + g = y_fixed + (cr * -stbi__float2fixed(0.71414f)) + + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb * stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned)r > 255) { + if (r < 0) + r = 0; + else + r = 255; + } + if ((unsigned)g > 255) { + if (g < 0) + g = 0; + else + g = 255; + } + if ((unsigned)b > 255) { + if (b < 0) + b = 0; + else + b = 255; + } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, + stbi_uc const *pcb, stbi_uc const *pcr, + int count, int step) { + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f)); + __m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f)); + __m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f)); + __m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f)); + __m128i y_bias = _mm_set1_epi8((char)(unsigned char)128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i + 7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *)(y + i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *)(pcr + i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *)(pcb + i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *)(out + 0), o0); + _mm_storeu_si128((__m128i *)(out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f)); + int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f)); + int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f)); + int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f)); + + for (; i + 7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8 * 4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1 << 19); // rounding + int r, g, b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr * stbi__float2fixed(1.40200f); + g = y_fixed + cr * -stbi__float2fixed(0.71414f) + + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb * stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned)r > 255) { + if (r < 0) + r = 0; + else + r = 255; + } + if ((unsigned)g > 255) { + if (g < 0) + g = 0; + else + g = 255; + } + if ((unsigned)b > 255) { + if (b < 0) + b = 0; + else + b = 255; + } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + // set up the kernels static void stbi__setup_jpeg(stbi__jpeg *j) { j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; diff --git a/third_party/stb/ycbcr-sse2.S b/third_party/stb/ycbcr-sse2.S deleted file mode 100644 index e67d65a7e..000000000 --- a/third_party/stb/ycbcr-sse2.S +++ /dev/null @@ -1,93 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - - .balign 16 -stbi__YCbCr_to_RGB_row$sse2: - .leafprologue - .profilable - xor %eax,%eax - cmp $8,%r8d - jl 1f - xor %eax,%eax - movdqa 2f(%rip),%xmm2 - movdqa 3f(%rip),%xmm8 - movdqa 4f(%rip),%xmm9 - movdqa 5f(%rip),%xmm10 - movdqa 6f(%rip),%xmm4 - movdqa 7f(%rip),%xmm5 - .balign 16 -0: movq (%rsi,%rax),%xmm6 - movq (%rcx,%rax),%xmm7 - movq (%rdx,%rax),%xmm1 - movdqa %xmm2,%xmm0 - punpcklbw %xmm6,%xmm0 - pxor %xmm2,%xmm7 - pxor %xmm6,%xmm6 - punpcklbw %xmm7,%xmm6 - pxor %xmm2,%xmm1 - pxor %xmm3,%xmm3 - punpcklbw %xmm1,%xmm3 - psrlw $4,%xmm0 - movdqa %xmm6,%xmm7 - pmulhw %xmm8,%xmm7 - movdqa %xmm3,%xmm1 - pmulhw %xmm9,%xmm1 - pmulhw %xmm10,%xmm3 - pmulhw %xmm4,%xmm6 - paddw %xmm1,%xmm6 - paddw %xmm0,%xmm7 - paddw %xmm0,%xmm3 - paddw %xmm0,%xmm6 - psraw $4,%xmm7 - psraw $4,%xmm3 - packuswb %xmm3,%xmm7 - psraw $4,%xmm6 - packuswb %xmm5,%xmm6 - movdqa %xmm7,%xmm0 - punpcklbw %xmm6,%xmm0 - punpckhbw %xmm6,%xmm7 - movdqa %xmm0,%xmm1 - punpcklwd %xmm7,%xmm1 - punpckhwd %xmm7,%xmm0 - movdqu %xmm1,(%rdi,%rax,4) - movdqu %xmm0,16(%rdi,%rax,4) - add $8,%rax - lea 7(%rax),%r9d - cmp %r8d,%r9d - jl 0b -1: .leafepilogue - .endfn stbi__YCbCr_to_RGB_row$sse2,globl - - .rodata.cst16 -2: .byte 128,128,128,128,128,128,128,128 - .zero 8 -3: .short 5743,5743,5743,5743,5743,5743,5743,5743 -4: .short 64126,64126,64126,64126,64126,64126,64126,64126 -5: .short 7258,7258,7258,7258,7258,7258,7258,7258 -6: .short 62611,62611,62611,62611,62611,62611,62611,62611 -7: .short 255,255,255,255,255,255,255,255 - - .end -// These should be better but need to get them to work -3: .short 11485,11485,11485,11485,11485,11485,11485,11485 # J′R m=13 99.964387% -4: .short -11277,-11277,-11277,-11277,-11277,-11277,-11277,-11277 # J′G m=15 99.935941% -5: .short 14516,14516,14516,14516,14516,14516,14516,14516 # J′B m=13 99.947219% -6: .short -23401,-23401,-23401,-23401,-23401,-23401,-23401,-23401 # J′G m=15 99.935941% -7: .short 255,255,255,255,255,255,255,255 diff --git a/third_party/stb/ycbcr.c b/third_party/stb/ycbcr.c deleted file mode 100644 index a2053fda4..000000000 --- a/third_party/stb/ycbcr.c +++ /dev/null @@ -1,56 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/limits.h" -#include "libc/log/check.h" -#include "libc/log/log.h" -#include "libc/macros.internal.h" -#include "libc/str/str.h" -#include "third_party/stb/internal.h" - -/* this is a reduced-precision calculation of YCbCr-to-RGB introduced - to make sure the code produces the same results in both SIMD and scalar */ -#define FLOAT2FIXED(x) (((int)((x)*4096.0f + .5f)) << 8) - -void stbi__YCbCr_to_RGB_row(unsigned char *out, const unsigned char *y, - const unsigned char *pcb, const unsigned char *pcr, - unsigned count, unsigned step) { - unsigned i; - unsigned char b4[4]; - int y_fixed, r, g, b, cr, cb; - CHECK(step == 3 || step == 4); - CHECK_LE(count, INT_MAX / 4 - 4); - for (i = step == 4 ? stbi__YCbCr_to_RGB_row$sse2(out, y, pcb, pcr, count) : 0; - i < count; ++i) { - y_fixed = (y[i] << 20) + (1 << 19); /* rounding */ - cr = pcr[i] - 128; - cb = pcb[i] - 128; - r = y_fixed + cr * FLOAT2FIXED(1.40200f); - g = y_fixed + (cr * -FLOAT2FIXED(0.71414f)) + - ((cb * -FLOAT2FIXED(0.34414f)) & 0xffff0000); - b = y_fixed + cb * FLOAT2FIXED(1.77200f); - r >>= 20; - g >>= 20; - b >>= 20; - b4[0] = MIN(255, MAX(0, r)); - b4[1] = MIN(255, MAX(0, g)); - b4[2] = MIN(255, MAX(0, b)); - b4[3] = 255; - memcpy(out + i * step, b4, 4); - } -} diff --git a/tool/build/build.mk b/tool/build/build.mk index 62e820f29..906e47b0e 100644 --- a/tool/build/build.mk +++ b/tool/build/build.mk @@ -83,8 +83,9 @@ o/$(MODE)/tool/build/build.pkg: \ o/$(MODE)/%.ctest.ok: \ %.ctest \ - $(TOOL_BUILD_CALCULATOR) - @$(COMPILE) -AMKWIDES -wtT$@ $(TOOL_BUILD_CALCULATOR) $< + $(TOOL_BUILD_CALCULATOR) \ + $(VM) + @$(COMPILE) -AMKWIDES -wtT$@ $(VM) $(TOOL_BUILD_CALCULATOR) $< o/$(MODE)/tool/build/%.com.dbg: \ $(TOOL_BUILD_DEPS) \ @@ -97,7 +98,8 @@ o/$(MODE)/tool/build/%.com.dbg: \ o/$(MODE)/tool/build/blinkenlights.com: \ o/$(MODE)/tool/build/blinkenlights.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ - o/$(MODE)/tool/build/symtab.com + o/$(MODE)/tool/build/symtab.com \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) diff --git a/tool/build/compile.c b/tool/build/compile.c index 5f09c5699..98a6737a9 100644 --- a/tool/build/compile.c +++ b/tool/build/compile.c @@ -479,7 +479,7 @@ void AddArg(char *s) { AddStr(&args, s); } -int GetBaseCpuFreqMhz(void) { +static int GetBaseCpuFreqMhz(void) { return KCPUIDS(16H, EAX) & 0x7fff; } @@ -488,6 +488,7 @@ void SetCpuLimit(int secs) { struct rlimit rlim; if (secs <= 0) return; if (IsWindows()) return; +#ifdef __x86_64__ if (!(mhz = GetBaseCpuFreqMhz())) return; lim = ceil(3100. / mhz * secs); rlim.rlim_cur = lim; @@ -499,6 +500,7 @@ void SetCpuLimit(int secs) { setrlimit(RLIMIT_CPU, &rlim); } } +#endif } void SetFszLimit(long n) { diff --git a/tool/build/lib/buildlib.mk b/tool/build/lib/buildlib.mk index edf860840..07ff037a1 100644 --- a/tool/build/lib/buildlib.mk +++ b/tool/build/lib/buildlib.mk @@ -68,9 +68,11 @@ $(TOOL_BUILD_LIB_A).pkg: \ $(TOOL_BUILD_LIB_A_OBJS) \ $(foreach x,$(TOOL_BUILD_LIB_A_DIRECTDEPS),$($(x)_A).pkg) +ifeq ($(ARCH), x86_64) o/$(MODE)/tool/build/lib/ssefloat.o: private \ TARGET_ARCH += \ -msse3 +endif o/$(MODE)/tool/build/lib/apetest.com.dbg: \ $(TOOL_BUILD_LIB_A_DEPS) \ @@ -95,6 +97,10 @@ o/$(MODE)/tool/build/lib/apetest.o: \ tool/build/lib/apetest.c \ libc/calls/calls.h +# these assembly files are safe to build on aarch64 +o/$(MODE)/tool/build/lib/errnos.o: tool/build/lib/errnos.S + @$(COMPILE) -AOBJECTIFY.S $(OBJECTIFY.S) $(OUTPUT_OPTION) -c $< + TOOL_BUILD_LIB_LIBS = $(foreach x,$(TOOL_BUILD_LIB_ARTIFACTS),$($(x))) TOOL_BUILD_LIB_SRCS = $(foreach x,$(TOOL_BUILD_LIB_ARTIFACTS),$($(x)_SRCS)) TOOL_BUILD_LIB_HDRS = $(foreach x,$(TOOL_BUILD_LIB_ARTIFACTS),$($(x)_HDRS)) diff --git a/tool/build/pledge.c b/tool/build/pledge.c index 2a242d6e3..fa24f16ae 100644 --- a/tool/build/pledge.c +++ b/tool/build/pledge.c @@ -318,16 +318,20 @@ int SetLimit(int r, long lo, long hi) { return setrlimit(r, &lim); } -int GetBaseCpuFreqMhz(void) { +static int GetBaseCpuFreqMhz(void) { return KCPUIDS(16H, EAX) & 0x7fff; } int SetCpuLimit(int secs) { +#ifdef __x86_64__ int mhz, lim; if (secs <= 0) return 0; if (!(mhz = GetBaseCpuFreqMhz())) return eopnotsupp(); lim = ceil(3100. / mhz * secs); return SetLimit(RLIMIT_CPU, lim, lim); +#else + return 0; +#endif } bool PathExists(const char *path) { @@ -772,7 +776,8 @@ int main(int argc, char *argv[]) { if (!(~ipromises & (1ul << PROMISE_EXEC))) { g_promises = xstrcat(g_promises, ' ', "exec"); if (!g_qflag) { - __pledge_mode |= PLEDGE_STDERR_LOGGING; + // TODO(jart): Fix me. + // __pledge_mode |= PLEDGE_STDERR_LOGGING; } } if (isdynamic) { diff --git a/tool/decode/lib/pollnames.S b/tool/decode/lib/pollnames.S deleted file mode 100644 index 04468259c..000000000 --- a/tool/decode/lib/pollnames.S +++ /dev/null @@ -1,73 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "ape/relocations.h" -#include "ape/relocations.h" -#include "libc/macros.internal.h" - - .Lrows = 0 # w/ 2 cols - - .macro .tab sym:req str - .section .rodata.str1.1,"aSM",@progbits,1 -.L\@: .asciz "\str" - .previous - .long RVA(\sym) - .long RVA(.L\@) - .Lrows = .Lrows + 1 - .endm - - .initro 301,_init_kPollNames -kPollNamesRo: - .tab POLLNVAL "POLLNVAL" - .tab POLLWRNORM "POLLWRNORM" - .tab POLLWRBAND "POLLWRBAND" - .tab POLLRDNORM "POLLRDNORM" - .tab POLLRDHUP "POLLRDHUP" - .tab POLLRDBAND "POLLRDBAND" - .tab POLLHUP "POLLHUP" - .tab POLLERR "POLLERR" - .tab POLLPRI "POLLPRI" - .tab POLLOUT "POLLOUT" - .tab POLLIN "POLLIN" - .endobj kPollNamesRo,globl,hidden - .previous - -// Mapping of poll() flags to their string names. -// @see RecreateFlags() - .initbss 301,_init_kPollNames -kPollNames: - .rept .Lrows - .quad 0 # unsigned long id - .quad 0 # const char *const name - .endr - .quad 0,0 # terminator row - .endobj kPollNames,globl - .previous - - .init.start 301,_init_kPollNames - pushpop .Lrows,%rcx # relocate RO→BSS b/c -fPIE crap -0: lodsl - .weak _base - mov _base(%rax),%rax # read what systemfive.S decoded - stosq - lodsl - add $_base,%rax - stosq - .loop 0b - add $16,%rdi - .init.end 301,_init_kPollNames diff --git a/tool/decode/lib/pollnames.h b/tool/decode/lib/pollnames.h deleted file mode 100644 index 3be600eaa..000000000 --- a/tool/decode/lib/pollnames.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_TOOL_DECODE_LIB_POLLNAMES_H_ -#define COSMOPOLITAN_TOOL_DECODE_LIB_POLLNAMES_H_ -#include "tool/decode/lib/idname.h" -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -extern struct IdName kPollNames[]; - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_TOOL_DECODE_LIB_POLLNAMES_H_ */ diff --git a/tool/hello/hello.c b/tool/hello/hello.c index 63821c7be..3ea5190e2 100644 --- a/tool/hello/hello.c +++ b/tool/hello/hello.c @@ -9,11 +9,6 @@ static ssize_t Write(int fd, const char *s) { } int main(int argc, char *argv[]) { - wchar_t B1[8]; - wchar_t B2[8]; - B1[1] = L'\0'; - B2[1] = L'\0'; Write(1, "hello world\n"); - kprintf("%x\n", wcscmp(memcpy(B1, "\xff\xff\xff\x7f", 4), - memcpy(B2, "\x00\x00\x00\x80", 4))); + kprintf("hello world\n"); } diff --git a/tool/net/lfuncs.c b/tool/net/lfuncs.c index b363217e9..91d406a7e 100644 --- a/tool/net/lfuncs.c +++ b/tool/net/lfuncs.c @@ -69,7 +69,11 @@ #include "third_party/zlib/zlib.h" static int Rdpid(void) { +#ifdef __x86_64__ return rdpid(); +#else + return -1; +#endif } int LuaHex(lua_State *L) { diff --git a/tool/net/net.mk b/tool/net/net.mk index 08099a769..3a2516db8 100644 --- a/tool/net/net.mk +++ b/tool/net/net.mk @@ -106,7 +106,7 @@ TOOL_NET_REDBEAN_STANDARD_ASSETS = \ TOOL_NET_REDBEAN_STANDARD_ASSETS_ZIP = \ $(COMPILE) -AZIP -T$@ \ - o/$(MODE)/third_party/zip/zip.com -b$(TMPDIR) -9qj $@ \ + $(VM) o/$(MODE)/third_party/zip/zip.com -b$(TMPDIR) -9qj $@ \ $(TOOL_NET_REDBEAN_STANDARD_ASSETS) o/$(MODE)/tool/net/redbean.com.dbg: \ @@ -122,7 +122,8 @@ o/$(MODE)/tool/net/redbean.com: \ o/$(MODE)/tool/net/redbean.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ o/$(MODE)/tool/build/symtab.com \ - $(TOOL_NET_REDBEAN_STANDARD_ASSETS) + $(TOOL_NET_REDBEAN_STANDARD_ASSETS) \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) @@ -234,7 +235,8 @@ o/$(MODE)/tool/net/redbean-demo.com.dbg: \ o/$(MODE)/tool/net/redbean-demo.com: \ o/$(MODE)/tool/net/redbean-demo.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ - o/$(MODE)/tool/build/symtab.com + o/$(MODE)/tool/build/symtab.com \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) @@ -256,7 +258,8 @@ o/$(MODE)/tool/net/redbean-static.com: \ o/$(MODE)/tool/net/redbean-static.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ o/$(MODE)/tool/build/symtab.com \ - $(TOOL_NET_REDBEAN_STANDARD_ASSETS) + $(TOOL_NET_REDBEAN_STANDARD_ASSETS) \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) @@ -281,7 +284,8 @@ o/$(MODE)/tool/net/redbean-unsecure.com: \ o/$(MODE)/tool/net/redbean-unsecure.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ o/$(MODE)/tool/build/symtab.com \ - $(TOOL_NET_REDBEAN_STANDARD_ASSETS) + $(TOOL_NET_REDBEAN_STANDARD_ASSETS) \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) @@ -305,7 +309,8 @@ o/$(MODE)/tool/net/redbean-original.com: \ o/$(MODE)/tool/net/redbean-original.com.dbg \ o/$(MODE)/third_party/zip/zip.com \ o/$(MODE)/tool/build/symtab.com \ - $(TOOL_NET_REDBEAN_STANDARD_ASSETS) + $(TOOL_NET_REDBEAN_STANDARD_ASSETS) \ + $(VM) @$(MAKE_OBJCOPY) @$(MAKE_SYMTAB_CREATE) @$(MAKE_SYMTAB_ZIP) diff --git a/tool/viz/lib/vizlib.mk b/tool/viz/lib/vizlib.mk index f89a12f5b..8edad336f 100644 --- a/tool/viz/lib/vizlib.mk +++ b/tool/viz/lib/vizlib.mk @@ -42,6 +42,7 @@ TOOL_VIZ_LIB_A_DIRECTDEPS = \ LIBC_TIME \ LIBC_TINYMATH \ LIBC_X \ + THIRD_PARTY_COMPILER_RT \ THIRD_PARTY_DLMALLOC \ THIRD_PARTY_GDTOA