From 59766efd3eb8f5ec21add2ced4fabcfcc66d5168 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 9 May 2023 23:35:10 -0700 Subject: [PATCH] Do some more aarch64 fixups --- dsp/tty/internal.h | 11 ++++--- dsp/tty/quant.h | 7 +++-- dsp/tty/rgb2ansi.c | 4 +-- dsp/tty/rgb2ttyf2i.c | 11 +++---- dsp/tty/rgb2ttyi2f.c | 2 +- dsp/tty/rgb2xterm24f.c | 29 +++++++++-------- dsp/tty/sendtitle.c | 10 ++++-- dsp/tty/tty2rgbf24.c | 4 +-- dsp/tty/ttyhisto.c | 4 ++- libc/nexgen32e/imapxlatab.S | 38 ----------------------- libc/nexgen32e/nexgen32e.h | 1 - libc/runtime/clone.c | 13 +++----- libc/sock/select-nt.c | 4 +++ libc/thread/spawn.c | 4 +-- test/libc/calls/sched_setscheduler_test.c | 3 ++ test/libc/intrin/lock_test.c | 2 +- tool/viz/lib/ycbcr2rgb3.c | 5 ++- 17 files changed, 63 insertions(+), 89 deletions(-) delete mode 100644 libc/nexgen32e/imapxlatab.S diff --git a/dsp/tty/internal.h b/dsp/tty/internal.h index a6dc83cfb..0be772622 100644 --- a/dsp/tty/internal.h +++ b/dsp/tty/internal.h @@ -1,21 +1,22 @@ #ifndef COSMOPOLITAN_DSP_TTY_INTERNAL_H_ #define COSMOPOLITAN_DSP_TTY_INTERNAL_H_ +#include "dsp/tty/quant.h" #include "dsp/tty/ttyrgb.h" #include "third_party/intel/xmmintrin.internal.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -struct TtyRgb rgb2tty24f_(__m128); -struct TtyRgb rgb2ttyf2i_(__m128); +struct TtyRgb rgb2tty24f_(ttyrgb_m128); +struct TtyRgb rgb2ttyf2i_(ttyrgb_m128); struct TtyRgb rgb2ttyi2f_(int, int, int); struct TtyRgb rgb2ansi_(int, int, int); struct TtyRgb rgb2ansihash_(int, int, int); struct TtyRgb rgb2xterm24_(int, int, int); -struct TtyRgb rgb2xterm256gray_(__m128); +struct TtyRgb rgb2xterm256gray_(ttyrgb_m128); struct TtyRgb tty2rgb_(struct TtyRgb); struct TtyRgb tty2rgb24_(struct TtyRgb); -__m128 tty2rgbf_(struct TtyRgb); -__m128 tty2rgbf24_(struct TtyRgb); +ttyrgb_m128 tty2rgbf_(struct TtyRgb); +ttyrgb_m128 tty2rgbf24_(struct TtyRgb); char *setbg16_(char *, struct TtyRgb); char *setfg16_(char *, struct TtyRgb); diff --git a/dsp/tty/quant.h b/dsp/tty/quant.h index 10bcbb95c..5cf1d084d 100644 --- a/dsp/tty/quant.h +++ b/dsp/tty/quant.h @@ -5,7 +5,6 @@ #include "libc/intrin/bits.h" #include "libc/limits.h" #include "libc/str/str.h" -#include "third_party/intel/xmmintrin.internal.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ @@ -14,12 +13,14 @@ COSMOPOLITAN_C_START_ #define BL 2 #define BR 3 -typedef __m128 (*tty2rgbf_f)(struct TtyRgb); +typedef float ttyrgb_m128 __attribute__((__vector_size__(16), __may_alias__)); + +typedef ttyrgb_m128 (*tty2rgbf_f)(struct TtyRgb); typedef char *(*setbg_f)(char *, struct TtyRgb); typedef char *(*setbgfg_f)(char *, struct TtyRgb, struct TtyRgb); typedef char *(*setfg_f)(char *, struct TtyRgb); typedef struct TtyRgb (*rgb2tty_f)(int, int, int); -typedef struct TtyRgb (*rgb2ttyf_f)(__m128); +typedef struct TtyRgb (*rgb2ttyf_f)(ttyrgb_m128); typedef struct TtyRgb (*tty2rgb_f)(struct TtyRgb); typedef struct TtyRgb ttypalette_t[2][8]; diff --git a/dsp/tty/rgb2ansi.c b/dsp/tty/rgb2ansi.c index ea9627766..51b321eab 100644 --- a/dsp/tty/rgb2ansi.c +++ b/dsp/tty/rgb2ansi.c @@ -39,9 +39,9 @@ struct TtyRgb tty2rgb_(struct TtyRgb rgbxt) { return g_ansi2rgb_[rgbxt.xt]; } -__m128 tty2rgbf_(struct TtyRgb rgbxt) { +ttyrgb_m128 tty2rgbf_(struct TtyRgb rgbxt) { rgbxt = g_ansi2rgb_[rgbxt.xt]; - return (__m128){(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b} / 255; + return (ttyrgb_m128){(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b} / 255; } static int rgb2xterm256_(int r, int g, int b) { diff --git a/dsp/tty/rgb2ttyf2i.c b/dsp/tty/rgb2ttyf2i.c index 2f5c70d2e..f14d12049 100644 --- a/dsp/tty/rgb2ttyf2i.c +++ b/dsp/tty/rgb2ttyf2i.c @@ -17,12 +17,11 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/tty/quant.h" -#include "third_party/intel/xmmintrin.internal.h" -struct TtyRgb rgb2ttyf2i_(__m128 rgb) { - __v4si i4; +typedef int ttyrgb_i4 __attribute__((__vector_size__(16))); + +struct TtyRgb rgb2ttyf2i_(ttyrgb_m128 rgb) { rgb *= 255; - /* i4 = __builtin_ia32_cvtps2dq(rgb); */ - asm("cvttps2dq\t%0,%1" : "+%x"(rgb), "=x"(i4)); - return rgb2tty(i4[0], i4[1], i4[2]); + ttyrgb_i4 rgbi = {rgb[0], rgb[1], rgb[2], rgb[3]}; + return rgb2tty(rgbi[0], rgbi[1], rgbi[2]); } diff --git a/dsp/tty/rgb2ttyi2f.c b/dsp/tty/rgb2ttyi2f.c index 8c4ef41e5..89bc1c39c 100644 --- a/dsp/tty/rgb2ttyi2f.c +++ b/dsp/tty/rgb2ttyi2f.c @@ -21,5 +21,5 @@ #include "libc/macros.internal.h" struct TtyRgb rgb2ttyi2f_(int r, int g, int b) { - return rgb2ttyf((__m128){r, g, b} / 255); + return rgb2ttyf((ttyrgb_m128){r, g, b} / 255); } diff --git a/dsp/tty/rgb2xterm24f.c b/dsp/tty/rgb2xterm24f.c index 05c12d24b..d78a027aa 100644 --- a/dsp/tty/rgb2xterm24f.c +++ b/dsp/tty/rgb2xterm24f.c @@ -17,25 +17,24 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/tty/quant.h" +#include "libc/macros.internal.h" #include "libc/math.h" +#include "third_party/intel/xmmintrin.internal.h" -/* -struct TtyRgb rgb2tty24f_(__m128 rgb) { - const __v4si kMax = {255, 255, 255, 255}; - const __v4si kMin = {0, 0, 0, 0}; - struct TtyRgb res; - __v4si rgb255; - rgb255 = _mm_min_ps(_mm_max_ps(_mm_cvtps_epi32(rgb * 255), kMin), kMax); - res = (struct TtyRgb){rgb255[0], rgb255[1], rgb255[2], rgb255[3]}; - return res; -} -*/ - -struct TtyRgb rgb2tty24f_(__m128 rgb) { - const __m128 kMax = {1, 1, 1, 1}; - const __m128 kMin = {0, 0, 0, 0}; +struct TtyRgb rgb2tty24f_(ttyrgb_m128 rgb) { +#ifdef __x86_64__ + const ttyrgb_m128 kMax = {1, 1, 1, 1}; + const ttyrgb_m128 kMin = {0, 0, 0, 0}; struct TtyRgb res; rgb = _mm_min_ps(_mm_max_ps(rgb, kMin), kMax) * 255; res = (struct TtyRgb){rgb[0], rgb[1], rgb[2], rgb[3]}; return res; +#else + return (struct TtyRgb){ + MAX(0, MIN(1, rgb[0])) * 255, + MAX(0, MIN(1, rgb[1])) * 255, + MAX(0, MIN(1, rgb[2])) * 255, + MAX(0, MIN(1, rgb[3])) * 255, + }; +#endif } diff --git a/dsp/tty/sendtitle.c b/dsp/tty/sendtitle.c index 7950aaae6..dd19db15c 100644 --- a/dsp/tty/sendtitle.c +++ b/dsp/tty/sendtitle.c @@ -30,13 +30,17 @@ * @param ti comes from ttyident() and null means no-op */ int ttysendtitle(int ttyfd, const char *title, const struct TtyIdent *ti) { + int res; if (ti) { + char *p; if (ti->id == kTtyIdScreen) { - return ttysend(ttyfd, gc(xstrcat("\eP\e]0;", title, "\a\e\\"))); + res = ttysend(ttyfd, (p = xstrcat("\eP\e]0;", title, "\a\e\\"))); } else { - return ttysend(ttyfd, gc(xstrcat("\e]0;", title, "\a"))); + res = ttysend(ttyfd, (p = xstrcat("\e]0;", title, "\a"))); } + free(p); } else { - return 0; + res = 0; } + return res; } diff --git a/dsp/tty/tty2rgbf24.c b/dsp/tty/tty2rgbf24.c index 8ae1fac13..c79ea44de 100644 --- a/dsp/tty/tty2rgbf24.c +++ b/dsp/tty/tty2rgbf24.c @@ -18,6 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/tty/quant.h" -__m128 tty2rgbf24_(struct TtyRgb rgbxt) { - return (__m128){(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b} / 255; +ttyrgb_m128 tty2rgbf24_(struct TtyRgb rgbxt) { + return (ttyrgb_m128){(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b} / 255; } diff --git a/dsp/tty/ttyhisto.c b/dsp/tty/ttyhisto.c index 81330028d..882a710c8 100644 --- a/dsp/tty/ttyhisto.c +++ b/dsp/tty/ttyhisto.c @@ -45,6 +45,8 @@ void ttyhisto(uint32_t histogram[hasatleast 256], histogram[xtcolors[i * 8]]++; } } - imapxlatab(dominant); + for (i = 0; i < 256; ++i) { + dominant[i] = i; + } qsort_r(dominant, 256, 1, (void *)histcmp, histogram); } diff --git a/libc/nexgen32e/imapxlatab.S b/libc/nexgen32e/imapxlatab.S deleted file mode 100644 index 986cf33c4..000000000 --- a/libc/nexgen32e/imapxlatab.S +++ /dev/null @@ -1,38 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" -.text.startup - -// Identity maps 256-byte translation table. -// -// @param char (*rdi)[256] -// @speed 90mBps -// @mode long -imapxlatab: - .leafprologue - .profilable - pushpop 32,%rcx - mov $0x0706050403020100,%rax - mov $0x0808080808080808,%rdx - .balign 8 -1: stosq - add %rdx,%rax - .loop 1b - .leafepilogue - .endfn imapxlatab,globl,hidden diff --git a/libc/nexgen32e/nexgen32e.h b/libc/nexgen32e/nexgen32e.h index 829cbbedd..4a95f6002 100644 --- a/libc/nexgen32e/nexgen32e.h +++ b/libc/nexgen32e/nexgen32e.h @@ -9,7 +9,6 @@ extern const uint32_t kSha256[64]; extern const uint64_t kSha512[80]; extern const unsigned char kTensIndex[64]; -void imapxlatab(void *); void CheckStackIsAligned(void); COSMOPOLITAN_C_END_ diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index 50b0ff70e..1faea3ace 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -481,8 +481,8 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz, * char *stk = _mapstack(); * clone(worker, stk, GetStackSize() - 16, * CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | - * CLONE_SIGHAND | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | - * CLONE_CHILD_CLEARTID | CLONE_SETTLS, + * CLONE_SYSVSEM | CLONE_SIGHAND | CLONE_PARENT_SETTID | + * CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, * arg, &tid, &tib, &tib.tib_tid); * while (atomic_load(&tid) == 0) sched_yield(); * // thread is known @@ -538,6 +538,7 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz, * - `CLONE_FS` * - `CLONE_FILES` * - `CLONE_SIGHAND` + * - `CLONE_SYSVSEM` * * This system call wrapper is intended for threads, and as such, we * won't polyfill Linux's ability to simulate unrelated calls (e.g. @@ -583,10 +584,6 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg, __enable_threads(); } - STRACE("clone(func=%t, stk=%p, stksz=%'zu, flags=%#x, arg=%p, ptid=%p, " - "tls=%p, ctid=%p)", - func, stk, stksz, flags, arg, ptid, tls, ctid); - if (!func) { rc = EINVAL; } else if (!IsTiny() && @@ -605,8 +602,8 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg, (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) != (CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND)) { - STRACE("clone flag unsupported on this platform"); + CLONE_SIGHAND | CLONE_SYSVSEM)) { + STRACE("cosmo clone() is picky about flags, see clone.c"); rc = EINVAL; #ifdef __x86_64__ } else if (IsXnu()) { diff --git a/libc/sock/select-nt.c b/libc/sock/select-nt.c index e0d91de3b..253235cf6 100644 --- a/libc/sock/select-nt.c +++ b/libc/sock/select-nt.c @@ -27,6 +27,8 @@ #include "libc/sysv/consts/poll.h" #include "libc/sysv/errfuns.h" +#ifdef __x86_64__ + int sys_select_nt(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout, const sigset_t *sigmask) { @@ -80,3 +82,5 @@ int sys_select_nt(int nfds, fd_set *readfds, fd_set *writefds, return fdcount; } + +#endif /* __x86_64__ */ diff --git a/libc/thread/spawn.c b/libc/thread/spawn.c index e85b62565..d92ed76a3 100644 --- a/libc/thread/spawn.c +++ b/libc/thread/spawn.c @@ -127,8 +127,8 @@ int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) { spawner->arg = arg; rc = clone(Spawner, th->stk, GetStackSize() - 16 /* openbsd:stackbound */, CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID, + CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | + CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, spawner, &th->ptid, __adj_tls(th->tib), &th->tib->tib_tid); if (rc) { errno = rc; diff --git a/test/libc/calls/sched_setscheduler_test.c b/test/libc/calls/sched_setscheduler_test.c index bae41c467..fc6578668 100644 --- a/test/libc/calls/sched_setscheduler_test.c +++ b/test/libc/calls/sched_setscheduler_test.c @@ -29,6 +29,9 @@ #define DEFAULT_POLICY SCHED_OTHER void SetUp(void) { + if (IsFreebsd() && getuid() != 0) { + exit(0); + } if (IsXnu() || IsWindows() || IsOpenbsd() || IsWindows()) { exit(0); } diff --git a/test/libc/intrin/lock_test.c b/test/libc/intrin/lock_test.c index aa7e75ff1..3c2fad4e0 100644 --- a/test/libc/intrin/lock_test.c +++ b/test/libc/intrin/lock_test.c @@ -125,7 +125,7 @@ void TestContendedLock(const char *name, int kind) { stk = _mapstack(); rc = clone(Worker, stk, GetStackSize() - 16 /* openbsd:stackbound */, CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | + CLONE_SYSVSEM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, 0, &tid, &tib, &tib.tib_tid); if (rc) { diff --git a/tool/viz/lib/ycbcr2rgb3.c b/tool/viz/lib/ycbcr2rgb3.c index 6732fb6d7..ff1e557e3 100644 --- a/tool/viz/lib/ycbcr2rgb3.c +++ b/tool/viz/lib/ycbcr2rgb3.c @@ -151,13 +151,16 @@ void YCbCrComputeCoefficients(int swing, double gamma, void YCbCrInit(struct YCbCr **ycbcr, bool yonly, int swing, double gamma, const double gamut[3], const double illuminant[3]) { + int i; if (!*ycbcr) *ycbcr = xcalloc(1, sizeof(struct YCbCr)); (*ycbcr)->yonly = yonly; bzero((*ycbcr)->magnums, sizeof((*ycbcr)->magnums)); bzero((*ycbcr)->lighting, sizeof((*ycbcr)->lighting)); YCbCrComputeCoefficients(swing, gamma, gamut, illuminant, (*ycbcr)->magnums, (*ycbcr)->lighting, (*ycbcr)->transfer[0]); - imapxlatab((*ycbcr)->transfer[1]); + for (i = 0; i < 256; ++i) { + (*ycbcr)->transfer[1][i] = i; + } } void YCbCrFree(struct YCbCr **ycbcr) {