Do some more aarch64 fixups

This commit is contained in:
Justine Tunney 2023-05-09 23:35:10 -07:00
parent 86d9323a43
commit 59766efd3e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
17 changed files with 63 additions and 89 deletions

View file

@ -1,21 +1,22 @@
#ifndef COSMOPOLITAN_DSP_TTY_INTERNAL_H_ #ifndef COSMOPOLITAN_DSP_TTY_INTERNAL_H_
#define COSMOPOLITAN_DSP_TTY_INTERNAL_H_ #define COSMOPOLITAN_DSP_TTY_INTERNAL_H_
#include "dsp/tty/quant.h"
#include "dsp/tty/ttyrgb.h" #include "dsp/tty/ttyrgb.h"
#include "third_party/intel/xmmintrin.internal.h" #include "third_party/intel/xmmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0) #if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_ COSMOPOLITAN_C_START_
struct TtyRgb rgb2tty24f_(__m128); struct TtyRgb rgb2tty24f_(ttyrgb_m128);
struct TtyRgb rgb2ttyf2i_(__m128); struct TtyRgb rgb2ttyf2i_(ttyrgb_m128);
struct TtyRgb rgb2ttyi2f_(int, int, int); struct TtyRgb rgb2ttyi2f_(int, int, int);
struct TtyRgb rgb2ansi_(int, int, int); struct TtyRgb rgb2ansi_(int, int, int);
struct TtyRgb rgb2ansihash_(int, int, int); struct TtyRgb rgb2ansihash_(int, int, int);
struct TtyRgb rgb2xterm24_(int, int, int); struct TtyRgb rgb2xterm24_(int, int, int);
struct TtyRgb rgb2xterm256gray_(__m128); struct TtyRgb rgb2xterm256gray_(ttyrgb_m128);
struct TtyRgb tty2rgb_(struct TtyRgb); struct TtyRgb tty2rgb_(struct TtyRgb);
struct TtyRgb tty2rgb24_(struct TtyRgb); struct TtyRgb tty2rgb24_(struct TtyRgb);
__m128 tty2rgbf_(struct TtyRgb); ttyrgb_m128 tty2rgbf_(struct TtyRgb);
__m128 tty2rgbf24_(struct TtyRgb); ttyrgb_m128 tty2rgbf24_(struct TtyRgb);
char *setbg16_(char *, struct TtyRgb); char *setbg16_(char *, struct TtyRgb);
char *setfg16_(char *, struct TtyRgb); char *setfg16_(char *, struct TtyRgb);

View file

@ -5,7 +5,6 @@
#include "libc/intrin/bits.h" #include "libc/intrin/bits.h"
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "third_party/intel/xmmintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0) #if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_ COSMOPOLITAN_C_START_
@ -14,12 +13,14 @@ COSMOPOLITAN_C_START_
#define BL 2 #define BL 2
#define BR 3 #define BR 3
typedef __m128 (*tty2rgbf_f)(struct TtyRgb); typedef float ttyrgb_m128 __attribute__((__vector_size__(16), __may_alias__));
typedef ttyrgb_m128 (*tty2rgbf_f)(struct TtyRgb);
typedef char *(*setbg_f)(char *, struct TtyRgb); typedef char *(*setbg_f)(char *, struct TtyRgb);
typedef char *(*setbgfg_f)(char *, struct TtyRgb, struct TtyRgb); typedef char *(*setbgfg_f)(char *, struct TtyRgb, struct TtyRgb);
typedef char *(*setfg_f)(char *, struct TtyRgb); typedef char *(*setfg_f)(char *, struct TtyRgb);
typedef struct TtyRgb (*rgb2tty_f)(int, int, int); typedef struct TtyRgb (*rgb2tty_f)(int, int, int);
typedef struct TtyRgb (*rgb2ttyf_f)(__m128); typedef struct TtyRgb (*rgb2ttyf_f)(ttyrgb_m128);
typedef struct TtyRgb (*tty2rgb_f)(struct TtyRgb); typedef struct TtyRgb (*tty2rgb_f)(struct TtyRgb);
typedef struct TtyRgb ttypalette_t[2][8]; typedef struct TtyRgb ttypalette_t[2][8];

View file

@ -39,9 +39,9 @@ struct TtyRgb tty2rgb_(struct TtyRgb rgbxt) {
return g_ansi2rgb_[rgbxt.xt]; return g_ansi2rgb_[rgbxt.xt];
} }
__m128 tty2rgbf_(struct TtyRgb rgbxt) { ttyrgb_m128 tty2rgbf_(struct TtyRgb rgbxt) {
rgbxt = g_ansi2rgb_[rgbxt.xt]; rgbxt = g_ansi2rgb_[rgbxt.xt];
return (__m128){(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b} / 255; return (ttyrgb_m128){(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b} / 255;
} }
static int rgb2xterm256_(int r, int g, int b) { static int rgb2xterm256_(int r, int g, int b) {

View file

@ -17,12 +17,11 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "dsp/tty/quant.h" #include "dsp/tty/quant.h"
#include "third_party/intel/xmmintrin.internal.h"
struct TtyRgb rgb2ttyf2i_(__m128 rgb) { typedef int ttyrgb_i4 __attribute__((__vector_size__(16)));
__v4si i4;
struct TtyRgb rgb2ttyf2i_(ttyrgb_m128 rgb) {
rgb *= 255; rgb *= 255;
/* i4 = __builtin_ia32_cvtps2dq(rgb); */ ttyrgb_i4 rgbi = {rgb[0], rgb[1], rgb[2], rgb[3]};
asm("cvttps2dq\t%0,%1" : "+%x"(rgb), "=x"(i4)); return rgb2tty(rgbi[0], rgbi[1], rgbi[2]);
return rgb2tty(i4[0], i4[1], i4[2]);
} }

View file

@ -21,5 +21,5 @@
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
struct TtyRgb rgb2ttyi2f_(int r, int g, int b) { struct TtyRgb rgb2ttyi2f_(int r, int g, int b) {
return rgb2ttyf((__m128){r, g, b} / 255); return rgb2ttyf((ttyrgb_m128){r, g, b} / 255);
} }

View file

@ -17,25 +17,24 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "dsp/tty/quant.h" #include "dsp/tty/quant.h"
#include "libc/macros.internal.h"
#include "libc/math.h" #include "libc/math.h"
#include "third_party/intel/xmmintrin.internal.h"
/* struct TtyRgb rgb2tty24f_(ttyrgb_m128 rgb) {
struct TtyRgb rgb2tty24f_(__m128 rgb) { #ifdef __x86_64__
const __v4si kMax = {255, 255, 255, 255}; const ttyrgb_m128 kMax = {1, 1, 1, 1};
const __v4si kMin = {0, 0, 0, 0}; const ttyrgb_m128 kMin = {0, 0, 0, 0};
struct TtyRgb res;
__v4si rgb255;
rgb255 = _mm_min_ps(_mm_max_ps(_mm_cvtps_epi32(rgb * 255), kMin), kMax);
res = (struct TtyRgb){rgb255[0], rgb255[1], rgb255[2], rgb255[3]};
return res;
}
*/
struct TtyRgb rgb2tty24f_(__m128 rgb) {
const __m128 kMax = {1, 1, 1, 1};
const __m128 kMin = {0, 0, 0, 0};
struct TtyRgb res; struct TtyRgb res;
rgb = _mm_min_ps(_mm_max_ps(rgb, kMin), kMax) * 255; rgb = _mm_min_ps(_mm_max_ps(rgb, kMin), kMax) * 255;
res = (struct TtyRgb){rgb[0], rgb[1], rgb[2], rgb[3]}; res = (struct TtyRgb){rgb[0], rgb[1], rgb[2], rgb[3]};
return res; return res;
#else
return (struct TtyRgb){
MAX(0, MIN(1, rgb[0])) * 255,
MAX(0, MIN(1, rgb[1])) * 255,
MAX(0, MIN(1, rgb[2])) * 255,
MAX(0, MIN(1, rgb[3])) * 255,
};
#endif
} }

View file

@ -30,13 +30,17 @@
* @param ti comes from ttyident() and null means no-op * @param ti comes from ttyident() and null means no-op
*/ */
int ttysendtitle(int ttyfd, const char *title, const struct TtyIdent *ti) {
  char *seq;
  int rc;
  /* without an identity there's nothing to send */
  if (!ti) return 0;
  if (ti->id == kTtyIdScreen) {
    /* kTtyIdScreen gets the title sequence wrapped in ESC P ... ESC \ */
    seq = xstrcat("\eP\e]0;", title, "\a\e\\");
  } else {
    seq = xstrcat("\e]0;", title, "\a");
  }
  rc = ttysend(ttyfd, seq);
  /* the concatenated sequence is owned here, so release it after use */
  free(seq);
  return rc;
}

View file

@ -18,6 +18,6 @@
*/ */
#include "dsp/tty/quant.h" #include "dsp/tty/quant.h"
/**
 * Converts terminal color to floating point RGB without palette lookup.
 *
 * @param rgbxt has its r, g, b fields read directly
 * @return vector with each field divided by 255 in lanes 0..2 and
 *     zero in lane 3
 */
ttyrgb_m128 tty2rgbf24_(struct TtyRgb rgbxt) {
  ttyrgb_m128 channels = {(int)rgbxt.r, (int)rgbxt.g, (int)rgbxt.b};
  return channels / 255;
}

View file

@ -45,6 +45,8 @@ void ttyhisto(uint32_t histogram[hasatleast 256],
histogram[xtcolors[i * 8]]++; histogram[xtcolors[i * 8]]++;
} }
} }
imapxlatab(dominant); for (i = 0; i < 256; ++i) {
dominant[i] = i;
}
qsort_r(dominant, 256, 1, (void *)histcmp, histogram); qsort_r(dominant, 256, 1, (void *)histcmp, histogram);
} }

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.text.startup
// Identity maps 256-byte translation table.
//
// @param char (*rdi)[256]
// @speed 90mBps
// @mode long
imapxlatab:
.leafprologue
.profilable
pushpop 32,%rcx
mov $0x0706050403020100,%rax
mov $0x0808080808080808,%rdx
.balign 8
1: stosq
add %rdx,%rax
.loop 1b
.leafepilogue
.endfn imapxlatab,globl,hidden

View file

@ -9,7 +9,6 @@ extern const uint32_t kSha256[64];
extern const uint64_t kSha512[80]; extern const uint64_t kSha512[80];
extern const unsigned char kTensIndex[64]; extern const unsigned char kTensIndex[64];
void imapxlatab(void *);
void CheckStackIsAligned(void); void CheckStackIsAligned(void);
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_

View file

@ -481,8 +481,8 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
* char *stk = _mapstack(); * char *stk = _mapstack();
* clone(worker, stk, GetStackSize() - 16, * clone(worker, stk, GetStackSize() - 16,
* CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | * CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
* CLONE_SIGHAND | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | * CLONE_SYSVSEM | CLONE_SIGHAND | CLONE_PARENT_SETTID |
* CLONE_CHILD_CLEARTID | CLONE_SETTLS, * CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
* arg, &tid, &tib, &tib.tib_tid); * arg, &tid, &tib, &tib.tib_tid);
* while (atomic_load(&tid) == 0) sched_yield(); * while (atomic_load(&tid) == 0) sched_yield();
* // thread is known * // thread is known
@ -538,6 +538,7 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
* - `CLONE_FS` * - `CLONE_FS`
* - `CLONE_FILES` * - `CLONE_FILES`
* - `CLONE_SIGHAND` * - `CLONE_SIGHAND`
* - `CLONE_SYSVSEM`
* *
* This system call wrapper is intended for threads, and as such, we * This system call wrapper is intended for threads, and as such, we
* won't polyfill Linux's ability to simulate unrelated calls (e.g. * won't polyfill Linux's ability to simulate unrelated calls (e.g.
@ -583,10 +584,6 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
__enable_threads(); __enable_threads();
} }
STRACE("clone(func=%t, stk=%p, stksz=%'zu, flags=%#x, arg=%p, ptid=%p, "
"tls=%p, ctid=%p)",
func, stk, stksz, flags, arg, ptid, tls, ctid);
if (!func) { if (!func) {
rc = EINVAL; rc = EINVAL;
} else if (!IsTiny() && } else if (!IsTiny() &&
@ -605,8 +602,8 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
(flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID | (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) != CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) !=
(CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | (CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND)) { CLONE_SIGHAND | CLONE_SYSVSEM)) {
STRACE("clone flag unsupported on this platform"); STRACE("cosmo clone() is picky about flags, see clone.c");
rc = EINVAL; rc = EINVAL;
#ifdef __x86_64__ #ifdef __x86_64__
} else if (IsXnu()) { } else if (IsXnu()) {

View file

@ -27,6 +27,8 @@
#include "libc/sysv/consts/poll.h" #include "libc/sysv/consts/poll.h"
#include "libc/sysv/errfuns.h" #include "libc/sysv/errfuns.h"
#ifdef __x86_64__
int sys_select_nt(int nfds, fd_set *readfds, fd_set *writefds, int sys_select_nt(int nfds, fd_set *readfds, fd_set *writefds,
fd_set *exceptfds, struct timeval *timeout, fd_set *exceptfds, struct timeval *timeout,
const sigset_t *sigmask) { const sigset_t *sigmask) {
@ -80,3 +82,5 @@ int sys_select_nt(int nfds, fd_set *readfds, fd_set *writefds,
return fdcount; return fdcount;
} }
#endif /* __x86_64__ */

View file

@ -127,8 +127,8 @@ int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) {
spawner->arg = arg; spawner->arg = arg;
rc = clone(Spawner, th->stk, GetStackSize() - 16 /* openbsd:stackbound */, rc = clone(Spawner, th->stk, GetStackSize() - 16 /* openbsd:stackbound */,
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID |
CLONE_CHILD_CLEARTID, CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
spawner, &th->ptid, __adj_tls(th->tib), &th->tib->tib_tid); spawner, &th->ptid, __adj_tls(th->tib), &th->tib->tib_tid);
if (rc) { if (rc) {
errno = rc; errno = rc;

View file

@ -29,6 +29,9 @@
#define DEFAULT_POLICY SCHED_OTHER #define DEFAULT_POLICY SCHED_OTHER
void SetUp(void) { void SetUp(void) {
if (IsFreebsd() && getuid() != 0) {
exit(0);
}
if (IsXnu() || IsWindows() || IsOpenbsd() || IsWindows()) { if (IsXnu() || IsWindows() || IsOpenbsd() || IsWindows()) {
exit(0); exit(0);
} }

View file

@ -125,7 +125,7 @@ void TestContendedLock(const char *name, int kind) {
stk = _mapstack(); stk = _mapstack();
rc = clone(Worker, stk, GetStackSize() - 16 /* openbsd:stackbound */, rc = clone(Worker, stk, GetStackSize() - 16 /* openbsd:stackbound */,
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_SYSVSEM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID | CLONE_SETTLS, CLONE_CHILD_CLEARTID | CLONE_SETTLS,
0, &tid, &tib, &tib.tib_tid); 0, &tid, &tib, &tib.tib_tid);
if (rc) { if (rc) {

View file

@ -151,13 +151,16 @@ void YCbCrComputeCoefficients(int swing, double gamma,
void YCbCrInit(struct YCbCr **ycbcr, bool yonly, int swing, double gamma, void YCbCrInit(struct YCbCr **ycbcr, bool yonly, int swing, double gamma,
const double gamut[3], const double illuminant[3]) { const double gamut[3], const double illuminant[3]) {
int i;
if (!*ycbcr) *ycbcr = xcalloc(1, sizeof(struct YCbCr)); if (!*ycbcr) *ycbcr = xcalloc(1, sizeof(struct YCbCr));
(*ycbcr)->yonly = yonly; (*ycbcr)->yonly = yonly;
bzero((*ycbcr)->magnums, sizeof((*ycbcr)->magnums)); bzero((*ycbcr)->magnums, sizeof((*ycbcr)->magnums));
bzero((*ycbcr)->lighting, sizeof((*ycbcr)->lighting)); bzero((*ycbcr)->lighting, sizeof((*ycbcr)->lighting));
YCbCrComputeCoefficients(swing, gamma, gamut, illuminant, (*ycbcr)->magnums, YCbCrComputeCoefficients(swing, gamma, gamut, illuminant, (*ycbcr)->magnums,
(*ycbcr)->lighting, (*ycbcr)->transfer[0]); (*ycbcr)->lighting, (*ycbcr)->transfer[0]);
imapxlatab((*ycbcr)->transfer[1]); for (i = 0; i < 256; ++i) {
(*ycbcr)->transfer[1][i] = i;
}
} }
void YCbCrFree(struct YCbCr **ycbcr) { void YCbCrFree(struct YCbCr **ycbcr) {