Productionize new APE loader and more

The APE_NO_MODIFY_SELF loader payload has been moved out of the examples
folder and improved so that it works on BSD systems, and permits general
elf program headers. This brings its quality up enough that it should be
acceptable to use by default for many programs, e.g. Python, Lua, and
SQLite. It's the responsibility of the user to define an appropriate
TMPDIR if /tmp is considered an adversarial environment. Mac OS shall be
supported by APE_NO_MODIFY_SELF soon.

Fixes and improvements have been made to program_executable_name as it's
now the one true way to get the absolute path of the executing image.

This change fixes a memory leak in linenoise history loading, introduced
by performance optimizations in 51904e2687.
This change fixes a longstanding regression with Mach system calls that
was introduced by 23ae9dfceb back in February and which impacted our
sched_yield() implementation, which is why no one noticed until now.

The Blinkenlights PC emulator has been improved. We now fix rendering on
XNU and BSD by not making the assumption that the kernel terminal driver
understands UTF8 since that seems to break its internal modeling of \r\n
which is now being addressed by using \e[𝑦H instead. The paneling is now
more compact in real mode so you won't need to make your font as tiny if
you're only emulating an 8086 program. The CLMUL ISA is now emulated too.

This change also makes improvements to time. CLOCK_MONOTONIC now does the
right thing on Windows NT. The nanosecond time module functions added in
Python 3.7 have been backported.

This change doubles the performance of Argon2 password stretching simply
by not using its copy_block and xor_block helper functions, as they were
trivial to inline thus resulting in us needing to iterate over each 1024
byte block four fewer times.

This change makes code size improvements. _PyUnicode_ToNumeric() was 64k
in size and now it's 10k. The CJK codec lookup tables now use lazy delta
zigzag deflate (δzd) encoding which reduces their size from 600k to 200k
plus the code bloat caused by macro abuse in _decimal.c is now addressed
so our fully-loaded statically-linked hermetically-sealed Python virtual
interpreter container is now 9.4 megs in the default build mode and 5.5m
in MODE=tiny which leaves plenty of room for chibicc.

The pydoc web server now accommodates the use case of people who work by
SSH'ing into a different machine w/ python.com -m pydoc -p8080 -h0.0.0.0

Finally, Python Capsulae delenda est and won't be supported in the future.
This commit is contained in:
Justine Tunney 2021-10-02 08:17:04 -07:00
parent 9cb54218ab
commit 47a53e143b
270 changed files with 214544 additions and 23331 deletions

52
tool/build/lib/clmul.c Normal file
View file

@ -0,0 +1,52 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/x86feature.h"
#include "tool/build/lib/clmul.h"
#include "tool/build/lib/endian.h"
#include "tool/build/lib/modrm.h"
/**
* @fileoverview Carryless Multiplication ISA
*/
/**
 * Result of a 64x64 carry-less multiplication, split into two halves.
 */
struct clmul {
  uint64_t x; /* low 64 bits of the product */
  uint64_t y; /* high 64 bits of the product */
};
/**
 * Multiplies two 64-bit polynomials over GF(2), i.e. without carries.
 *
 * @param a is the first 64-bit operand
 * @param b is the second 64-bit operand
 * @return 128-bit product with low half in `x` and high half in `y`
 */
static struct clmul clmul(uint64_t a, uint64_t b) {
  uint64_t lo = 0;
  uint64_t hi = 0;
  uint64_t spill;
  uint64_t swap;
  if (a && b) {
    /* walk the bits of whichever operand has the lower top bit */
    if (bsrl(a) < bsrl(b)) {
      swap = a;
      a = b;
      b = swap;
    }
    /* spill:a acts as a 128-bit copy of `a` shifted left once per step */
    spill = 0;
    while (b) {
      if (b & 1) {
        lo ^= a;
        hi ^= spill;
      }
      spill = spill << 1 | a >> 63;
      a <<= 1;
      b >>= 1;
    }
  }
  return (struct clmul){lo, hi};
}
void OpSsePclmulqdq(struct Machine *m, uint32_t rde) {
struct clmul res;
res = clmul(Read64(XmmRexrReg(m, rde) + ((m->xedd->op.uimm0 & 0x01) << 3)),
Read64(GetModrmRegisterXmmPointerRead16(m, rde) +
((m->xedd->op.uimm0 & 0x10) >> 1)));
Write64(XmmRexrReg(m, rde) + 0, res.x);
Write64(XmmRexrReg(m, rde) + 8, res.y);
}

11
tool/build/lib/clmul.h Normal file
View file

@ -0,0 +1,11 @@
#ifndef COSMOPOLITAN_TOOL_BUILD_LIB_CLMUL_H_
#define COSMOPOLITAN_TOOL_BUILD_LIB_CLMUL_H_
#include "tool/build/lib/machine.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * Instruction handler for carry-less multiplication (PCLMULQDQ).
 *
 * Takes the emulated machine and a 32-bit value that the definition in
 * tool/build/lib/clmul.c names `rde` and forwards to its operand accessors.
 */
void OpSsePclmulqdq(struct Machine *, uint32_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_TOOL_BUILD_LIB_CLMUL_H_ */

View file

@ -35,7 +35,7 @@ void OpCpuid(struct Machine *m, uint32_t rde) {
break;
case 1:
cx |= 1 << 0; // sse3
cx |= 0 << 1; // pclmulqdq
cx |= 1 << 1; // pclmulqdq
cx |= 1 << 9; // ssse3
cx |= 1 << 23; // popcnt
cx |= 1 << 30; // rdrnd

View file

@ -95,7 +95,9 @@ static char *DisError(struct Dis *d, char *p) {
static char *DisAddr(struct Dis *d, char *p) {
int64_t x = d->addr;
if (-2147483648 <= x && x <= 2147483647) {
if (0 <= x && x < 0x10fff0) {
return p + uint64toarray_fixed16(x, p, 24);
} else if (-2147483648 <= x && x <= 2147483647) {
return p + uint64toarray_fixed16(x, p, 32);
} else {
return p + uint64toarray_fixed16(x, p, 48);
@ -104,7 +106,13 @@ static char *DisAddr(struct Dis *d, char *p) {
static char *DisRaw(struct Dis *d, char *p) {
long i;
for (i = 0; i < PFIXLEN - MIN(PFIXLEN, d->xedd->op.PIVOTOP); ++i) {
int plen;
if (0 <= d->addr && d->addr < 0x10fff0) {
plen = 2;
} else {
plen = PFIXLEN;
}
for (i = 0; i < plen - MIN(plen, d->xedd->op.PIVOTOP); ++i) {
*p++ = ' ';
*p++ = ' ';
}
@ -127,8 +135,16 @@ static char *DisCode(struct Dis *d, char *p) {
}
static char *DisLineCode(struct Dis *d, char *p) {
int blen, plen;
if (0 <= d->addr && d->addr < 0x10fff0) {
plen = 2;
blen = 6;
} else {
blen = BYTELEN;
plen = PFIXLEN;
}
p = DisColumn(DisAddr(d, p), p, ADDRLEN);
p = DisColumn(DisRaw(d, p), p, PFIXLEN * 2 + 1 + BYTELEN * 2);
p = DisColumn(DisRaw(d, p), p, plen * 2 + 1 + blen * 2);
p = DisCode(d, p);
return p;
}

View file

@ -24,114 +24,214 @@
#include "tool/build/lib/modrm.h"
#include "tool/build/lib/throw.h"
void OpDivAlAhAxEbSigned(struct Machine *m, uint32_t rde) {
int8_t y, rem;
int16_t x, quo;
x = Read16(m->ax);
y = Read8(GetModrmRegisterBytePointerRead(m, rde));
if (!y || (x == INT16_MIN && y == -1)) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(INT8_MIN <= quo && quo <= INT8_MAX)) ThrowDivideError(m);
m->ax[0] = quo & 0xff;
m->ax[1] = rem & 0xff;
/**
 * 128-bit unsigned quantity stored as two 64-bit halves.
 */
struct Dubble {
  uint64_t lo; /* bits 0 through 63 */
  uint64_t hi; /* bits 64 through 127 */
};
static inline struct Dubble DubbleNeg(struct Dubble x) {
struct Dubble d;
d.lo = -x.lo;
d.hi = ~(x.hi - (x.lo - 1 > x.lo));
return d;
}
void OpDivAlAhAxEbUnsigned(struct Machine *m, uint32_t rde) {
uint8_t y, rem;
uint16_t x, quo;
static inline struct Dubble DubbleShl(struct Dubble x) {
struct Dubble d;
d.lo = x.lo << 1;
d.hi = x.hi << 1 | x.lo >> 63;
return d;
}
static inline struct Dubble DubbleShr(struct Dubble x) {
struct Dubble d;
d.lo = x.lo >> 1 | x.hi << 63;
d.hi = x.hi >> 1;
return d;
}
/**
 * Returns 1 if a <= b when both are compared as unsigned 128-bit values,
 * otherwise 0.
 */
static inline unsigned DubbleLte(struct Dubble a, struct Dubble b) {
  if (a.hi != b.hi) {
    return a.hi < b.hi; /* high halves decide when they differ */
  }
  return a.lo <= b.lo;
}
static struct Dubble DubbleMul(uint64_t a, uint64_t b) {
struct Dubble d;
uint64_t x, y, t;
x = (a & 0xffffffff) * (b & 0xffffffff);
t = x >> 32;
x &= 0xffffffff;
t += (a >> 32) * (b & 0xffffffff);
x += (t & 0xffffffff) << 32;
y = t >> 32;
t = x >> 32;
x &= 0xffffffff;
t += (b >> 32) * (a & 0xffffffff);
x += (t & 0xffffffff) << 32;
y += t >> 32;
y += (a >> 32) * (b >> 32);
d.lo = x;
d.hi = y;
return d;
}
static struct Dubble DubbleImul(uint64_t a, uint64_t b) {
unsigned s, t;
struct Dubble p;
if ((s = a >> 63)) a = -a;
if ((t = b >> 63)) b = -b;
p = DubbleMul(a, b);
return s ^ t ? DubbleNeg(p) : p;
}
/**
 * Divides the unsigned 128-bit value `a` by the unsigned 64-bit value `b`
 * using shift-and-subtract long division.
 *
 * @param a is the dividend
 * @param b is the divisor; zero is not checked for here
 * @param r receives the low 64 bits of what is left of `a` afterwards
 * @return the 128-bit quotient
 *
 * NOTE(review): DubbleShl() discards bit 127, so if the widened divisor
 * still compares <= a once its top bit is set, the next shift loses that
 * bit. This looks reachable only when a >= 2^127; confirm that callers
 * never rely on the remainder in that case.
 */
static struct Dubble DubbleDiv(struct Dubble a, uint64_t b, uint64_t *r) {
int n, c;
uint64_t s;
struct Dubble d, q, t;
d.lo = b, d.hi = 0;
q.lo = 0, q.hi = 0;
/* shift the divisor left while it is <= a, counting the shifts (max 128) */
for (n = 0; DubbleLte(d, a) && n < 128; ++n) {
d = DubbleShl(d);
}
/* one quotient bit is produced per shift counted above */
for (; n > 0; --n) {
t = a; /* saved so a failed trial subtraction can be undone */
d = DubbleShr(d);
q = DubbleShl(q);
/* a -= d as two 64-bit subtractions; c holds the borrow out of each */
s = a.lo, a.lo -= d.lo + 0, c = a.lo > s;
s = a.hi, a.hi -= d.hi + c, c = a.hi > s;
if (c) {
a = t; /* borrow out of the high half: put the old value back */
} else {
q.lo++; /* subtraction kept: set the new low quotient bit */
}
}
*r = a.lo;
return q;
}
static struct Dubble DubbleIdiv(struct Dubble a, uint64_t b, uint64_t *r) {
unsigned s, t;
struct Dubble q;
if ((s = a.hi >> 63)) a = DubbleNeg(a);
if ((t = b >> 63)) b = -b;
q = DubbleDiv(a, b, r);
if (s ^ t) q = DubbleNeg(q);
if (s) *r = -*r;
return q;
}
void OpDivAlAhAxEbSigned(struct Machine *m, uint32_t rde) {
int8_t y, r;
int16_t x, q;
x = Read16(m->ax);
y = Read8(GetModrmRegisterBytePointerRead(m, rde));
if (!y) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(UINT8_MIN <= quo && quo <= UINT8_MAX)) ThrowDivideError(m);
m->ax[0] = quo & 0xff;
m->ax[1] = rem & 0xff;
if (x == INT16_MIN) ThrowDivideError(m);
q = x / y;
r = x % y;
if (q != (int8_t)q) ThrowDivideError(m);
m->ax[0] = q & 0xff;
m->ax[1] = r & 0xff;
}
/**
 * Unsigned divide of the 16-bit value in `m->ax` by a byte operand.
 *
 * Calls ThrowDivideError() when the divisor is zero or the quotient does
 * not fit in eight bits. Otherwise the quotient is stored in `m->ax[0]`
 * and the remainder in `m->ax[1]`.
 */
void OpDivAlAhAxEbUnsigned(struct Machine *m, uint32_t rde) {
  uint16_t num, quo;
  uint8_t den, rem;
  num = Read16(m->ax);
  den = Read8(GetModrmRegisterBytePointerRead(m, rde));
  if (den == 0) {
    ThrowDivideError(m);
  }
  quo = num / den;
  rem = num % den;
  if (quo > 0xff) {
    ThrowDivideError(m);
  }
  m->ax[0] = quo & 0xff;
  m->ax[1] = rem & 0xff;
}
static void OpDivRdxRaxEvqpSigned64(struct Machine *m, uint32_t rde,
uint8_t *p) {
int64_t y, rem;
int128_t x, quo;
x = (uint128_t)Read64(m->dx) << 64 | Read64(m->ax);
y = Read64(p);
if (!y || (x == INT128_MIN && y == -1)) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(INT64_MIN <= quo && quo <= INT64_MAX)) ThrowDivideError(m);
Write64(m->ax, quo);
Write64(m->dx, rem);
uint64_t d, r;
struct Dubble q;
q.lo = Read64(m->ax);
q.hi = Read64(m->dx);
d = Read64(p);
if (!d) ThrowDivideError(m);
if (!q.lo && q.hi == 0x8000000000000000) ThrowDivideError(m);
q = DubbleIdiv(q, d, &r);
if ((int64_t)q.lo < 0 && (int64_t)q.hi != -1) ThrowDivideError(m);
if ((int64_t)q.lo >= 0 && q.hi) ThrowDivideError(m);
Write64(m->ax, q.lo);
Write64(m->dx, r);
}
static void OpDivRdxRaxEvqpSigned32(struct Machine *m, uint32_t rde,
uint8_t *p) {
int32_t y, rem;
int64_t x, quo;
int32_t y, r;
int64_t x, q;
x = (uint64_t)Read32(m->dx) << 32 | Read32(m->ax);
y = Read32(p);
if (!y || (x == INT64_MIN && y == -1)) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(INT32_MIN <= quo && quo <= INT32_MAX)) ThrowDivideError(m);
Write64(m->ax, quo & 0xffffffff);
Write64(m->dx, rem & 0xffffffff);
if (!y) ThrowDivideError(m);
if (x == INT64_MIN) ThrowDivideError(m);
q = x / y;
r = x % y;
if (q != (int32_t)q) ThrowDivideError(m);
Write64(m->ax, q & 0xffffffff);
Write64(m->dx, r & 0xffffffff);
}
static void OpDivRdxRaxEvqpSigned16(struct Machine *m, uint32_t rde,
uint8_t *p) {
int16_t y, rem;
int32_t x, quo;
int16_t y, r;
int32_t x, q;
x = (uint32_t)Read16(m->dx) << 16 | Read16(m->ax);
y = Read16(p);
if (!y || (x == INT32_MIN && y == -1)) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(INT16_MIN <= quo && quo <= INT16_MAX)) ThrowDivideError(m);
Write16(m->ax, quo);
Write16(m->dx, rem);
if (!y) ThrowDivideError(m);
if (x == INT32_MIN) ThrowDivideError(m);
q = x / y;
r = x % y;
if (q != (int16_t)q) ThrowDivideError(m);
Write16(m->ax, q);
Write16(m->dx, r);
}
static void OpDivRdxRaxEvqpUnsigned16(struct Machine *m, uint32_t rde,
uint8_t *p) {
uint16_t y, rem;
uint32_t x, quo;
uint16_t y, r;
uint32_t x, q;
x = (uint32_t)Read16(m->dx) << 16 | Read16(m->ax);
y = Read16(p);
if (!y) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(UINT16_MIN <= quo && quo <= UINT16_MAX)) ThrowDivideError(m);
Write16(m->ax, quo);
Write16(m->dx, rem);
q = x / y;
r = x % y;
if (q > 65535) ThrowDivideError(m);
Write16(m->ax, q);
Write16(m->dx, r);
}
static void OpDivRdxRaxEvqpUnsigned32(struct Machine *m, uint32_t rde,
uint8_t *p) {
uint32_t y, rem;
uint64_t x, quo;
uint32_t y, r;
uint64_t x, q;
x = (uint64_t)Read32(m->dx) << 32 | Read32(m->ax);
y = Read32(p);
if (!y) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(UINT32_MIN <= quo && quo <= UINT32_MAX)) ThrowDivideError(m);
Write64(m->ax, quo & 0xffffffff);
Write64(m->dx, rem & 0xffffffff);
q = x / y;
r = x % y;
if (q > 4294967295) ThrowDivideError(m);
Write64(m->ax, q & 0xffffffff);
Write64(m->dx, r & 0xffffffff);
}
static void OpDivRdxRaxEvqpUnsigned64(struct Machine *m, uint32_t rde,
uint8_t *p) {
uint64_t y, rem;
uint128_t x, quo;
x = (uint128_t)Read64(m->dx) << 64 | Read64(m->ax);
y = Read64(p);
if (!y) ThrowDivideError(m);
quo = x / y;
rem = x % y;
if (!(UINT64_MIN <= quo && quo <= UINT64_MAX)) ThrowDivideError(m);
Write64(m->ax, quo);
Write64(m->dx, rem);
uint64_t d, r;
struct Dubble q;
q.lo = Read64(m->ax);
q.hi = Read64(m->dx);
d = Read64(p);
if (!d) ThrowDivideError(m);
q = DubbleDiv(q, d, &r);
if (q.hi) ThrowDivideError(m);
Write64(m->ax, q.lo);
Write64(m->dx, r);
}
void OpDivRdxRaxEvqpSigned(struct Machine *m, uint32_t rde) {
@ -159,9 +259,9 @@ void OpDivRdxRaxEvqpUnsigned(struct Machine *m, uint32_t rde) {
}
void OpMulAxAlEbSigned(struct Machine *m, uint32_t rde) {
bool of;
int16_t ax;
uint8_t *p;
unsigned of;
p = GetModrmRegisterBytePointerRead(m, rde);
ax = (int8_t)Read8(m->ax) * (int8_t)Read8(p);
of = ax != (int8_t)ax;
@ -172,8 +272,8 @@ void OpMulAxAlEbSigned(struct Machine *m, uint32_t rde) {
void OpMulAxAlEbUnsigned(struct Machine *m, uint32_t rde) {
int ax;
bool of;
uint8_t *p;
unsigned of;
p = GetModrmRegisterBytePointerRead(m, rde);
ax = Read8(m->ax) * Read8(p);
of = ax != (uint8_t)ax;
@ -183,28 +283,25 @@ void OpMulAxAlEbUnsigned(struct Machine *m, uint32_t rde) {
}
void OpMulRdxRaxEvqpSigned(struct Machine *m, uint32_t rde) {
bool of;
uint8_t *p;
unsigned of;
int32_t dxax;
int64_t edxeax;
int128_t rdxrax;
struct Dubble rdxrax;
p = GetModrmRegisterWordPointerReadOszRexw(m, rde);
if (Rexw(rde)) {
__builtin_mul_overflow((int128_t)(int64_t)Read64(m->ax), (int64_t)Read64(p),
&rdxrax);
of = (int128_t)rdxrax != (int64_t)rdxrax;
Write64(m->ax, rdxrax);
Write64(m->dx, rdxrax >> 64);
rdxrax = DubbleImul(Read64(m->ax), Read64(p));
of = !!(rdxrax.hi + (rdxrax.lo >> 63));
Write64(m->ax, rdxrax.lo);
Write64(m->dx, rdxrax.hi);
} else if (!Osz(rde)) {
__builtin_mul_overflow((int64_t)(int32_t)Read32(m->ax), (int32_t)Read32(p),
&edxeax);
of = (int64_t)edxeax != (int32_t)edxeax;
edxeax = (int64_t)(int32_t)Read32(m->ax) * (int32_t)Read32(p);
of = edxeax != (int32_t)edxeax;
Write64(m->ax, edxeax);
Write64(m->dx, edxeax >> 32);
} else {
__builtin_mul_overflow((int32_t)(int16_t)Read16(m->ax), (int16_t)Read16(p),
&dxax);
of = (int32_t)dxax != (int16_t)dxax;
dxax = (int32_t)(int16_t)Read16(m->ax) * (int16_t)Read16(p);
of = dxax != (int16_t)dxax;
Write16(m->ax, dxax);
Write16(m->dx, dxax >> 16);
}
@ -213,25 +310,24 @@ void OpMulRdxRaxEvqpSigned(struct Machine *m, uint32_t rde) {
}
void OpMulRdxRaxEvqpUnsigned(struct Machine *m, uint32_t rde) {
bool of;
uint8_t *p;
unsigned of;
uint32_t dxax;
uint64_t edxeax;
uint128_t rdxrax;
struct Dubble rdxrax;
p = GetModrmRegisterWordPointerReadOszRexw(m, rde);
if (Rexw(rde)) {
__builtin_mul_overflow((uint128_t)Read64(m->ax), Read64(p), &rdxrax);
of = (uint64_t)rdxrax != rdxrax;
Write64(m->ax, rdxrax);
Write64(m->dx, rdxrax >> 64);
rdxrax = DubbleMul(Read64(m->ax), Read64(p));
of = !!rdxrax.hi;
Write64(m->ax, rdxrax.lo);
Write64(m->dx, rdxrax.hi);
} else if (!Osz(rde)) {
__builtin_mul_overflow((uint64_t)Read32(m->ax), Read32(p), &edxeax);
edxeax = (uint64_t)Read32(m->ax) * Read32(p);
of = (uint32_t)edxeax != edxeax;
Write64(m->ax, edxeax);
Write64(m->dx, edxeax >> 32);
} else {
__builtin_mul_overflow((uint32_t)(uint16_t)Read16(m->ax),
(uint16_t)Read16(p), &dxax);
dxax = (uint32_t)(uint16_t)Read16(m->ax) * (uint16_t)Read16(p);
of = (uint16_t)dxax != dxax;
Write16(m->ax, dxax);
Write16(m->dx, dxax >> 16);
@ -243,23 +339,18 @@ void OpMulRdxRaxEvqpUnsigned(struct Machine *m, uint32_t rde) {
static void AluImul(struct Machine *m, uint32_t rde, uint8_t *a, uint8_t *b) {
unsigned of;
if (Rexw(rde)) {
int64_t x, y, z;
x = Read64(a);
y = Read64(b);
of = __builtin_mul_overflow(x, y, &z);
Write64(RegRexrReg(m, rde), z);
struct Dubble p;
p = DubbleImul(Read64(a), Read64(b));
of = !!(p.hi + (p.lo >> 63));
Write64(RegRexrReg(m, rde), p.lo);
} else if (!Osz(rde)) {
int32_t x, y, z;
x = Read32(a);
y = Read32(b);
of = __builtin_mul_overflow(x, y, &z);
int64_t z;
z = (int64_t)(int32_t)Read32(a) * (int32_t)Read32(b);
of = z != (int32_t)z;
Write64(RegRexrReg(m, rde), z & 0xffffffff);
} else {
int z;
int16_t x, y;
x = Read16(a);
y = Read16(b);
z = x * y;
int32_t z;
z = (int32_t)(int16_t)Read16(a) * (int16_t)Read16(b);
of = z != (int16_t)z;
Write16(RegRexrReg(m, rde), z);
}

View file

@ -1,53 +1,82 @@
#ifndef COSMOPOLITAN_TOOL_BUILD_LIB_ENDIAN_H_
#define COSMOPOLITAN_TOOL_BUILD_LIB_ENDIAN_H_
#include "libc/bits/bits.h"
#include "libc/str/str.h"
#define Read8(P) (*(const uint8_t *)(P))
/**
 * Reads one byte from `p`.
 */
static inline uint8_t Read8(const uint8_t *p) {
  return *p;
}
#define Read16(P) \
({ \
const uint8_t *Ptr = (const uint8_t *)(P); \
READ16LE(P); \
})
/**
 * Stores one byte at `p`.
 */
static inline void Write8(uint8_t *p, uint8_t v) {
  p[0] = v;
}
#define Read32(P) \
({ \
const uint8_t *Ptr = (const uint8_t *)(P); \
READ32LE(P); \
})
/**
 * Reads a 16-bit little-endian integer from two bytes at `p`.
 */
static inline uint16_t Read16(const uint8_t *p) {
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  return p[0] | p[1] << 8;
#else
  /* host byte order already matches, so copy the bytes verbatim */
  uint16_t word;
  memcpy(&word, p, sizeof(word));
  return word;
#endif
}
#define Read64(P) \
({ \
const uint8_t *Ptr = (const uint8_t *)(P); \
READ64LE(P); \
})
/**
 * Stores a 16-bit integer as two little-endian bytes at `p`.
 */
static inline void Write16(uint8_t *p, uint16_t v) {
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  p[0] = v & 0xff;
  p[1] = v >> 8;
#else
  /* host byte order already matches, so copy the bytes verbatim */
  memcpy(p, &v, sizeof(v));
#endif
}
#define Write8(P, V) \
do { \
uint8_t Val = (V); \
uint8_t *Ptr = (P); \
*Ptr = Val; \
} while (0)
/**
 * Reads a 32-bit little-endian integer from four bytes at `p`.
 */
static inline uint32_t Read32(const uint8_t *p) {
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  uint32_t word = 0;
  unsigned i;
  /* fold bytes in from the most significant (last) to the first */
  for (i = sizeof(word); i-- > 0;) {
    word = word << 8 | p[i];
  }
  return word;
#else
  /* host byte order already matches, so copy the bytes verbatim */
  uint32_t word;
  memcpy(&word, p, sizeof(word));
  return word;
#endif
}
#define Write16(P, V) \
do { \
uint16_t Val = (V); \
uint8_t *Ptr = (P); \
WRITE16LE(Ptr, Val); \
} while (0)
/**
 * Stores a 32-bit integer as four little-endian bytes at `p`.
 */
static inline void Write32(uint8_t *p, uint32_t v) {
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  unsigned i;
  /* least significant byte goes first */
  for (i = 0; i < sizeof(v); ++i) {
    p[i] = (uint8_t)(v >> (i * 8));
  }
#else
  /* host byte order already matches, so copy the bytes verbatim */
  memcpy(p, &v, sizeof(v));
#endif
}
#define Write32(P, V) \
do { \
uint32_t Val = (V); \
uint8_t *Ptr = (P); \
WRITE32LE(Ptr, Val); \
} while (0)
/**
 * Reads a 64-bit little-endian integer from eight bytes at `p`.
 */
static inline uint64_t Read64(const uint8_t *p) {
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  uint64_t word = 0;
  unsigned i;
  /* fold bytes in from the most significant (last) to the first */
  for (i = sizeof(word); i-- > 0;) {
    word = word << 8 | p[i];
  }
  return word;
#else
  /* host byte order already matches, so copy the bytes verbatim */
  uint64_t word;
  memcpy(&word, p, sizeof(word));
  return word;
#endif
}
#define Write64(P, V) \
do { \
uint64_t Val = (V); \
uint8_t *Ptr = (P); \
WRITE64LE(Ptr, Val); \
} while (0)
/**
 * Stores a 64-bit integer as eight little-endian bytes at `p`.
 */
static inline void Write64(uint8_t *p, uint64_t v) {
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  unsigned i;
  /* least significant byte goes first */
  for (i = 0; i < sizeof(v); ++i) {
    p[i] = (uint8_t)(v >> (i * 8));
  }
#else
  /* host byte order already matches, so copy the bytes verbatim */
  memcpy(p, &v, sizeof(v));
#endif
}
#endif /* COSMOPOLITAN_TOOL_BUILD_LIB_ENDIAN_H_ */

View file

@ -27,6 +27,7 @@
#include "tool/build/lib/bcd.h"
#include "tool/build/lib/bitscan.h"
#include "tool/build/lib/case.h"
#include "tool/build/lib/clmul.h"
#include "tool/build/lib/cpuid.h"
#include "tool/build/lib/cvt.h"
#include "tool/build/lib/divmul.h"
@ -2215,6 +2216,7 @@ void ExecuteSparseInstruction(struct Machine *m, uint32_t rde, uint32_t d) {
CASE(0x22a, OpMovntdqaVdqMdq(m, rde));
CASE(0x240, OpSsePmulld(m, rde));
CASE(0x30f, OpSsePalignr(m, rde));
CASE(0x344, OpSsePclmulqdq(m, rde));
default:
OpUd(m, rde);
}

View file

@ -48,7 +48,7 @@ ssize_t PrintPanels(int fd, long pn, struct Panel *p, long tyn, long txn) {
bzero(&b, sizeof(b));
AppendStr(&b, "\e[H");
for (y = 0; y < tyn; ++y) {
if (y) AppendStr(&b, "\r\n");
if (y) AppendFmt(&b, "\e[%dH", y + 1);
for (x = i = 0; i < pn; ++i) {
if (p[i].top <= y && y < p[i].bottom) {
j = state = 0;