Add x86_64-linux-gnu emulator

I wanted a tiny, scriptable, meltdown-proof way to run userspace programs
and visualize how program execution impacts memory. It helps to explain
how things like Actually Portable Executable work. It can show you how
the GCC-generated code goes about manipulating matrices, and more. I
didn't feel fully comfortable with Qemu and Bochs because I'm not smart
enough to understand them. I wanted something like gVisor but with much
stronger levels of assurance. I wanted a single binary that'll run on
all major operating systems, with an embedded GPL barrier ZIP filesystem,
that is tiny enough to transpile to JavaScript and run in browsers too.

https://justine.storage.googleapis.com/emulator625.mp4
This commit is contained in:
Justine Tunney 2020-08-25 04:23:25 -07:00
parent 467504308a
commit f4f4caab0e
1052 changed files with 65667 additions and 7825 deletions

View file

@ -43,16 +43,13 @@ $(THIRD_PARTY_COMPILER_RT_A).pkg: \
$(foreach x,$(THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS),$($(x)_A).pkg)
$(THIRD_PARTY_COMPILER_RT_A_OBJS): \
DEFAULT_COPTS += \
-DCRT_HAS_128BIT
o/$(MODE)/third_party/compiler_rt/multc3.o \
o/$(MODE)/third_party/compiler_rt/divtc3.o: \
DEFAULT_COPTS += \
-w
DEFAULT_CFLAGS += \
$(OLD_CODE) \
-DCRT_HAS_128BIT
THIRD_PARTY_COMPILER_RT_LIBS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)))
THIRD_PARTY_COMPILER_RT_SRCS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_COMPILER_RT_HDRS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_HDRS))
THIRD_PARTY_COMPILER_RT_CHECKS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_CHECKS))
THIRD_PARTY_COMPILER_RT_OBJS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_OBJS))

64
third_party/compiler_rt/divmodti4.c vendored Normal file
View file

@ -0,0 +1,64 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "libc/calls/calls.h"
#include "third_party/compiler_rt/int_lib.h"
/**
 * Divides 128-bit signed integers w/ remainder.
 *
 * This is truncated division, same as C's `/` and `%` operators: the
 * quotient rounds towards zero and a nonzero remainder takes the sign
 * of the numerator (it is NOT the always-nonnegative euclidean one).
 *
 * @param a is numerator
 * @param b is denominator
 * @param opt_out_rem receives remainder if not null, stored as the
 *     two's complement bit pattern of the signed remainder
 * @return quotient
 * @note rounds towards zero
 * @note deliberately performs an integer division by zero (intended to
 *     raise SIGFPE) when the quotient can't be represented, which is
 *     only the case for INT128_MIN / -1
 */
COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, tu_int *opt_out_rem) {
  int k;
  tu_int sa, sb, sq, sr, x, y, q, r;
  k = sizeof(ti_int) * CHAR_BIT - 1;
  // Only INT128_MIN / -1 overflows. INT128_MIN divided by any other
  // negative denominator has a representable quotient, so it must not
  // trap. The comparison is done on unsigned values because shifting a
  // signed one into the sign bit would be undefined.
  if (b == -1 && (tu_int)a == (tu_int)1 << k) {
    volatile int zero = 0;
    zero = 1 / zero;  // raise(SIGFPE)
  }
  // Magnitudes are computed with unsigned arithmetic so that negating
  // INT128_MIN wraps to 2^127 instead of overflowing a signed type.
  sa = a >> k;                 // sa = a < 0 ? -1 : 0 (arithmetic shift)
  sb = b >> k;                 // sb = b < 0 ? -1 : 0 (arithmetic shift)
  x = ((tu_int)a ^ sa) - sa;   // |a|
  y = ((tu_int)b ^ sb) - sb;   // |b|
  sq = sa ^ sb;                // sign of quotient
  sr = sa;                     // sign of remainder
  q = __udivmodti4(x, y, &r);  // unsigned divide
  q = (q ^ sq) - sq;           // fix quotient sign
  r = (r ^ sr) - sr;           // fix remainder sign
  if (opt_out_rem) *opt_out_rem = r;
  return q;
}
/*
Intel Kabylake i9-9900 @ 3.10GHz Client Grade
idiv32 l: 27𝑐 9𝑛𝑠
idiv64 l: 27𝑐 9𝑛𝑠
divmodti4 small / small l: 42𝑐 14𝑛𝑠
divmodti4 small / large l: 14𝑐 5𝑛𝑠
divmodti4 large / small l: 92𝑐 30𝑛𝑠
divmodti4 large / large l: 209𝑐 68𝑛𝑠
Intel Kabylake i3-8100 @ 3.60GHz Client Grade
idiv32 l: 51𝑐 14𝑛𝑠
idiv64 l: 51𝑐 14𝑛𝑠
divmodti4 small / small l: 83𝑐 23𝑛𝑠
divmodti4 small / large l: 26𝑐 7𝑛𝑠
divmodti4 large / small l: 175𝑐 48𝑛𝑠
divmodti4 large / large l: 389𝑐 107𝑛𝑠
*/

View file

@ -1,36 +1,22 @@
/* clang-format off */
/* ===-- divti3.c - Implement __divti3 -------------------------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file implements __divti3 for the compiler_rt library.
*
* ===----------------------------------------------------------------------===
*/
STATIC_YOINK("huge_compiler_rt_license");
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/compiler_rt/int_lib.h"
#ifdef CRT_HAS_128BIT
/* Returns: a / b */
COMPILER_RT_ABI ti_int
__divti3(ti_int a, ti_int b)
{
const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */
ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */
a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
s_a ^= s_b; /* sign of quotient */
return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
/**
 * Computes the quotient of two 128-bit signed integers.
 *
 * This is a thin wrapper that forwards to __divmodti4() without asking
 * for the remainder.
 *
 * @param a is numerator
 * @param b is denominator
 * @return whatever quotient __divmodti4() yields for a and b
 * @note rounds towards zero
 */
COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
  ti_int quotient;
  quotient = __divmodti4(a, b, NULL);
  return quotient;
}
#endif /* CRT_HAS_128BIT */

View file

@ -16,6 +16,7 @@
#ifndef INT_LIB_H
#define INT_LIB_H
#define CRT_HAS_128BIT 1
/* Assumption: lool univac arithmetic */
/* Assumption: lool cray signed shift */
@ -74,6 +75,7 @@ COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem);
COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
#ifdef CRT_HAS_128BIT
COMPILER_RT_ABI si_int __clzti2(ti_int a);
COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, tu_int *rem);
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
#endif

View file

@ -1,241 +1,137 @@
/* clang-format off */
/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file implements __udivmodti4 for the compiler_rt library.
*
* ===----------------------------------------------------------------------===
*/
STATIC_YOINK("huge_compiler_rt_license");
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/compiler_rt/int_lib.h"
#ifdef CRT_HAS_128BIT
/* Effects: if rem != 0, *rem = a % b
* Returns: a / b
/**
* Returns 128 bit division result by 64 bit.
*
* Result must fit in 64 bits. Remainder is stored in r.
*
* @see libdivide libdivide_128_div_64_to_64() division fallback
* @see Knuth, Volume 2, section 4.3.1, Algorithm D for correctness proof
* @see https://danlark.org/2020/06/14/128-bit-division/
*/
/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
COMPILER_RT_ABI tu_int
__udivmodti4(tu_int a, tu_int b, tu_int* rem)
{
const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
utwords n;
n.all = a;
utwords d;
d.all = b;
utwords q;
utwords r;
unsigned sr;
/* special cases, X is unknown, K != 0 */
if (n.s.high == 0)
{
if (d.s.high == 0)
{
/* 0 X
* ---
* 0 X
*/
if (rem)
*rem = n.s.low % d.s.low;
return n.s.low / d.s.low;
}
/* 0 X
* ---
* K X
*/
if (rem)
*rem = n.s.low;
return 0;
}
/* n.s.high != 0 */
if (d.s.low == 0)
{
if (d.s.high == 0)
{
/* K X
* ---
* 0 0
*/
if (rem)
*rem = n.s.high % d.s.low;
return n.s.high / d.s.low;
}
/* d.s.high != 0 */
if (n.s.low == 0)
{
/* K 0
* ---
* K 0
*/
if (rem)
{
r.s.high = n.s.high % d.s.high;
r.s.low = 0;
*rem = r.all;
}
return n.s.high / d.s.high;
}
/* K K
* ---
* K 0
*/
if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
{
if (rem)
{
r.s.low = n.s.low;
r.s.high = n.s.high & (d.s.high - 1);
*rem = r.all;
}
return n.s.high >> __builtin_ctzll(d.s.high);
}
/* K K
* ---
* K 0
*/
sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
/* 0 <= sr <= n_udword_bits - 2 or sr large */
if (sr > n_udword_bits - 2)
{
if (rem)
*rem = n.all;
return 0;
}
++sr;
/* 1 <= sr <= n_udword_bits - 1 */
/* q.all = n.all << (n_utword_bits - sr); */
q.s.low = 0;
q.s.high = n.s.low << (n_udword_bits - sr);
/* r.all = n.all >> sr; */
r.s.high = n.s.high >> sr;
r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
}
else /* d.s.low != 0 */
{
if (d.s.high == 0)
{
/* K X
* ---
* 0 K
*/
if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
{
if (rem)
*rem = n.s.low & (d.s.low - 1);
if (d.s.low == 1)
return n.all;
sr = __builtin_ctzll(d.s.low);
q.s.high = n.s.high >> sr;
q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
return q.all;
}
/* K X
* ---
* 0 K
*/
sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
- __builtin_clzll(n.s.high);
/* 2 <= sr <= n_utword_bits - 1
* q.all = n.all << (n_utword_bits - sr);
* r.all = n.all >> sr;
*/
if (sr == n_udword_bits)
{
q.s.low = 0;
q.s.high = n.s.low;
r.s.high = 0;
r.s.low = n.s.high;
}
else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1
{
q.s.low = 0;
q.s.high = n.s.low << (n_udword_bits - sr);
r.s.high = n.s.high >> sr;
r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
}
else // n_udword_bits + 1 <= sr <= n_utword_bits - 1
{
q.s.low = n.s.low << (n_utword_bits - sr);
q.s.high = (n.s.high << (n_utword_bits - sr)) |
(n.s.low >> (sr - n_udword_bits));
r.s.high = 0;
r.s.low = n.s.high >> (sr - n_udword_bits);
}
}
else
{
/* K X
* ---
* K K
*/
sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
/*0 <= sr <= n_udword_bits - 1 or sr large */
if (sr > n_udword_bits - 1)
{
if (rem)
*rem = n.all;
return 0;
}
++sr;
/* 1 <= sr <= n_udword_bits
* q.all = n.all << (n_utword_bits - sr);
* r.all = n.all >> sr;
*/
q.s.low = 0;
if (sr == n_udword_bits)
{
q.s.high = n.s.low;
r.s.high = 0;
r.s.low = n.s.high;
}
else
{
r.s.high = n.s.high >> sr;
r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
q.s.high = n.s.low << (n_udword_bits - sr);
}
}
}
/* Not a special case
* q and r are initialized with:
* q.all = n.all << (n_utword_bits - sr);
* r.all = n.all >> sr;
* 1 <= sr <= n_utword_bits - 1
*/
su_int carry = 0;
for (; sr > 0; --sr)
{
/* r:q = ((r:q) << 1) | carry */
r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
q.s.low = (q.s.low << 1) | carry;
/* carry = 0;
* if (r.all >= d.all)
* {
* r.all -= d.all;
* carry = 1;
* }
*/
const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
carry = s & 1;
r.all -= d.all & s;
}
q.all = (q.all << 1) | carry;
if (rem)
*rem = r.all;
return q.all;
forceinline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
                                          du_int *r) {
  // Long division of the two-word dividend u1:u0 by v, producing the
  // quotient one half-word digit at a time. Digits are half of a
  // du_int wide (32 bits, assuming du_int is 64-bit).
  const unsigned wordbits = sizeof(du_int) * CHAR_BIT;
  const unsigned halfbits = wordbits / 2;
  const du_int base = 1ULL << halfbits;  // number base of one digit
  du_int dhi, dlo;                       // normalized divisor digits
  du_int nhi, nlo;                       // low two normalized dividend digits
  du_int qhi, qlo;                       // quotient digits
  du_int top, mid, low;                  // dividend digit pairs
  du_int rhat;                           // running remainder estimate
  si_int lz;                             // normalization shift
  lz = __builtin_clzll(v);
  if (lz > 0) {
    // Shift divisor and dividend left so the divisor's top bit is set.
    v <<= lz;
    top = (u1 << lz) | (u0 >> (wordbits - lz));
    low = u0 << lz;
  } else {
    // Already normalized; also sidesteps the undefined (u0 >> 64).
    top = u1;
    low = u0;
  }
  // Split the divisor into its two digits.
  dhi = v >> halfbits;
  dlo = v & 0xFFFFFFFF;
  // Split the low half of the dividend into its two digits.
  nhi = low >> halfbits;
  nlo = low & 0xFFFFFFFF;
  // Estimate the high quotient digit, then walk the estimate down
  // until it's consistent with the low divisor digit.
  qhi = top / dhi;
  rhat = top - qhi * dhi;
  while (qhi >= base || qhi * dlo > base * rhat + nhi) {
    --qhi;
    rhat += dhi;
    if (rhat >= base) break;
  }
  // Partial remainder that feeds the next digit.
  mid = top * base + nhi - qhi * v;
  // Same estimate-and-correct procedure for the low quotient digit.
  qlo = mid / dhi;
  rhat = mid - qlo * dhi;
  while (qlo >= base || qlo * dlo > base * rhat + nlo) {
    --qlo;
    rhat += dhi;
    if (rhat >= base) break;
  }
  // Undo the normalization shift to recover the true remainder.
  *r = (mid * base + nlo - qlo * v) >> lz;
  return qhi * base + qlo;
}
#endif /* CRT_HAS_128BIT */
forceinline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, du_int *r) {
#ifndef __x86_64__
  // No native instruction assumed here; use the portable routine.
  return udiv128by64to64default(u1, u0, v, r);
#else
  // The x86-64 DIV instruction divides the u1:u0 pair (passed in the
  // "d" and "a" registers) by v; the quotient comes back in the "a"
  // register and the remainder in the "d" register.
  du_int quot, remn;
  asm("div\t%2" : "=a"(quot), "=d"(remn) : "r"(v), "a"(u0), "d"(u1) : "cc");
  *r = remn;
  return quot;
#endif
}
/**
 * Divides 128-bit unsigned integers, yielding quotient and remainder.
 *
 * @param a is dividend
 * @param b is divisor
 * @param rem if not NULL, is where the remainder gets stored
 * @return quotient
 */
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
  const unsigned topbit = sizeof(tu_int) * CHAR_BIT - 1;
  utwords num, den, quo, mod;
  si_int i;
  ti_int ge;
  num.all = a;
  den.all = b;
  // A divisor larger than the dividend means quotient 0, remainder a.
  if (den.all > num.all) {
    if (rem) *rem = num.all;
    return 0;
  }
  if (!den.s.high) {
    // Divisor fits in a single word: at most one plain word division
    // followed by one two-word-by-one-word division.
    mod.s.high = 0;
    quo.s.high = 0;
    if (num.s.high >= den.s.low) {
      // Peel off the high quotient word first. Afterwards the high
      // dividend word is smaller than the divisor, so the remaining
      // quotient fits in one word.
      quo.s.high = num.s.high / den.s.low;
      num.s.high = num.s.high % den.s.low;
    }
    quo.s.low = udiv128by64to64(num.s.high, num.s.low, den.s.low, &mod.s.low);
    if (rem) *rem = mod.all;
    return quo.all;
  }
  // Both high words are nonzero here. Line the divisor's leading bit
  // up with the dividend's, then do shift-and-subtract long division.
  i = __builtin_clzll(den.s.high) - __builtin_clzll(num.s.high);
  den.all <<= i;
  quo.all = 0;
  while (i >= 0) {
    quo.s.low <<= 1;
    // ge is all ones when num >= den and zero otherwise, so the next
    // two statements are a branch free form of
    //   if (num >= den) { num -= den; quotient bit = 1; }
    ge = (ti_int)(den.all - num.all - 1) >> topbit;
    quo.s.low |= ge & 1;
    num.all -= den.all & ge;
    den.all >>= 1;
    --i;
  }
  // What is left of the dividend is the remainder.
  if (rem) *rem = num.all;
  return quo.all;
}

View file

@ -1,30 +1,14 @@
/* clang-format off */
/* ===-- udivti3.c - Implement __udivti3 -----------------------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file implements __udivti3 for the compiler_rt library.
*
* ===----------------------------------------------------------------------===
*/
STATIC_YOINK("huge_compiler_rt_license");
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/compiler_rt/int_lib.h"
#ifdef CRT_HAS_128BIT
/* Returns: a / b */
COMPILER_RT_ABI tu_int
__udivti3(tu_int a, tu_int b)
{
return __udivmodti4(a, b, 0);
/* Returns the quotient of a / b by forwarding to __udivmodti4() without
 * requesting the remainder. */
COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) {
  tu_int quotient;
  quotient = __udivmodti4(a, b, NULL);
  return quotient;
}
#endif /* CRT_HAS_128BIT */