Add x86_64-linux-gnu emulator

I wanted a tiny, scriptable, meltdown-proof way to run userspace programs and visualize how program execution impacts memory. It helps to explain how things like Actually Portable Executable work. It can show you how GCC-generated code goes about manipulating matrices, and more. I didn't feel fully comfortable with Qemu and Bochs because I'm not smart enough to understand them. I wanted something like gVisor but with much stronger levels of assurance. I wanted a single binary that'll run on all major operating systems, with an embedded GPL barrier ZIP filesystem, that is tiny enough to transpile to JavaScript and run in browsers too. https://justine.storage.googleapis.com/emulator625.mp4
2025-10-26 03:00:57 +00:00 · 2020-08-25 04:23:25 -07:00 · 2020-08-25 04:23:25 -07:00 · f4f4caab0e
commit f4f4caab0e
parent 467504308a
1052 changed files with 65667 additions and 7825 deletions
--- a/third_party/compiler_rt/compiler_rt.mk
+++ b/third_party/compiler_rt/compiler_rt.mk
@ -43,16 +43,13 @@ $(THIRD_PARTY_COMPILER_RT_A).pkg:				\
 		$(foreach x,$(THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS),$($(x)_A).pkg)

 $(THIRD_PARTY_COMPILER_RT_A_OBJS):				\
-	DEFAULT_COPTS +=					\
-		-DCRT_HAS_128BIT
-
-o/$(MODE)/third_party/compiler_rt/multc3.o			\
-o/$(MODE)/third_party/compiler_rt/divtc3.o:			\
-	DEFAULT_COPTS +=					\
-		-w
+		DEFAULT_CFLAGS +=					\
+			$(OLD_CODE)					\
+			-DCRT_HAS_128BIT

 THIRD_PARTY_COMPILER_RT_LIBS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)))
 THIRD_PARTY_COMPILER_RT_SRCS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_SRCS))
+THIRD_PARTY_COMPILER_RT_HDRS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_HDRS))
 THIRD_PARTY_COMPILER_RT_CHECKS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_CHECKS))
 THIRD_PARTY_COMPILER_RT_OBJS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_OBJS))

--- a/third_party/compiler_rt/divmodti4.c
+++ b/third_party/compiler_rt/divmodti4.c
@ -0,0 +1,64 @@
+#if 0
+/*─────────────────────────────────────────────────────────────────╗
+│ To the extent possible under law, Justine Tunney has waived      │
+│ all copyright and related or neighboring rights to division,     │
+│ as it is written in the following disclaimers:                   │
+│   • http://unlicense.org/                                        │
+│   • http://creativecommons.org/publicdomain/zero/1.0/            │
+╚─────────────────────────────────────────────────────────────────*/
+#endif
+#include "libc/calls/calls.h"
+#include "third_party/compiler_rt/int_lib.h"
+
+/**
+ * Divides 128-bit signed integers w/ remainder.
+ *
+ * @param a is numerator
+ * @param b is denominator
+ * @param opt_out_rem receives remainder w/ sign of `a` if not null
+ * @return quotient or result of division
+ * @note rounds towards zero; traps only on INT128_MIN / -1 overflow
+ */
+COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, tu_int *opt_out_rem) {
+  int k;
+  tu_int r, x, y, q;
+  ti_int sa, sb, sq, sr;
+  k = sizeof(ti_int) * CHAR_BIT - 1;
+  if (b == -1 && a == (ti_int)((tu_int)1 << k)) {
+    volatile int zero = 0;
+    zero = 1 / zero;  // raise(SIGFPE)
+  }
+  sa = a >> k;                 // sa = a < 0 ? -1 : 0
+  sb = b >> k;                 // sb = b < 0 ? -1 : 0
+  x = ((tu_int)a ^ sa) - sa;   // |a|, unsigned so INT128_MIN can't overflow
+  y = ((tu_int)b ^ sb) - sb;   // |b|, unsigned so INT128_MIN can't overflow
+  sq = sa ^ sb;                // sign of quotient
+  sr = sa;                     // sign of remainder
+  q = __udivmodti4(x, y, &r);  // unsigned divide
+  q = (q ^ sq) - sq;           // fix quotient sign
+  r = (r ^ sr) - sr;           // fix remainder sign
+  if (opt_out_rem) *opt_out_rem = r;
+  return q;
+}
+
+/*
+
+  Intel Kabylake i9-9900 @ 3.10GHz Client Grade
+
+    idiv32                         l:         27𝑐          9𝑛𝑠
+    idiv64                         l:         27𝑐          9𝑛𝑠
+    divmodti4 small / small        l:         42𝑐         14𝑛𝑠
+    divmodti4 small / large        l:         14𝑐          5𝑛𝑠
+    divmodti4 large / small        l:         92𝑐         30𝑛𝑠
+    divmodti4 large / large        l:        209𝑐         68𝑛𝑠
+
+  Intel Kabylake i3-8100 @ 3.60GHz Client Grade
+
+    idiv32                         l:         51𝑐         14𝑛𝑠
+    idiv64                         l:         51𝑐         14𝑛𝑠
+    divmodti4 small / small        l:         83𝑐         23𝑛𝑠
+    divmodti4 small / large        l:         26𝑐          7𝑛𝑠
+    divmodti4 large / small        l:        175𝑐         48𝑛𝑠
+    divmodti4 large / large        l:        389𝑐        107𝑛𝑠
+
+*/
--- a/third_party/compiler_rt/divti3.c
+++ b/third_party/compiler_rt/divti3.c
@ -1,36 +1,22 @@
-/* clang-format off */
-/* ===-- divti3.c - Implement __divti3 -------------------------------------===
- *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __divti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-STATIC_YOINK("huge_compiler_rt_license");
-
+#if 0
+/*─────────────────────────────────────────────────────────────────╗
+│ To the extent possible under law, Justine Tunney has waived      │
+│ all copyright and related or neighboring rights to division,     │
+│ as it is written in the following disclaimers:                   │
+│   • http://unlicense.org/                                        │
+│   • http://creativecommons.org/publicdomain/zero/1.0/            │
+╚─────────────────────────────────────────────────────────────────*/
+#endif
 #include "third_party/compiler_rt/int_lib.h"

-#ifdef CRT_HAS_128BIT
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI ti_int
-__divti3(ti_int a, ti_int b)
-{
-    const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
-    ti_int s_a = a >> bits_in_tword_m1;           /* s_a = a < 0 ? -1 : 0 */
-    ti_int s_b = b >> bits_in_tword_m1;           /* s_b = b < 0 ? -1 : 0 */
-    a = (a ^ s_a) - s_a;                         /* negate if s_a == -1 */
-    b = (b ^ s_b) - s_b;                         /* negate if s_b == -1 */
-    s_a ^= s_b;                                  /* sign of quotient */
-    return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a;  /* negate if s_a == -1 */
+/**
+ * Computes quotient of two 128-bit signed integers.
+ *
+ * This delegates to __divmodti4() and discards the remainder.
+ *
+ * @param a is numerator
+ * @param b is denominator
+ * @return quotient or result of division
+ * @note rounds towards zero
+ */
+COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
+  tu_int *const discard_remainder = NULL;
+  return __divmodti4(a, b, discard_remainder);
 }
-
-#endif /* CRT_HAS_128BIT */
--- a/third_party/compiler_rt/int_lib.h
+++ b/third_party/compiler_rt/int_lib.h
@ -16,6 +16,7 @@

 #ifndef INT_LIB_H
 #define INT_LIB_H
+#define CRT_HAS_128BIT 1

 /* Assumption: lool univac arithmetic */
 /* Assumption: lool cray signed shift */
@ -74,6 +75,7 @@ COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem);
 COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
 #ifdef CRT_HAS_128BIT
 COMPILER_RT_ABI si_int __clzti2(ti_int a);
+COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, tu_int *rem);
 COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
 #endif

--- a/third_party/compiler_rt/udivmodti4.c
+++ b/third_party/compiler_rt/udivmodti4.c
@ -1,241 +1,137 @@
-/* clang-format off */
-/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
- *
- *                    The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivmodti4 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */ 
-
-STATIC_YOINK("huge_compiler_rt_license");
-
+#if 0
+/*─────────────────────────────────────────────────────────────────╗
+│ To the extent possible under law, Justine Tunney has waived      │
+│ all copyright and related or neighboring rights to division,     │
+│ as it is written in the following disclaimers:                   │
+│   • http://unlicense.org/                                        │
+│   • http://creativecommons.org/publicdomain/zero/1.0/            │
+╚─────────────────────────────────────────────────────────────────*/
+#endif
 #include "third_party/compiler_rt/int_lib.h"

-#ifdef CRT_HAS_128BIT
-
-/* Effects: if rem != 0, *rem = a % b 
- * Returns: a / b 
+/**
+ * Divides 128-bit integer by 64-bit integer, returning the quotient.
+ *
+ * Result must fit in 64 bits. Remainder is stored in r.
+ *
+ * @see libdivide libdivide_128_div_64_to_64() division fallback
+ * @see Knuth, Volume 2, section 4.3.1, Algorithm D for correctness proof
+ * @see https://danlark.org/2020/06/14/128-bit-division/
 */
-
-/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
-
-COMPILER_RT_ABI tu_int
-__udivmodti4(tu_int a, tu_int b, tu_int* rem)
-{
-    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
-    const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
-    utwords n;
-    n.all = a;
-    utwords d;
-    d.all = b;
-    utwords q;
-    utwords r;
-    unsigned sr;
-    /* special cases, X is unknown, K != 0 */
-    if (n.s.high == 0)
-    {
-        if (d.s.high == 0)
-        {
-            /* 0 X
-             * ---
-             * 0 X
-             */
-            if (rem)
-                *rem = n.s.low % d.s.low;
-            return n.s.low / d.s.low;
-        }
-        /* 0 X
-         * ---
-         * K X
-         */
-        if (rem)
-            *rem = n.s.low;
-        return 0;
-    }
-    /* n.s.high != 0 */
-    if (d.s.low == 0)
-    {
-        if (d.s.high == 0)
-        {
-            /* K X
-             * ---
-             * 0 0
-             */
-            if (rem)
-                *rem = n.s.high % d.s.low;
-            return n.s.high / d.s.low;
-        }
-        /* d.s.high != 0 */
-        if (n.s.low == 0)
-        {
-            /* K 0
-             * ---
-             * K 0
-             */
-            if (rem)
-            {
-                r.s.high = n.s.high % d.s.high;
-                r.s.low = 0;
-                *rem = r.all;
-            }
-            return n.s.high / d.s.high;
-        }
-        /* K K
-         * ---
-         * K 0
-         */
-        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
-        {
-            if (rem)
-            {
-                r.s.low = n.s.low;
-                r.s.high = n.s.high & (d.s.high - 1);
-                *rem = r.all;
-            }
-            return n.s.high >> __builtin_ctzll(d.s.high);
-        }
-        /* K K
-         * ---
-         * K 0
-         */
-        sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
-        /* 0 <= sr <= n_udword_bits - 2 or sr large */
-        if (sr > n_udword_bits - 2)
-        {
-           if (rem)
-                *rem = n.all;
-            return 0;
-        }
-        ++sr;
-        /* 1 <= sr <= n_udword_bits - 1 */
-        /* q.all = n.all << (n_utword_bits - sr); */
-        q.s.low = 0;
-        q.s.high = n.s.low << (n_udword_bits - sr);
-        /* r.all = n.all >> sr; */
-        r.s.high = n.s.high >> sr;
-        r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
-    }
-    else  /* d.s.low != 0 */
-    {
-        if (d.s.high == 0)
-        {
-            /* K X
-             * ---
-             * 0 K
-             */
-            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
-            {
-                if (rem)
-                    *rem = n.s.low & (d.s.low - 1);
-                if (d.s.low == 1)
-                    return n.all;
-                sr = __builtin_ctzll(d.s.low);
-                q.s.high = n.s.high >> sr;
-                q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
-                return q.all;
-            }
-            /* K X
-             * ---
-             * 0 K
-             */
-            sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
-                                   - __builtin_clzll(n.s.high);
-            /* 2 <= sr <= n_utword_bits - 1
-             * q.all = n.all << (n_utword_bits - sr);
-             * r.all = n.all >> sr;
-             */
-            if (sr == n_udword_bits)
-            {
-                q.s.low = 0;
-                q.s.high = n.s.low;
-                r.s.high = 0;
-                r.s.low = n.s.high;
-            }
-            else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
-            {
-                q.s.low = 0;
-                q.s.high = n.s.low << (n_udword_bits - sr);
-                r.s.high = n.s.high >> sr;
-                r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
-            }
-            else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
-            {
-                q.s.low = n.s.low << (n_utword_bits - sr);
-                q.s.high = (n.s.high << (n_utword_bits - sr)) |
-                           (n.s.low >> (sr - n_udword_bits));
-                r.s.high = 0;
-                r.s.low = n.s.high >> (sr - n_udword_bits);
-            }
-        }
-        else
-        {
-            /* K X
-             * ---
-             * K K
-             */
-            sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
-            /*0 <= sr <= n_udword_bits - 1 or sr large */
-            if (sr > n_udword_bits - 1)
-            {
-               if (rem)
-                    *rem = n.all;
-                return 0;
-            }
-            ++sr;
-            /* 1 <= sr <= n_udword_bits
-             * q.all = n.all << (n_utword_bits - sr);
-             * r.all = n.all >> sr;
-             */
-            q.s.low = 0;
-            if (sr == n_udword_bits)
-            {
-                q.s.high = n.s.low;
-                r.s.high = 0;
-                r.s.low = n.s.high;
-            }
-            else
-            {
-                r.s.high = n.s.high >> sr;
-                r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
-                q.s.high = n.s.low << (n_udword_bits - sr);
-            }
-        }
-    }
-    /* Not a special case
-     * q and r are initialized with:
-     * q.all = n.all << (n_utword_bits - sr);
-     * r.all = n.all >> sr;
-     * 1 <= sr <= n_utword_bits - 1
-     */
-    su_int carry = 0;
-    for (; sr > 0; --sr)
-    {
-        /* r:q = ((r:q)  << 1) | carry */
-        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
-        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
-        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
-        q.s.low  = (q.s.low  << 1) | carry;
-        /* carry = 0;
-         * if (r.all >= d.all)
-         * {
-         *     r.all -= d.all;
-         *      carry = 1;
-         * }
-         */
-        const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
-        carry = s & 1;
-        r.all -= d.all & s;
-    }
-    q.all = (q.all << 1) | carry;
-    if (rem)
-        *rem = r.all;
-    return q.all;
+forceinline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
+                                          du_int *r) {
+  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+  const du_int b = 1ULL << (n_udword_bits / 2);  // Number base (32 bits)
+  du_int un1, un0;                               // Norm. dividend LSD's
+  du_int vn1, vn0;                               // Norm. divisor digits
+  du_int q1, q0;                                 // Quotient digits
+  du_int un64, un21, un10;                       // Dividend digit pairs
+  du_int rhat;                                   // Remainder of digit estimate
+  si_int s;                                      // Normalization shift
+  s = __builtin_clzll(v);  // leading zeros of divisor; clz of 0 is undefined
+  if (s > 0) {
+    // Normalize the divisor.
+    v = v << s;  // top bit of v is now set
+    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));  // high word of dividend
+    un10 = u0 << s;  // Shift dividend left
+  } else {
+    // Avoid undefined behavior of (u0 >> 64).
+    un64 = u1;
+    un10 = u0;
+  }
+  // Break divisor up into two 32-bit digits.
+  vn1 = v >> (n_udword_bits / 2);
+  vn0 = v & 0xFFFFFFFF;
+  // Break right half of dividend into two digits.
+  un1 = un10 >> (n_udword_bits / 2);
+  un0 = un10 & 0xFFFFFFFF;
+  // Compute the first quotient digit, q1.
+  q1 = un64 / vn1;
+  rhat = un64 - q1 * vn1;  // what the estimate division left over
+  // q1 has at most error 2. No more than 2 iterations.
+  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
+    q1 = q1 - 1;
+    rhat = rhat + vn1;
+    if (rhat >= b) break;  // correction is only retried while rhat < b
+  }
+  un21 = un64 * b + un1 - q1 * v;  // multiply and subtract
+  // Compute the second quotient digit.
+  q0 = un21 / vn1;
+  rhat = un21 - q0 * vn1;  // what the estimate division left over
+  // q0 has at most error 2. No more than 2 iterations.
+  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
+    q0 = q0 - 1;
+    rhat = rhat + vn1;
+    if (rhat >= b) break;  // correction is only retried while rhat < b
+  }
+  *r = (un21 * b + un0 - q0 * v) >> s;  // undo normalization shift
+  return q1 * b + q0;                   // join the two quotient digits
 }

-#endif /* CRT_HAS_128BIT */
+/**
+ * Divides 128-bit value u1:u0 by 64-bit v, yielding 64-bit quotient.
+ *
+ * On x86-64 this is a single DIV instruction w/ dividend in RDX:RAX;
+ * every other target goes through udiv128by64to64default().
+ *
+ * @param u1 is high half of dividend
+ * @param u0 is low half of dividend
+ * @param v is divisor
+ * @param r receives remainder
+ * @return quotient
+ */
+forceinline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, du_int *r) {
+#ifndef __x86_64__
+  return udiv128by64to64default(u1, u0, v, r);
+#else
+  du_int quot;
+  asm("div\t%2" : "=a"(quot), "=d"(*r) : "r"(v), "a"(u0), "d"(u1) : "cc");
+  return quot;
+#endif
+}
+
+/**
+ * Performs 128-bit unsigned division and remainder.
+ *
+ * Three cases are handled in order: divisor greater than dividend
+ * (quotient is zero), divisor fitting in 64 bits (one or two calls to
+ * udiv128by64to64), and otherwise a shift-subtract loop that yields
+ * one quotient bit per iteration.
+ *
+ * NOTE(review): b == 0 appears to reach the 64-bit `/` and `%` on
+ * divisor.s.low below, i.e. an integer divide by zero; confirm that
+ * is the intended way to fault.
+ *
+ * @param a is dividend
+ * @param b is divisor
+ * @param rem receives remainder if not NULL
+ * @return quotient
+ */
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
+  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+  utwords dividend, divisor, quotient, remainder;
+  si_int shift;
+  dividend.all = a;
+  divisor.all = b;
+  if (divisor.all > dividend.all) {  // quotient is 0, remainder is a
+    if (rem) *rem = dividend.all;
+    return 0;
+  }
+  // When the divisor fits in 64 bits, we can use an optimized path.
+  if (divisor.s.high == 0) {
+    remainder.s.high = 0;  // remainder < divisor, so it fits the low half
+    if (dividend.s.high < divisor.s.low) {
+      // The result fits in 64 bits.
+      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+                                       divisor.s.low, &remainder.s.low);
+      quotient.s.high = 0;
+    } else {
+      // First, divide with the high part to get the remainder in
+      // dividend.s.high. After that dividend.s.high < divisor.s.low.
+      quotient.s.high = dividend.s.high / divisor.s.low;
+      dividend.s.high = dividend.s.high % divisor.s.low;
+      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+                                       divisor.s.low, &remainder.s.low);
+    }
+    if (rem) *rem = remainder.all;
+    return quotient.all;
+  }
+  // 0 <= shift <= 63.
+  shift = __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
+  divisor.all <<= shift;  // line up top set bit of divisor w/ dividend's
+  quotient.s.high = 0;    // at most 64 quotient bits are produced below
+  quotient.s.low = 0;
+  for (; shift >= 0; --shift) {  // shift + 1 passes, one quotient bit each
+    quotient.s.low <<= 1;
+    // Branch free version of the following, where s is -1
+    // when dividend.all >= divisor.all and 0 otherwise:
+    //   if (dividend.all >= divisor.all) {
+    //     dividend.all -= divisor.all;
+    //     carry = 1;
+    //   }
+    ti_int s = (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
+    quotient.s.low |= s & 1;
+    dividend.all -= divisor.all & s;
+    divisor.all >>= 1;  // move on to next lower quotient bit
+  }
+  if (rem) *rem = dividend.all;  // what is left of dividend is the remainder
+  return quotient.all;
+}
--- a/third_party/compiler_rt/udivti3.c
+++ b/third_party/compiler_rt/udivti3.c
@ -1,30 +1,14 @@
-/* clang-format off */
-/* ===-- udivti3.c - Implement __udivti3 -----------------------------------===
- *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is dual licensed under the MIT and the University of Illinois Open
- * Source Licenses. See LICENSE.TXT for details.
- *
- * ===----------------------------------------------------------------------===
- *
- * This file implements __udivti3 for the compiler_rt library.
- *
- * ===----------------------------------------------------------------------===
- */
-
-STATIC_YOINK("huge_compiler_rt_license");
-
+#if 0
+/*─────────────────────────────────────────────────────────────────╗
+│ To the extent possible under law, Justine Tunney has waived      │
+│ all copyright and related or neighboring rights to division,     │
+│ as it is written in the following disclaimers:                   │
+│   • http://unlicense.org/                                        │
+│   • http://creativecommons.org/publicdomain/zero/1.0/            │
+╚─────────────────────────────────────────────────────────────────*/
+#endif
 #include "third_party/compiler_rt/int_lib.h"

-#ifdef CRT_HAS_128BIT
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI tu_int
-__udivti3(tu_int a, tu_int b)
-{
-    return __udivmodti4(a, b, 0);
+/**
+ * Divides 128-bit unsigned integers.
+ *
+ * This delegates to __udivmodti4() and discards the remainder.
+ *
+ * @param a is dividend
+ * @param b is divisor
+ * @return quotient
+ */
+COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) {
+  tu_int *const discard_remainder = NULL;
+  return __udivmodti4(a, b, discard_remainder);
 }
-
-#endif /* CRT_HAS_128BIT */