I wanted a tiny, scriptable, meltdown-proof way to run userspace programs and visualize how program execution impacts memory. It helps to explain how things like Actually Portable Executable work. It can show you how GCC-generated code goes about manipulating matrices and more. I didn't feel fully comfortable with Qemu and Bochs because I'm not smart enough to understand them. I wanted something like gVisor but with much stronger levels of assurance. I wanted a single binary that'll run on all major operating systems, with an embedded GPL-barrier ZIP filesystem, that is tiny enough to transpile to JavaScript and run in browsers too. https://justine.storage.googleapis.com/emulator625.mp4
#if 0
/*─────────────────────────────────────────────────────────────────╗
│ To the extent possible under law, Justine Tunney has waived      │
│ all copyright and related or neighboring rights to division,     │
│ as it is written in the following disclaimers:                   │
│   • http://unlicense.org/                                        │
│   • http://creativecommons.org/publicdomain/zero/1.0/            │
╚─────────────────────────────────────────────────────────────────*/
#endif
#include "third_party/compiler_rt/int_lib.h"

/**
 * Returns result of dividing a 128-bit value by a 64-bit value.
 *
 * Result must fit in 64 bits. Remainder is stored in r.
 *
 * @see libdivide libdivide_128_div_64_to_64() division fallback
 * @see Knuth, Volume 2, section 4.3.1, Algorithm D for correctness proof
 * @see https://danlark.org/2020/06/14/128-bit-division/
 */
forceinline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
                                          du_int *r) {
  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  const du_int b = 1ULL << (n_udword_bits / 2);  // Number base (32 bits)
  du_int un1, un0;                               // Norm. dividend LSD's
  du_int vn1, vn0;                               // Norm. divisor digits
  du_int q1, q0;                                 // Quotient digits
  du_int un64, un21, un10;                       // Dividend digit pairs
  du_int rhat;                                   // Remainder
  si_int s;                                      // Normalization shift
  s = __builtin_clzll(v);
  if (s > 0) {
    // Normalize the divisor.
    v = v << s;
    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
    un10 = u0 << s;  // Shift dividend left
  } else {
    // Avoid undefined behavior of (u0 >> 64).
    un64 = u1;
    un10 = u0;
  }
  // Break divisor up into two 32-bit digits.
  vn1 = v >> (n_udword_bits / 2);
  vn0 = v & 0xFFFFFFFF;
  // Break right half of dividend into two digits.
  un1 = un10 >> (n_udword_bits / 2);
  un0 = un10 & 0xFFFFFFFF;
  // Compute the first quotient digit, q1.
  q1 = un64 / vn1;
  rhat = un64 - q1 * vn1;
  // q1 has at most error 2. No more than 2 iterations.
  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
    q1 = q1 - 1;
    rhat = rhat + vn1;
    if (rhat >= b) break;
  }
  un21 = un64 * b + un1 - q1 * v;
  // Compute the second quotient digit.
  q0 = un21 / vn1;
  rhat = un21 - q0 * vn1;
  // q0 has at most error 2. No more than 2 iterations.
  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
    q0 = q0 - 1;
    rhat = rhat + vn1;
    if (rhat >= b) break;
  }
  *r = (un21 * b + un0 - q0 * v) >> s;
  return q1 * b + q0;
}
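
// A minimal smoke test (an illustrative sketch, not part of the original
// file; the function name below is hypothetical): dividing 2**64 by 3
// should yield quotient 0x5555555555555555 with remainder 1, because
// 3 * 0x5555555555555555 == 2**64 - 1.
static void udiv128by64to64default_smoke_test(void) {
  du_int q, r;
  q = udiv128by64to64default(1, 0, 3, &r);  // u1:u0 = 2**64, v = 3
  if (q != 0x5555555555555555ull || r != 1) __builtin_trap();
}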

forceinline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, du_int *r) {
#ifdef __x86_64__
  // The x86-64 DIV instruction divides RDX:RAX by its operand, leaving the
  // quotient in RAX and the remainder in RDX. Callers must ensure u1 < v;
  // otherwise the quotient won't fit in 64 bits and DIV raises #DE.
  du_int result;
  asm("div\t%2" : "=a"(result), "=d"(*r) : "r"(v), "a"(u0), "d"(u1) : "cc");
  return result;
#else
  return udiv128by64to64default(u1, u0, v, r);
#endif
}
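
// Cross-check sketch (illustrative, not part of the original file; the
// function name below is hypothetical): whenever the precondition u1 < v
// holds, the hardware path and the portable fallback should agree on both
// quotient and remainder.
static void udiv128by64to64_crosscheck(void) {
  du_int u1 = 0x0123456789abcdefull, u0 = 0xfedcba9876543210ull;
  du_int v = 0x1000000000000001ull;  // v > u1, so the quotient fits in 64 bits
  du_int ra, rb;
  du_int qa = udiv128by64to64(u1, u0, v, &ra);
  du_int qb = udiv128by64to64default(u1, u0, v, &rb);
  if (qa != qb || ra != rb) __builtin_trap();
}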

/**
 * Performs 128-bit unsigned division and remainder.
 *
 * @param a is dividend
 * @param b is divisor
 * @param rem receives remainder if not NULL
 * @return quotient
 */
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
  utwords dividend, divisor, quotient, remainder;
  si_int shift;
  dividend.all = a;
  divisor.all = b;
  if (divisor.all > dividend.all) {
    if (rem) *rem = dividend.all;
    return 0;
  }
  // When the divisor fits in 64 bits, we can use an optimized path.
  if (divisor.s.high == 0) {
    remainder.s.high = 0;
    if (dividend.s.high < divisor.s.low) {
      // The result fits in 64 bits.
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
                                       divisor.s.low, &remainder.s.low);
      quotient.s.high = 0;
    } else {
      // First, divide with the high part to get the remainder in
      // dividend.s.high. After that dividend.s.high < divisor.s.low.
      quotient.s.high = dividend.s.high / divisor.s.low;
      dividend.s.high = dividend.s.high % divisor.s.low;
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
                                       divisor.s.low, &remainder.s.low);
    }
    if (rem) *rem = remainder.all;
    return quotient.all;
  }
  // 0 <= shift <= 63.
  shift = __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
  divisor.all <<= shift;
  quotient.s.high = 0;
  quotient.s.low = 0;
  for (; shift >= 0; --shift) {
    quotient.s.low <<= 1;
    // Branch-free version of:
    //   if (dividend.all >= divisor.all) {
    //     dividend.all -= divisor.all;
    //     carry = 1;
    //   }
    // The arithmetic shift turns the borrow of the subtraction into a mask
    // that's all ones when dividend.all >= divisor.all and zero otherwise.
    ti_int s = (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
    quotient.s.low |= s & 1;
    dividend.all -= divisor.all & s;
    divisor.all >>= 1;
  }
  if (rem) *rem = dividend.all;
  return quotient.all;
}
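
// Usage sketch (illustrative, not part of the original file; the function
// name below is hypothetical and assumes tu_int is the unsigned 128-bit
// type from int_lib.h): GCC and Clang lower 128-bit '/' and '%' to runtime
// calls such as __udivmodti4, so the division identity q * b + r == a must
// hold for the values computed here.
static void udivmodti4_usage_example(void) {
  tu_int rem;
  tu_int a = ((tu_int)0x0123456789abcdefull << 64) | 42;
  tu_int b = 1000000007;                // divisor fits in 64 bits: fast path
  tu_int q = __udivmodti4(a, b, &rem);  // quotient returned, remainder stored
  if (q * b + rem != a || rem >= b) __builtin_trap();
}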