Make numerous improvements

- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
2025-07-03 01:38:30 +00:00 · 2021-09-27 22:58:51 -07:00 · 2021-09-27 22:58:51 -07:00 · 39bf41f4eb
commit 39bf41f4eb
parent fa7b4f5bd1
806 changed files with 77494 additions and 63859 deletions
--- a/third_party/python/Modules/_decimal/libmpdec/basearith.c
+++ b/third_party/python/Modules/_decimal/libmpdec/basearith.c
@ -42,7 +42,6 @@ asm(".include \"libc/disclaimer.inc\"");
 /*                   Calculations in base MPD_RADIX                  */
 /*********************************************************************/

-
 /*
 * Knuth, TAOCP, Volume 2, 4.3.1:
 *    w := sum of u (len m) and v (len n)
@ -56,9 +55,7 @@ _mpd_baseadd(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
    mpd_uint_t s;
    mpd_uint_t carry = 0;
    mpd_size_t i;
-
    assert(n > 0 && m >= n);
-
    /* add n members of u and v */
    for (i = 0; i < n; i++) {
        s = u[i] + (v[i] + carry);
@ -75,7 +72,6 @@ _mpd_baseadd(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
    for (; i < m; i++) {
        w[i] = u[i];
    }
-
    return carry;
 }

@ -89,9 +85,7 @@ _mpd_baseaddto(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n)
    mpd_uint_t s;
    mpd_uint_t carry = 0;
    mpd_size_t i;
-
    if (n == 0) return;
-
    /* add n members of u to w */
    for (i = 0; i < n; i++) {
        s = w[i] + (u[i] + carry);
@ -116,21 +110,17 @@ _mpd_shortadd(mpd_uint_t *w, mpd_size_t m, mpd_uint_t v)
    mpd_uint_t s;
    mpd_uint_t carry;
    mpd_size_t i;
-
    assert(m > 0);
-
    /* add v to w */
    s = w[0] + v;
    carry = (s < v) | (s >= MPD_RADIX);
    w[0] = carry ? s-MPD_RADIX : s;
-
    /* if there is a carry, propagate it */
    for (i = 1; carry && i < m; i++) {
        s = w[i] + carry;
        carry = (s == MPD_RADIX);
        w[i] = carry ? 0 : s;
    }
-
    return carry;
 }

@ -141,16 +131,13 @@ _mpd_baseincr(mpd_uint_t *u, mpd_size_t n)
    mpd_uint_t s;
    mpd_uint_t carry = 1;
    mpd_size_t i;
-
    assert(n > 0);
-
    /* if there is a carry, propagate it */
    for (i = 0; carry && i < n; i++) {
        s = u[i] + carry;
        carry = (s == MPD_RADIX);
        u[i] = carry ? 0 : s;
    }
-
    return carry;
 }

@ -166,9 +153,7 @@ _mpd_basesub(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
    mpd_uint_t d;
    mpd_uint_t borrow = 0;
    mpd_size_t i;
-
    assert(m > 0 && n > 0);
-
    /* subtract n members of v from u */
    for (i = 0; i < n; i++) {
        d = u[i] - (v[i] + borrow);
@ -197,9 +182,7 @@ _mpd_basesubfrom(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n)
    mpd_uint_t d;
    mpd_uint_t borrow = 0;
    mpd_size_t i;
-
    if (n == 0) return;
-
    /* subtract n members of u from w */
    for (i = 0; i < n; i++) {
        d = w[i] - (u[i] + borrow);
@ -221,15 +204,11 @@ _mpd_shortmul(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n, mpd_uint_t v)
    mpd_uint_t hi, lo;
    mpd_uint_t carry = 0;
    mpd_size_t i;
-
    assert(n > 0);
-
    for (i=0; i < n; i++) {
-
        _mpd_mul_words(&hi, &lo, u[i], v);
        lo = carry + lo;
        if (lo < carry) hi++;
-
        _mpd_div_words_r(&carry, &w[i], hi, lo);
    }
    w[i] = carry;
@ -247,19 +226,15 @@ _mpd_basemul(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
    mpd_uint_t hi, lo;
    mpd_uint_t carry;
    mpd_size_t i, j;
-
    assert(m > 0 && n > 0);
-
    for (j=0; j < n; j++) {
        carry = 0;
        for (i=0; i < m; i++) {
-
            _mpd_mul_words(&hi, &lo, u[i], v[j]);
            lo = w[i+j] + lo;
            if (lo < w[i+j]) hi++;
            lo = carry + lo;
            if (lo < carry) hi++;
-
            _mpd_div_words_r(&carry, &w[i+j], hi, lo);
        }
        w[j+m] = carry;
@ -276,18 +251,13 @@ _mpd_shortdiv(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n, mpd_uint_t v)
    mpd_uint_t hi, lo;
    mpd_uint_t rem = 0;
    mpd_size_t i;
-
    assert(n > 0);
-
    for (i=n-1; i != MPD_SIZE_MAX; i--) {
-
        _mpd_mul_words(&hi, &lo, rem, MPD_RADIX);
        lo = u[i] + lo;
        if (lo < u[i]) hi++;
-
        _mpd_div_words(&w[i], &rem, hi, lo, v);
    }
-
    return rem;
 }

@ -315,13 +285,10 @@ _mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
    mpd_uint_t carry;
    mpd_size_t i, j, m;
    int retval = 0;
-
    assert(n > 1 && nplusm >= n);
    m = sub_size_t(nplusm, n);
-
    /* D1: normalize */
    d = MPD_RADIX / (vconst[n-1] + 1);
-
    if (nplusm >= MPD_MINALLOC_MAX) {
        if ((u = mpd_alloc(nplusm+1, sizeof *u)) == NULL) {
            return -1;
@ -333,17 +300,13 @@ _mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
            return -1;
        }
    }
-
    _mpd_shortmul(u, uconst, nplusm, d);
    _mpd_shortmul(v, vconst, n, d);
-
    /* D2: loop */
    for (j=m; j != MPD_SIZE_MAX; j--) {
-
        /* D3: calculate qhat and rhat */
        rhat = _mpd_shortdiv(w2, u+j+n-1, 2, v[n-1]);
        qhat = w2[1] * MPD_RADIX + w2[0];
-
        while (1) {
            if (qhat < MPD_RADIX) {
                _mpd_singlemul(w2, qhat, v[n-2]);
@ -362,14 +325,10 @@ _mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
        /* D4: multiply and subtract */
        carry = 0;
        for (i=0; i <= n; i++) {
-
            _mpd_mul_words(&hi, &lo, qhat, v[i]);
-
            lo = carry + lo;
            if (lo < carry) hi++;
-
            _mpd_div_words_r(&hi, &lo, hi, lo);
-
            x = u[i+j] - lo;
            carry = (u[i+j] < x);
            u[i+j] = carry ? x+MPD_RADIX : x;
@ -383,7 +342,6 @@ _mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
            (void)_mpd_baseadd(u+j, u+j, v, n+1, n);
        }
    }
-
    /* D8: unnormalize */
    if (r != NULL) {
        _mpd_shortdiv(r, u, n, d);
@ -393,11 +351,9 @@ _mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
    else {
        retval = !_mpd_isallzero(u, n);
    }
-
-
-if (u != ustatic) mpd_free(u);
-if (v != vstatic) mpd_free(v);
-return retval;
+    if (u != ustatic) mpd_free(u);
+    if (v != vstatic) mpd_free(v);
+    return retval;
 }

 /*
@ -428,23 +384,13 @@ void
 _mpd_baseshiftl(mpd_uint_t *dest, mpd_uint_t *src, mpd_size_t n, mpd_size_t m,
                mpd_size_t shift)
 {
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
-    /* spurious uninitialized warnings */
-    mpd_uint_t l=l, lprev=lprev, h=h;
-#else
-    mpd_uint_t l, lprev, h;
-#endif
+    mpd_uint_t l=l, lprev=lprev, h=h; /* b/c warnings */
    mpd_uint_t q, r;
    mpd_uint_t ph;
-
    assert(m > 0 && n >= m);
-
    _mpd_div_word(&q, &r, (mpd_uint_t)shift, MPD_RDIGITS);
-
    if (r != 0) {
-
        ph = mpd_pow10[r];
-
        --m; --n;
        _mpd_divmod_pow10(&h, &lprev, src[m--], MPD_RDIGITS-r);
        if (h != 0) { /* r + msdigits > rdigits <==> h != 0 */
@ -464,7 +410,6 @@ _mpd_baseshiftl(mpd_uint_t *dest, mpd_uint_t *src, mpd_size_t n, mpd_size_t m,
            dest[m+q] = src[m];
        }
    }
-
    mpd_uint_zero(dest, q);
 }

@ -497,29 +442,19 @@ mpd_uint_t
 _mpd_baseshiftr(mpd_uint_t *dest, mpd_uint_t *src, mpd_size_t slen,
                mpd_size_t shift)
 {
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
    /* spurious uninitialized warnings */
    mpd_uint_t l=l, h=h, hprev=hprev; /* low, high, previous high */
-#else
-    mpd_uint_t l, h, hprev; /* low, high, previous high */
-#endif
    mpd_uint_t rnd, rest;   /* rounding digit, rest */
    mpd_uint_t q, r;
    mpd_size_t i, j;
    mpd_uint_t ph;
-
    assert(slen > 0);
-
    _mpd_div_word(&q, &r, (mpd_uint_t)shift, MPD_RDIGITS);
-
    rnd = rest = 0;
    if (r != 0) {
-
        ph = mpd_pow10[MPD_RDIGITS-r];
-
        _mpd_divmod_pow10(&hprev, &rest, src[q], r);
        _mpd_divmod_pow10(&rnd, &rest, rest, r-1);
-
        if (rest == 0 && q > 0) {
            rest = !_mpd_isallzero(src, q);
        }
@ -544,14 +479,12 @@ _mpd_baseshiftr(mpd_uint_t *dest, mpd_uint_t *src, mpd_size_t slen,
            dest[j] = src[q+j];
        }
    }
-
    /* 0-4  ==> rnd+rest < 0.5   */
    /* 5    ==> rnd+rest == 0.5  */
    /* 6-9  ==> rnd+rest > 0.5   */
    return (rnd == 0 || rnd == 5) ? rnd + !!rest : rnd;
 }

-
 /*********************************************************************/
 /*                      Calculations in base b                       */
 /*********************************************************************/
@ -566,21 +499,17 @@ _mpd_shortadd_b(mpd_uint_t *w, mpd_size_t m, mpd_uint_t v, mpd_uint_t b)
    mpd_uint_t s;
    mpd_uint_t carry;
    mpd_size_t i;
-
    assert(m > 0);
-
    /* add v to w */
    s = w[0] + v;
    carry = (s < v) | (s >= b);
    w[0] = carry ? s-b : s;
-
    /* if there is a carry, propagate it */
    for (i = 1; carry && i < m; i++) {
        s = w[i] + carry;
        carry = (s == b);
        w[i] = carry ? 0 : s;
    }
-
    return carry;
 }

@ -591,18 +520,13 @@ _mpd_shortmul_c(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n, mpd_uint_t v)
    mpd_uint_t hi, lo;
    mpd_uint_t carry = 0;
    mpd_size_t i;
-
    assert(n > 0);
-
    for (i=0; i < n; i++) {
-
        _mpd_mul_words(&hi, &lo, u[i], v);
        lo = carry + lo;
        if (lo < carry) hi++;
-
        _mpd_div_words_r(&carry, &w[i], hi, lo);
    }
-
    return carry;
 }

@ -614,18 +538,13 @@ _mpd_shortmul_b(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n,
    mpd_uint_t hi, lo;
    mpd_uint_t carry = 0;
    mpd_size_t i;
-
    assert(n > 0);
-
    for (i=0; i < n; i++) {
-
        _mpd_mul_words(&hi, &lo, u[i], v);
        lo = carry + lo;
        if (lo < carry) hi++;
-
        _mpd_div_words(&carry, &w[i], hi, lo, b);
    }
-
    return carry;
 }

@ -640,17 +559,12 @@ _mpd_shortdiv_b(mpd_uint_t *w, const mpd_uint_t *u, mpd_size_t n,
    mpd_uint_t hi, lo;
    mpd_uint_t rem = 0;
    mpd_size_t i;
-
    assert(n > 0);
-
    for (i=n-1; i != MPD_SIZE_MAX; i--) {
-
        _mpd_mul_words(&hi, &lo, rem, b);
        lo = u[i] + lo;
        if (lo < u[i]) hi++;
-
        _mpd_div_words(&w[i], &rem, hi, lo, v);
    }
-
    return rem;
 }
--- a/third_party/python/Modules/_decimal/libmpdec/basearith.h
+++ b/third_party/python/Modules/_decimal/libmpdec/basearith.h
@ -5,7 +5,6 @@
 /* clang-format off */

 /* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)

 mpd_uint_t _mpd_baseadd(mpd_uint_t *w, const mpd_uint_t *u, const mpd_uint_t *v,
                        mpd_size_t m, mpd_size_t n);
@ -36,7 +35,6 @@ void _mpd_baseshiftl(mpd_uint_t *dest, mpd_uint_t *src, mpd_size_t n,
 mpd_uint_t _mpd_baseshiftr(mpd_uint_t *dest, mpd_uint_t *src, mpd_size_t slen,
                           mpd_size_t shift);

-#ifdef CONFIG_64
 extern const mpd_uint_t mprime_rdx;

 /*
@ -66,12 +64,10 @@ _mpd_div_words_r(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo)
 {
    mpd_uint_t n_adj, h, l, t;
    mpd_uint_t n1_neg;
-
    /* n1_neg = if lo >= 2**63 then MPD_UINT_MAX else 0 */
    n1_neg = (lo & (1ULL<<63)) ? MPD_UINT_MAX : 0;
    /* n_adj = if lo >= 2**63 then lo+MPD_RADIX else lo */
    n_adj = lo + (n1_neg & MPD_RADIX);
-
    /* (h, l) = if lo >= 2**63 then m'*(hi+1) else m'*hi */
    _mpd_mul_words(&h, &l, mprime_rdx, hi-n1_neg);
    l = l + n_adj;
@ -80,10 +76,8 @@ _mpd_div_words_r(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo)
    /* At this point t == qest, with q == qest or q == qest+1:
     *   1) 0 <= 2**64*hi + lo - qest*MPD_RADIX < 2*MPD_RADIX
     */
-
    /* t = 2**64-1 - qest = 2**64 - (qest+1) */
    t = MPD_UINT_MAX - t;
-
    /* (h, l) = 2**64*MPD_RADIX - (qest+1)*MPD_RADIX */
    _mpd_mul_words(&h, &l, t, MPD_RADIX);
    l = l + lo;
@ -100,25 +94,15 @@ _mpd_div_words_r(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo)
     *     b) q := h - t == qest
     *     c) r := l + MPD_RADIX = r
     */
-
    *q = (h - t);
    *r = l + (MPD_RADIX & h);
 }
-#else
-static inline void
-_mpd_div_words_r(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo)
-{
-    _mpd_div_words(q, r, hi, lo, MPD_RADIX);
-}
-#endif
-

 /* Multiply two single base MPD_RADIX words, store result in array w[2]. */
 static inline void
 _mpd_singlemul(mpd_uint_t w[2], mpd_uint_t u, mpd_uint_t v)
 {
    mpd_uint_t hi, lo;
-
    _mpd_mul_words(&hi, &lo, u, v);
    _mpd_div_words_r(&w[1], &w[0], hi, lo);
 }
@ -128,21 +112,17 @@ static inline void
 _mpd_mul_2_le2(mpd_uint_t w[4], mpd_uint_t u[2], mpd_uint_t v[2], mpd_ssize_t m)
 {
    mpd_uint_t hi, lo;
-
    _mpd_mul_words(&hi, &lo, u[0], v[0]);
    _mpd_div_words_r(&w[1], &w[0], hi, lo);
-
    _mpd_mul_words(&hi, &lo, u[1], v[0]);
    lo = w[1] + lo;
    if (lo < w[1]) hi++;
    _mpd_div_words_r(&w[2], &w[1], hi, lo);
    if (m == 1) return;
-
    _mpd_mul_words(&hi, &lo, u[0], v[1]);
    lo = w[1] + lo;
    if (lo < w[1]) hi++;
    _mpd_div_words_r(&w[3], &w[1], hi, lo);
-
    _mpd_mul_words(&hi, &lo, u[1], v[1]);
    lo = w[2] + lo;
    if (lo < w[2]) hi++;
@ -151,7 +131,6 @@ _mpd_mul_2_le2(mpd_uint_t w[4], mpd_uint_t u[2], mpd_uint_t v[2], mpd_ssize_t m)
    _mpd_div_words_r(&w[3], &w[2], hi, lo);
 }

-
 /*
 * Test if all words from data[len-1] to data[0] are zero. If len is 0, nothing
 * is tested and the coefficient is regarded as "all zero".
@ -178,6 +157,5 @@ _mpd_isallnine(const mpd_uint_t *data, mpd_ssize_t len)
    return 1;
 }

-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */

 #endif /* BASEARITH_H */
--- a/third_party/python/Modules/_decimal/libmpdec/bits.h
+++ b/third_party/python/Modules/_decimal/libmpdec/bits.h
@ -3,41 +3,35 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 /* clang-format off */

-/* Check if n is a power of 2. */
+/*
+ * Check if 𝑛 is a power of 2.
+ */
 static inline int
 ispower2(mpd_size_t n)
 {
    return n != 0 && (n & (n-1)) == 0;
 }

-#if defined(ANSI)
-#error oh no
 /*
- * Return the most significant bit position of n from 0 to 31 (63).
- * Assumptions: n != 0.
+ * Returns most significant bit position of 𝑛.
+ * Assumptions: 𝑛 ≠ 0
 */
 static inline int
 mpd_bsr(mpd_size_t n)
 {
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    return __builtin_clzll(n) ^ (sizeof(long long) * CHAR_BIT - 1);
+#else
    int pos = 0;
    mpd_size_t tmp;
-
-#ifdef CONFIG_64
-    tmp = n >> 32;
-    if (tmp != 0) { n = tmp; pos += 32; }
-#endif
-    tmp = n >> 16;
-    if (tmp != 0) { n = tmp; pos += 16; }
-    tmp = n >> 8;
-    if (tmp != 0) { n = tmp; pos += 8; }
-    tmp = n >> 4;
-    if (tmp != 0) { n = tmp; pos += 4; }
-    tmp = n >> 2;
-    if (tmp != 0) { n = tmp; pos += 2; }
-    tmp = n >> 1;
-    if (tmp != 0) { n = tmp; pos += 1; }
-
+    tmp = n >> 32; if (tmp != 0) { n = tmp; pos += 32; }
+    tmp = n >> 16; if (tmp != 0) { n = tmp; pos += 16; }
+    tmp = n >>  8; if (tmp != 0) { n = tmp; pos +=  8; }
+    tmp = n >>  4; if (tmp != 0) { n = tmp; pos +=  4; }
+    tmp = n >>  2; if (tmp != 0) { n = tmp; pos +=  2; }
+    tmp = n >>  1; if (tmp != 0) { n = tmp; pos +=  1; }
    return pos + (int)n - 1;
+#endif
 }

 /*
@ -47,9 +41,10 @@ mpd_bsr(mpd_size_t n)
 static inline int
 mpd_bsf(mpd_size_t n)
 {
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    return __builtin_ctzll(n);
+#else
    int pos;
-
-#ifdef CONFIG_64
    pos = 63;
    if (n & 0x00000000FFFFFFFFULL) { pos -= 32; } else { n >>= 32; }
    if (n & 0x000000000000FFFFULL) { pos -= 16; } else { n >>= 16; }
@ -57,104 +52,8 @@ mpd_bsf(mpd_size_t n)
    if (n & 0x000000000000000FULL) { pos -=  4; } else { n >>=  4; }
    if (n & 0x0000000000000003ULL) { pos -=  2; } else { n >>=  2; }
    if (n & 0x0000000000000001ULL) { pos -=  1; }
-#else
-    pos = 31;
-    if (n & 0x000000000000FFFFUL) { pos -= 16; } else { n >>= 16; }
-    if (n & 0x00000000000000FFUL) { pos -=  8; } else { n >>=  8; }
-    if (n & 0x000000000000000FUL) { pos -=  4; } else { n >>=  4; }
-    if (n & 0x0000000000000003UL) { pos -=  2; } else { n >>=  2; }
-    if (n & 0x0000000000000001UL) { pos -=  1; }
-#endif
    return pos;
-}
-/* END ANSI */
-
-#elif defined(ASM)
-/*
- * Bit scan reverse. Assumptions: a != 0.
- */
-static inline int
-mpd_bsr(mpd_size_t a)
-{
-    mpd_size_t retval;
-
-    __asm__ (
-#ifdef CONFIG_64
-        "bsrq %1, %0\n\t"
-#else
-        "bsr %1, %0\n\t"
 #endif
-        :"=r" (retval)
-        :"r" (a)
-        :"cc"
-    );
-
-    return (int)retval;
 }

-/*
- * Bit scan forward. Assumptions: a != 0.
- */
-static inline int
-mpd_bsf(mpd_size_t a)
-{
-    mpd_size_t retval;
-    __asm__ (
-#ifdef CONFIG_64
-        "bsfq %1, %0\n\t"
-#else
-        "bsf %1, %0\n\t"
-#endif
-        :"=r" (retval)
-        :"r" (a)
-        :"cc"
-    );
-    return (int)retval;
-}
-/* END ASM */
-
-#elif defined(MASM)
-#include <intrin.h>
-/*
- * Bit scan reverse. Assumptions: a != 0.
- */
-static inline int __cdecl
-mpd_bsr(mpd_size_t a)
-{
-    unsigned long retval;
-
-#ifdef CONFIG_64
-    _BitScanReverse64(&retval, a);
-#else
-    _BitScanReverse(&retval, a);
-#endif
-
-    return (int)retval;
-}
-
-/*
- * Bit scan forward. Assumptions: a != 0.
- */
-static inline int __cdecl
-mpd_bsf(mpd_size_t a)
-{
-    unsigned long retval;
-
-#ifdef CONFIG_64
-    _BitScanForward64(&retval, a);
-#else
-    _BitScanForward(&retval, a);
-#endif
-
-    return (int)retval;
-}
-/* END MASM (_MSC_VER) */
-#else
-  #error "missing preprocessor definitions"
-#endif /* BSR/BSF */
-
-
 #endif /* BITS_H */
-
-
-
--- a/third_party/python/Modules/_decimal/libmpdec/constants.c
+++ b/third_party/python/Modules/_decimal/libmpdec/constants.c
@ -36,86 +36,45 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-#if defined(CONFIG_64)
+/* number-theory.c */
+const mpd_uint_t mpd_moduli[3] = {
+  18446744069414584321ULL, 18446744056529682433ULL, 18446742974197923841ULL
+};
+const mpd_uint_t mpd_roots[3]  = {7ULL, 10ULL, 19ULL};

-  /* number-theory.c */
-  const mpd_uint_t mpd_moduli[3] = {
-    18446744069414584321ULL, 18446744056529682433ULL, 18446742974197923841ULL
-  };
-  const mpd_uint_t mpd_roots[3]  = {7ULL, 10ULL, 19ULL};
+/* crt.c */
+const mpd_uint_t INV_P1_MOD_P2   = 18446744055098026669ULL;
+const mpd_uint_t INV_P1P2_MOD_P3 = 287064143708160ULL;
+const mpd_uint_t LH_P1P2 = 18446744052234715137ULL;     /* (P1*P2) % 2^64 */
+const mpd_uint_t UH_P1P2 = 18446744052234715141ULL;     /* (P1*P2) / 2^64 */

-  /* crt.c */
-  const mpd_uint_t INV_P1_MOD_P2   = 18446744055098026669ULL;
-  const mpd_uint_t INV_P1P2_MOD_P3 = 287064143708160ULL;
-  const mpd_uint_t LH_P1P2 = 18446744052234715137ULL;     /* (P1*P2) % 2^64 */
-  const mpd_uint_t UH_P1P2 = 18446744052234715141ULL;     /* (P1*P2) / 2^64 */
+/* transpose.c */
+const mpd_size_t mpd_bits[64] = {
+  1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,  2048, 4096, 8192, 16384,
+  32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608,
+  16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824,
+  2147483648ULL, 4294967296ULL, 8589934592ULL, 17179869184ULL, 34359738368ULL,
+  68719476736ULL, 137438953472ULL, 274877906944ULL, 549755813888ULL,
+  1099511627776ULL, 2199023255552ULL, 4398046511104, 8796093022208ULL,
+  17592186044416ULL, 35184372088832ULL, 70368744177664ULL, 140737488355328ULL,
+  281474976710656ULL, 562949953421312ULL, 1125899906842624ULL,
+  2251799813685248ULL, 4503599627370496ULL, 9007199254740992ULL,
+  18014398509481984ULL, 36028797018963968ULL, 72057594037927936ULL,
+  144115188075855872ULL, 288230376151711744ULL, 576460752303423488ULL,
+  1152921504606846976ULL, 2305843009213693952ULL, 4611686018427387904ULL,
+  9223372036854775808ULL
+};

-  /* transpose.c */
-  const mpd_size_t mpd_bits[64] = {
-    1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,  2048, 4096, 8192, 16384,
-    32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608,
-    16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824,
-    2147483648ULL, 4294967296ULL, 8589934592ULL, 17179869184ULL, 34359738368ULL,
-    68719476736ULL, 137438953472ULL, 274877906944ULL, 549755813888ULL,
-    1099511627776ULL, 2199023255552ULL, 4398046511104, 8796093022208ULL,
-    17592186044416ULL, 35184372088832ULL, 70368744177664ULL, 140737488355328ULL,
-    281474976710656ULL, 562949953421312ULL, 1125899906842624ULL,
-    2251799813685248ULL, 4503599627370496ULL, 9007199254740992ULL,
-    18014398509481984ULL, 36028797018963968ULL, 72057594037927936ULL,
-    144115188075855872ULL, 288230376151711744ULL, 576460752303423488ULL,
-    1152921504606846976ULL, 2305843009213693952ULL, 4611686018427387904ULL,
-    9223372036854775808ULL
-  };
+/* mpdecimal.c */
+const mpd_uint_t mpd_pow10[MPD_RDIGITS+1] = {
+  1,10,100,1000,10000,100000,1000000,10000000,100000000,1000000000,
+  10000000000ULL,100000000000ULL,1000000000000ULL,10000000000000ULL,
+  100000000000000ULL,1000000000000000ULL,10000000000000000ULL,
+  100000000000000000ULL,1000000000000000000ULL,10000000000000000000ULL
+};

-  /* mpdecimal.c */
-  const mpd_uint_t mpd_pow10[MPD_RDIGITS+1] = {
-    1,10,100,1000,10000,100000,1000000,10000000,100000000,1000000000,
-    10000000000ULL,100000000000ULL,1000000000000ULL,10000000000000ULL,
-    100000000000000ULL,1000000000000000ULL,10000000000000000ULL,
-    100000000000000000ULL,1000000000000000000ULL,10000000000000000000ULL
-  };
-
-  /* magic number for constant division by MPD_RADIX */
-  const mpd_uint_t mprime_rdx = 15581492618384294730ULL;
-
-#elif defined(CONFIG_32)
-
-  /* number-theory.c */
-  const mpd_uint_t mpd_moduli[3]  = {2113929217UL, 2013265921UL, 1811939329UL};
-  const mpd_uint_t mpd_roots[3]   = {5UL, 31UL, 13UL};
-
-  /* PentiumPro modular multiplication: These constants have to be loaded as
-   * 80 bit long doubles, which are not supported by certain compilers. */
-  const uint32_t mpd_invmoduli[3][3] = {
-    {4293885170U, 2181570688U, 16352U},  /* ((long double) 1 / 2113929217UL) */
-    {1698898177U, 2290649223U, 16352U},  /* ((long double) 1 / 2013265921UL) */
-    {2716021846U, 2545165803U, 16352U}   /* ((long double) 1 / 1811939329UL) */
-  };
-
-  const float MPD_TWO63 = 9223372036854775808.0; /* 2^63 */
-
-  /* crt.c */
-  const mpd_uint_t INV_P1_MOD_P2   = 2013265901UL;
-  const mpd_uint_t INV_P1P2_MOD_P3 = 54UL;
-  const mpd_uint_t LH_P1P2 = 4127195137UL;  /* (P1*P2) % 2^32 */
-  const mpd_uint_t UH_P1P2 = 990904320UL;   /* (P1*P2) / 2^32 */
-
-  /* transpose.c */
-  const mpd_size_t mpd_bits[32] = {
-    1, 2, 4, 8, 16, 32, 64, 128, 256, 512,  1024,  2048, 4096, 8192, 16384,
-    32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608,
-    16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824,
-    2147483648UL
-  };
-
-  /* mpdecimal.c */
-  const mpd_uint_t mpd_pow10[MPD_RDIGITS+1] = {
-    1,10,100,1000,10000,100000,1000000,10000000,100000000,1000000000
-  };
-
-#else
-  #error "CONFIG_64 or CONFIG_32 must be defined."
-#endif
+/* magic number for constant division by MPD_RADIX */
+const mpd_uint_t mprime_rdx = 15581492618384294730ULL;

 const char *mpd_round_string[MPD_ROUND_GUARD] = {
    "ROUND_UP",          /* round away from 0               */
--- a/third_party/python/Modules/_decimal/libmpdec/constants.h
+++ b/third_party/python/Modules/_decimal/libmpdec/constants.h
@ -3,35 +3,12 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 /* clang-format off */

-/* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)
-
-/* choice of optimized functions */
-#if defined(CONFIG_64)
-/* x64 */
-  #define MULMOD(a, b) x64_mulmod(a, b, umod)
-  #define MULMOD2C(a0, a1, w) x64_mulmod2c(a0, a1, w, umod)
-  #define MULMOD2(a0, b0, a1, b1) x64_mulmod2(a0, b0, a1, b1, umod)
-  #define POWMOD(base, exp) x64_powmod(base, exp, umod)
-  #define SETMODULUS(modnum) std_setmodulus(modnum, &umod)
-  #define SIZE3_NTT(x0, x1, x2, w3table) std_size3_ntt(x0, x1, x2, w3table, umod)
-#elif defined(PPRO)
-/* PentiumPro (or later) gcc inline asm */
-  #define MULMOD(a, b) ppro_mulmod(a, b, &dmod, dinvmod)
-  #define MULMOD2C(a0, a1, w) ppro_mulmod2c(a0, a1, w, &dmod, dinvmod)
-  #define MULMOD2(a0, b0, a1, b1) ppro_mulmod2(a0, b0, a1, b1, &dmod, dinvmod)
-  #define POWMOD(base, exp) ppro_powmod(base, exp, &dmod, dinvmod)
-  #define SETMODULUS(modnum) ppro_setmodulus(modnum, &umod, &dmod, dinvmod)
-  #define SIZE3_NTT(x0, x1, x2, w3table) ppro_size3_ntt(x0, x1, x2, w3table, umod, &dmod, dinvmod)
-#else
-  /* ANSI C99 */
-  #define MULMOD(a, b) std_mulmod(a, b, umod)
-  #define MULMOD2C(a0, a1, w) std_mulmod2c(a0, a1, w, umod)
-  #define MULMOD2(a0, b0, a1, b1) std_mulmod2(a0, b0, a1, b1, umod)
-  #define POWMOD(base, exp) std_powmod(base, exp, umod)
-  #define SETMODULUS(modnum) std_setmodulus(modnum, &umod)
-  #define SIZE3_NTT(x0, x1, x2, w3table) std_size3_ntt(x0, x1, x2, w3table, umod)
-#endif
+#define MULMOD(a, b) x64_mulmod(a, b, umod)
+#define MULMOD2C(a0, a1, w) x64_mulmod2c(a0, a1, w, umod)
+#define MULMOD2(a0, b0, a1, b1) x64_mulmod2(a0, b0, a1, b1, umod)
+#define POWMOD(base, exp) x64_powmod(base, exp, umod)
+#define SETMODULUS(modnum) std_setmodulus(modnum, &umod)
+#define SIZE3_NTT(x0, x1, x2, w3table) std_size3_ntt(x0, x1, x2, w3table, umod)

 /* PentiumPro (or later) gcc inline asm */
 extern const float MPD_TWO63;
@ -49,11 +26,4 @@ extern const mpd_uint_t INV_P1P2_MOD_P3;
 extern const mpd_uint_t LH_P1P2;
 extern const mpd_uint_t UH_P1P2;

-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
-
-
 #endif /* CONSTANTS_H */
-
-
-
--- a/third_party/python/Modules/_decimal/libmpdec/context.c
+++ b/third_party/python/Modules/_decimal/libmpdec/context.c
@ -38,14 +38,14 @@ Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

 void
-mpd_dflt_traphandler(mpd_context_t *ctx UNUSED)
+mpd_dflt_traphandler(mpd_context_t *ctx)
 {
+    (void)ctx;
    raise(SIGFPE);
 }

 void (* mpd_traphandler)(mpd_context_t *) = mpd_dflt_traphandler;

-
 /* Set guaranteed minimum number of coefficient words. The function may
   be used once at program start. Setting MPD_MINALLOC to out-of-bounds
   values is a catastrophic error, so in that case the function exits rather
@ -54,7 +54,6 @@ void
 mpd_setminalloc(mpd_ssize_t n)
 {
    static int minalloc_is_set = 0;
-
    if (minalloc_is_set) {
        mpd_err_warn("mpd_setminalloc: ignoring request to set "
                     "MPD_MINALLOC a second time\n");
@ -71,18 +70,14 @@ void
 mpd_init(mpd_context_t *ctx, mpd_ssize_t prec)
 {
    mpd_ssize_t ideal_minalloc;
-
    mpd_defaultcontext(ctx);
-
    if (!mpd_qsetprec(ctx, prec)) {
        mpd_addstatus_raise(ctx, MPD_Invalid_context);
        return;
    }
-
    ideal_minalloc = 2 * ((prec+MPD_RDIGITS-1) / MPD_RDIGITS);
    if (ideal_minalloc < MPD_MINALLOC_MIN) ideal_minalloc = MPD_MINALLOC_MIN;
    if (ideal_minalloc > MPD_MINALLOC_MAX) ideal_minalloc = MPD_MINALLOC_MAX;
-
    mpd_setminalloc(ideal_minalloc);
 }

@ -134,7 +129,6 @@ mpd_ieee_context(mpd_context_t *ctx, int bits)
    if (bits <= 0 || bits > MPD_IEEE_CONTEXT_MAX_BITS || bits % 32) {
        return -1;
    }
-
    ctx->prec = 9 * (bits/32) - 2;
    ctx->emax = 3 * ((mpd_ssize_t)1<<(bits/16+3));
    ctx->emin = 1 - ctx->emax;
@ -144,7 +138,6 @@ mpd_ieee_context(mpd_context_t *ctx, int bits)
    ctx->newtrap=0;
    ctx->clamp=1;
    ctx->allcr=1;
-
    return 0;
 }

@ -196,7 +189,6 @@ mpd_getcr(const mpd_context_t *ctx)
    return ctx->allcr;
 }

-
 int
 mpd_qsetprec(mpd_context_t *ctx, mpd_ssize_t prec)
 {
@ -277,7 +269,6 @@ mpd_qsetcr(mpd_context_t *ctx, int c)
    return 1;
 }

-
 void
 mpd_addstatus_raise(mpd_context_t *ctx, uint32_t flags)
 {
--- a/third_party/python/Modules/_decimal/libmpdec/convolute.c
+++ b/third_party/python/Modules/_decimal/libmpdec/convolute.c
@ -44,8 +44,8 @@ Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");


-/* Bignum: Fast convolution using the Number Theoretic Transform. Used for
-   the multiplication of very large coefficients. */
+/* Bignum: Fast convolution using the Number Theoretic Transform.
+   Used for the multiplication of very large coefficients. */


 /* Convolute the data in c1 and c2. Result is in c1. */
@ -54,17 +54,10 @@ fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
 {
    int (*fnt)(mpd_uint_t *, mpd_size_t, int);
    int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t n_inv, umod;
    mpd_size_t i;
-
-
    SETMODULUS(modnum);
    n_inv = POWMOD(n, (umod-2));
-
    if (ispower2(n)) {
        if (n > SIX_STEP_THRESHOLD) {
            fnt = six_step_fnt;
@ -79,7 +72,6 @@ fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
        fnt = four_step_fnt;
        inv_fnt = inv_four_step_fnt;
    }
-
    if (!fnt(c1, n, modnum)) {
        return 0;
    }
@ -95,7 +87,6 @@ fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
        c1[i] = x0;
        c1[i+1] = x1;
    }
-
    if (!inv_fnt(c1, n, modnum)) {
        return 0;
    }
@ -111,7 +102,6 @@ fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
        c1[i+2] = x2;
        c1[i+3] = x3;
    }
-
    return 1;
 }

@ -121,17 +111,10 @@ fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
 {
    int (*fnt)(mpd_uint_t *, mpd_size_t, int);
    int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t n_inv, umod;
    mpd_size_t i;
-
-
    SETMODULUS(modnum);
    n_inv = POWMOD(n, (umod-2));
-
    if (ispower2(n)) {
        if (n > SIX_STEP_THRESHOLD) {
            fnt = six_step_fnt;
@ -146,7 +129,6 @@ fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
        fnt = four_step_fnt;
        inv_fnt = inv_four_step_fnt;
    }
-
    if (!fnt(c1, n, modnum)) {
        return 0;
    }
@ -157,7 +139,6 @@ fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
        c1[i] = x0;
        c1[i+1] = x1;
    }
-
    if (!inv_fnt(c1, n, modnum)) {
        return 0;
    }
@ -173,6 +154,5 @@ fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
        c1[i+2] = x2;
        c1[i+3] = x3;
    }
-
    return 1;
 }
--- a/third_party/python/Modules/_decimal/libmpdec/convolute.h
+++ b/third_party/python/Modules/_decimal/libmpdec/convolute.h
@ -1,16 +1,10 @@
 #ifndef CONVOLUTE_H
 #define CONVOLUTE_H
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-/* clang-format off */
-
-/* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)

 #define SIX_STEP_THRESHOLD 4096

-int fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum);
-int fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum);
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
+int fnt_convolute(mpd_uint_t *, mpd_uint_t *, mpd_size_t, int);
+int fnt_autoconvolute(mpd_uint_t *, mpd_size_t, int);

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/crt.c
+++ b/third_party/python/Modules/_decimal/libmpdec/crt.c
@ -38,23 +38,18 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-
 /* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */

-
 /* Multiply P1P2 by v, store result in w. */
 static inline void
 _crt_mulP1P2_3(mpd_uint_t w[3], mpd_uint_t v)
 {
    mpd_uint_t hi1, hi2, lo;
-
    _mpd_mul_words(&hi1, &lo, LH_P1P2, v);
    w[0] = lo;
-
    _mpd_mul_words(&hi2, &lo, UH_P1P2, v);
    lo = hi1 + lo;
    if (lo < hi1) hi2++;
-
    w[1] = lo;
    w[2] = hi2;
 }
@ -65,15 +60,12 @@ _crt_add3(mpd_uint_t w[3], mpd_uint_t v[3])
 {
    mpd_uint_t carry;
    mpd_uint_t s;
-
    s = w[0] + v[0];
    carry = (s < w[0]);
    w[0] = s;
-
    s = w[1] + (v[1] + carry);
    carry = (s < w[1]);
    w[1] = s;
-
    w[2] = w[2] + (v[2] + carry);
 }

@ -83,21 +75,17 @@ _crt_div3(mpd_uint_t *w, const mpd_uint_t *u, mpd_uint_t v)
 {
    mpd_uint_t r1 = u[2];
    mpd_uint_t r2;
-
    if (r1 < v) {
        w[2] = 0;
    }
    else {
        _mpd_div_word(&w[2], &r1, u[2], v); /* GCOV_NOT_REACHED */
    }
-
    _mpd_div_words(&w[1], &r2, r1, u[1], v);
    _mpd_div_words(&w[0], &r1, r2, u[0], v);
-
    return r1;
 }

-
 /*
 * Chinese Remainder Theorem:
 * Algorithm from Joerg Arndt, "Matters Computational",
@ -138,45 +126,32 @@ crt3(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_size_t rsize)
 {
    mpd_uint_t p1 = mpd_moduli[P1];
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t a1, a2, a3;
    mpd_uint_t s;
    mpd_uint_t z[3], t[3];
    mpd_uint_t carry[3] = {0,0,0};
    mpd_uint_t hi, lo;
    mpd_size_t i;
-
    for (i = 0; i < rsize; i++) {
-
        a1 = x1[i];
        a2 = x2[i];
        a3 = x3[i];
-
        SETMODULUS(P2);
        s = ext_submod(a2, a1, umod);
        s = MULMOD(s, INV_P1_MOD_P2);
-
        _mpd_mul_words(&hi, &lo, s, p1);
        lo = lo + a1;
        if (lo < a1) hi++;
-
        SETMODULUS(P3);
        s = dw_submod(a3, hi, lo, umod);
        s = MULMOD(s, INV_P1P2_MOD_P3);
-
        z[0] = lo;
        z[1] = hi;
        z[2] = 0;
-
        _crt_mulP1P2_3(t, s);
        _crt_add3(z, t);
        _crt_add3(carry, z);
-
        x1[i] = _crt_div3(carry, carry, MPD_RADIX);
    }
-
    assert(carry[0] == 0 && carry[1] == 0 && carry[2] == 0);
 }
--- a/third_party/python/Modules/_decimal/libmpdec/crt.h
+++ b/third_party/python/Modules/_decimal/libmpdec/crt.h
@ -4,10 +4,8 @@
 /* clang-format off */

 /* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)

 void crt3(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_size_t nmemb);

-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/difradix2.c
+++ b/third_party/python/Modules/_decimal/libmpdec/difradix2.c
@ -39,10 +39,8 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-
 /* Bignum: The actual transform routine (decimation in frequency). */

-
 /*
 * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
 * n must be a power of two.
@ -55,7 +53,6 @@ bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
    mpd_size_t x = 0;
    mpd_size_t r = 0;
    mpd_uint_t t;
-
    do { /* Invariant: r = bitreverse(x) */
        if (r > x) {
            t = a[x];
@ -72,105 +69,68 @@ bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
    } while (x < n);
 }

-
 /* Fast Number Theoretic Transform, decimation in frequency. */
 void
 fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
 {
    mpd_uint_t *wtable = tparams->wtable;
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t u0, u1, v0, v1;
    mpd_uint_t w, w0, w1, wstep;
    mpd_size_t m, mhalf;
    mpd_size_t j, r;
-
-
    assert(ispower2(n));
    assert(n >= 4);
-
    SETMODULUS(tparams->modnum);
-
    /* m == n */
    mhalf = n / 2;
    for (j = 0; j < mhalf; j += 2) {
-
        w0 = wtable[j];
        w1 = wtable[j+1];
-
        u0 = a[j];
        v0 = a[j+mhalf];
-
        u1 = a[j+1];
        v1 = a[j+1+mhalf];
-
        a[j] = addmod(u0, v0, umod);
        v0 = submod(u0, v0, umod);
-
        a[j+1] = addmod(u1, v1, umod);
        v1 = submod(u1, v1, umod);
-
        MULMOD2(&v0, w0, &v1, w1);
-
        a[j+mhalf] = v0;
        a[j+1+mhalf] = v1;
-
    }
-
    wstep = 2;
    for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
-
        mhalf = m / 2;
-
        /* j == 0 */
        for (r = 0; r < n; r += 2*m) {
-
            u0 = a[r];
            v0 = a[r+mhalf];
-
            u1 = a[m+r];
            v1 = a[m+r+mhalf];
-
            a[r] = addmod(u0, v0, umod);
            v0 = submod(u0, v0, umod);
-
            a[m+r] = addmod(u1, v1, umod);
            v1 = submod(u1, v1, umod);
-
            a[r+mhalf] = v0;
            a[m+r+mhalf] = v1;
        }
-
        for (j = 1; j < mhalf; j++) {
-
            w = wtable[j*wstep];
-
            for (r = 0; r < n; r += 2*m) {
-
                u0 = a[r+j];
                v0 = a[r+j+mhalf];
-
                u1 = a[m+r+j];
                v1 = a[m+r+j+mhalf];
-
                a[r+j] = addmod(u0, v0, umod);
                v0 = submod(u0, v0, umod);
-
                a[m+r+j] = addmod(u1, v1, umod);
                v1 = submod(u1, v1, umod);
-
                MULMOD2C(&v0, &v1, w);
-
                a[r+j+mhalf] = v0;
                a[m+r+j+mhalf] = v1;
            }
-
        }
-
    }
-
    bitreverse_permute(a, n);
 }
--- a/third_party/python/Modules/_decimal/libmpdec/difradix2.h
+++ b/third_party/python/Modules/_decimal/libmpdec/difradix2.h
@ -2,13 +2,7 @@
 #define DIF_RADIX2_H
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/numbertheory.h"
-/* clang-format off */

-/* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)
-
-void fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams);
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
+void fnt_dif2(mpd_uint_t[], mpd_size_t, struct fnt_params *);

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/fnt.c
+++ b/third_party/python/Modules/_decimal/libmpdec/fnt.c
@ -40,25 +40,20 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-
 /* Bignum: Fast transform for medium-sized coefficients. */

-
 /* forward transform, sign = -1 */
 int
 std_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
 {
    struct fnt_params *tparams;
-
    assert(ispower2(n));
    assert(n >= 4);
    assert(n <= 3*MPD_MAXTRANSFORM_2N);
-
    if ((tparams = _mpd_init_fnt_params(n, -1, modnum)) == NULL) {
        return 0;
    }
    fnt_dif2(a, n, tparams);
-
    mpd_free(tparams);
    return 1;
 }
@ -68,16 +63,13 @@ int
 std_inv_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
 {
    struct fnt_params *tparams;
-
    assert(ispower2(n));
    assert(n >= 4);
    assert(n <= 3*MPD_MAXTRANSFORM_2N);
-
    if ((tparams = _mpd_init_fnt_params(n, 1, modnum)) == NULL) {
        return 0;
    }
    fnt_dif2(a, n, tparams);
-
    mpd_free(tparams);
    return 1;
 }
--- a/third_party/python/Modules/_decimal/libmpdec/fnt.h
+++ b/third_party/python/Modules/_decimal/libmpdec/fnt.h
@ -1,14 +1,10 @@
 #ifndef FNT_H
 #define FNT_H
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-/* clang-format off */

 /* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)

-int std_fnt(mpd_uint_t a[], mpd_size_t n, int modnum);
-int std_inv_fnt(mpd_uint_t a[], mpd_size_t n, int modnum);
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
+int std_fnt(mpd_uint_t[], mpd_size_t, int);
+int std_inv_fnt(mpd_uint_t[], mpd_size_t, int);

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/fourstep.c
+++ b/third_party/python/Modules/_decimal/libmpdec/fourstep.c
@ -40,12 +40,261 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-
-/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
-   form 3 * 2**n (See literature/matrix-transform.txt). */
+/*
+                 Cache Efficient Matrix Fourier Transform
+                        for arrays of form 3×2ⁿ


-#ifndef PPRO
+The Matrix Fourier Transform
+════════════════════════════
+
+In libmpdec, the Matrix Fourier Transform [1] is called four-step
+transform after a variant that appears in [2]. The algorithm requires
+that the input array can be viewed as an R*C matrix.
+
+All operations are done modulo p. For readability, the proofs drop all
+instances of (mod p).
+
+
+Algorithm four-step (forward transform)
+───────────────────────────────────────
+
+  a := input array
+  d := len(a) = R * C
+  p := prime
+  w := primitive root of unity of the prime field
+  r := w**((p-1)/d)
+  A := output array
+
+  1) Apply a length R FNT to each column.
+
+  2) Multiply each matrix element (addressed by j*C+m) by r**(j*m).
+
+  3) Apply a length C FNT to each row.
+
+  4) Transpose the matrix.
+
+
+Proof (forward transform)
+─────────────────────────
+
+  The algorithm can be derived starting from the regular definition of
+  the finite-field transform of length d:
+
+            d-1
+           ,────
+           \
+    A[k] =  |  a[l]  × r**(k × l)
+           /
+           `────
+           l = 0
+
+
+  The sum can be rearranged into the sum of the sums of columns:
+
+            C-1     R-1
+           ,────   ,────
+           \       \
+         =  |       |  a[i × C + j] × r**(k × (i × C + j))
+           /       /
+           `────   `────
+           j = 0   i = 0
+
+
+  Extracting a constant from the inner sum:
+
+            C-1           R-1
+           ,────         ,────
+           \             \
+         =  | r**(k×j) ×  |  a[i × C + j] × r**(k × i × C)
+           /             /
+           `────         `────
+           j = 0         i = 0
+
+
+  Without any loss of generality, let k = n × R + m,
+  where n < C and m < R:
+
+                C-1                          R-1
+               ,────                        ,────
+               \                            \
+    A[n×R+m] =  |  r**(R×n×j) × r**(m×j)  ×  |  a[i×C+j] × r**(R×C×n×i) × r**(C×m×i)
+               /                            /
+               `────                        `────
+               j = 0                        i = 0
+
+
+  Since r = w ** ((p-1) / (R×C)):
+
+     a) r**(R×C×n×i) = w**((p-1)×n×i) = 1
+
+     b) r**(C×m×i) = w**((p-1) / R) ** (m×i) = r_R ** (m×i)
+
+     c) r**(R×n×j) = w**((p-1) / C) ** (n×j) = r_C ** (n×j)
+
+     r_R := root of the subfield of length R.
+     r_C := root of the subfield of length C.
+
+
+                C-1                             R-1
+               ,────                           ,────
+               \                               \
+    A[n×R+m] =  |  r_C**(n×j) × [ r**(m×j)  ×   |  a[i×C+j] × r_R**(m×i) ]
+               /                     ^         /
+               `────                 |         `────    1) transform the columns
+               j = 0                 |         i = 0
+                 ^                   |
+                 |                   `-- 2) multiply
+                 |
+                 `-- 3) transform the rows
+
+
+   Note that the entire RHS is a function of n and m and that the results
+   for each pair (n, m) are stored in Fortran order.
+
+   Let the term in square brackets be 𝑓(m, j). Step 1) and 2) precalculate
+   the term for all (m, j). After that, the original matrix is now a lookup
+   table with the mth element in the jth column at location m × C + j.
+
+   Let the complete RHS be g(m, n). Step 3) does an in-place transform of
+   length C on all rows. After that, the original matrix is now a lookup
+   table with the mth element in the nth column at location m × C + n.
+
+   But each (m, n) pair should be written to location n × R + m. Therefore,
+   step 4) transposes the result of step 3).
+
+
+
+Algorithm four-step (inverse transform)
+───────────────────────────────────────
+
+  A  := input array
+  d  := len(A) = R × C
+  p  := prime
+  d′ := d⁽ᵖ⁻²⁾               # inverse of d
+  w  := primitive root of unity of the prime field
+  r  := w**((p-1)/d)         # root of the subfield
+  r′ := w**((p-1) - (p-1)/d) # inverse of r
+  a  := output array
+
+  0) View the matrix as a C×R matrix.
+
+  1) Transpose the matrix, producing an R×C matrix.
+
+  2) Apply a length C FNT to each row.
+
+  3) Multiply each matrix element (addressed by i×C+n) by r**(i×n).
+
+  4) Apply a length R FNT to each column.
+
+
+Proof (inverse transform)
+─────────────────────────
+
+  The algorithm can be derived starting from the regular definition of
+  the finite-field inverse transform of length d:
+
+                  d-1
+                 ,────
+                 \
+    a[k] =  d′ ×  |  A[l]  × r′ ** (k × l)
+                 /
+                 `────
+                 l = 0
+
+
+  The sum can be rearranged into the sum of the sums of columns. Note
+  that at this stage we still have a C*R matrix, so C denotes the number
+  of rows:
+
+                  R-1     C-1
+                 ,────   ,────
+                 \       \
+         =  d′ ×  |       |  A[j × R + i] × r′ ** (k × (j × R + i))
+                 /       /
+                 `────   `────
+                 i = 0   j = 0
+
+
+  Extracting a constant from the inner sum:
+
+                  R-1                C-1
+                 ,────              ,────
+                 \                  \
+         =  d′ ×  |  r′ ** (k×i)  ×  |  A[j × R + i] × r′ ** (k × j × R)
+                 /                  /
+                 `────              `────
+                 i = 0              j = 0
+
+
+  Without any loss of generality, let k = m * C + n,
+  where m < R and n < C:
+
+                     R-1                                  C-1
+                    ,────                                ,────
+                    \                                    \
+    a[m×C+n] = d′ ×  |  r′ ** (C×m×i) ×  r′ ** (n×i)   ×  |  A[j×R+i] × r′ ** (R×C×m×j) × r′ ** (R×n×j)
+                    /                                    /
+                    `────                                `────
+                    i = 0                                j = 0
+
+
+  Since r′ = w**((p-1) - (p-1)/d) and d = R×C:
+
+     a) r′ ** (R×C×m×j) = w**((p-1)×R×C×m×j - (p-1)×m×j) = 1
+
+     b) r′ ** (C×m×i) = w**((p-1)×C - (p-1)/R) ** (m×i) = r_R′ ** (m×i)
+
+     c) r′ ** (R×n×j) = r_C′ ** (n×j)
+
+     d) d′ = d⁽ᵖ⁻²⁾ = (R×C)⁽ᵖ⁻²⁾ = R⁽ᵖ⁻²⁾ × C⁽ᵖ⁻²⁾ = R′ × C′
+
+     r_R′ := inverse of the root of the subfield of length R.
+     r_C′ := inverse of the root of the subfield of length C.
+     R′   := inverse of R
+     C′   := inverse of C
+
+
+                     R-1                                      C-1
+                    ,────                                    ,────  2) transform the rows of A^T
+                    \                                        \
+    a[m×C+n] = R′ ×  |  r_R′ ** (m×i) × [ r′ ** (n×i) × C′ ×  |  A[j×R+i] × r_C′ ** (n×j) ]
+                    /                           ^            /       ^
+                    `────                       |            `────   |
+                    i = 0                       |            j = 0   |
+                      ^                         |                    `── 1) Transpose input matrix
+                      |                         `── 3) multiply             to address elements by
+                      |                                                     i × C + j
+                      `── 4) transform the columns
+
+
+
+   Note that the entire RHS is a function of m and n and that the results
+   for each pair (m, n) are stored in C order.
+
+   Let the term in square brackets be 𝑓(n, i). Without step 1), the sum
+   would perform a length C transform on the columns of the input matrix.
+   This is a) inefficient and b) the results are needed in C order, so
+   step 1) exchanges rows and columns.
+
+   Step 2) and 3) precalculate 𝑓(n, i) for all (n, i). After that, the
+   original matrix is now a lookup table with the ith element in the nth
+   column at location i × C + n.
+
+   Let the complete RHS be g(m, n). Step 4) does an in-place transform of
+   length R on all columns. After that, the original matrix is now a lookup
+   table with the mth element in the nth column at location m × C + n,
+   which means that all a[k] = a[m × C + n] are in the correct order.
+
+
+──
+
+  [1] Joerg Arndt: "Matters Computational"
+      http://www.jjj.de/fxt/
+  [2] David H. Bailey: FFTs in External or Hierarchical Memory
+      http://crd.lbl.gov/~dhbailey/dhbpapers/
+*/
+
 static inline void
 std_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3,
              mpd_uint_t w3table[3], mpd_uint_t umod)
@ -53,90 +302,32 @@ std_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3,
    mpd_uint_t r1, r2;
    mpd_uint_t w;
    mpd_uint_t s, tmp;
-
-
    /* k = 0 -> w = 1 */
    s = *x1;
    s = addmod(s, *x2, umod);
    s = addmod(s, *x3, umod);
-
    r1 = s;
-
    /* k = 1 */
    s = *x1;
-
    w = w3table[1];
    tmp = MULMOD(*x2, w);
    s = addmod(s, tmp, umod);
-
    w = w3table[2];
    tmp = MULMOD(*x3, w);
    s = addmod(s, tmp, umod);
-
    r2 = s;
-
    /* k = 2 */
    s = *x1;
-
    w = w3table[2];
    tmp = MULMOD(*x2, w);
    s = addmod(s, tmp, umod);
-
    w = w3table[1];
    tmp = MULMOD(*x3, w);
    s = addmod(s, tmp, umod);
-
    *x3 = s;
    *x2 = r2;
    *x1 = r1;
 }
-#else /* PPRO */
-static inline void
-ppro_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_uint_t w3table[3],
-               mpd_uint_t umod, double *dmod, uint32_t dinvmod[3])
-{
-    mpd_uint_t r1, r2;
-    mpd_uint_t w;
-    mpd_uint_t s, tmp;
-
-
-    /* k = 0 -> w = 1 */
-    s = *x1;
-    s = addmod(s, *x2, umod);
-    s = addmod(s, *x3, umod);
-
-    r1 = s;
-
-    /* k = 1 */
-    s = *x1;
-
-    w = w3table[1];
-    tmp = ppro_mulmod(*x2, w, dmod, dinvmod);
-    s = addmod(s, tmp, umod);
-
-    w = w3table[2];
-    tmp = ppro_mulmod(*x3, w, dmod, dinvmod);
-    s = addmod(s, tmp, umod);
-
-    r2 = s;
-
-    /* k = 2 */
-    s = *x1;
-
-    w = w3table[2];
-    tmp = ppro_mulmod(*x2, w, dmod, dinvmod);
-    s = addmod(s, tmp, umod);
-
-    w = w3table[1];
-    tmp = ppro_mulmod(*x3, w, dmod, dinvmod);
-    s = addmod(s, tmp, umod);
-
-    *x3 = s;
-    *x2 = r2;
-    *x1 = r1;
-}
-#endif
-

 /* forward transform, sign = -1; transform length = 3 * 2**n */
 int
@ -148,25 +339,15 @@ four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    mpd_uint_t kernel, w0, w1, wstep;
    mpd_uint_t *s, *p0, *p1, *p2;
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_size_t i, k;
-
-
    assert(n >= 48);
    assert(n <= 3*MPD_MAXTRANSFORM_2N);
-
-
    /* Length R transform on the columns. */
    SETMODULUS(modnum);
    _mpd_init_w3table(w3table, -1, modnum);
    for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {
-
        SIZE3_NTT(p0, p1, p2, w3table);
    }
-
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    kernel = _mpd_getkernel(n, -1, modnum);
    for (i = 1; i < R; i++) {
@ -182,20 +363,17 @@ four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
            a[i*C+k+1] = x1;
        }
    }
-
    /* Length C transform on the rows. */
    for (s = a; s < a+n; s += C) {
        if (!six_step_fnt(s, C, modnum)) {
            return 0;
        }
    }
-
 #if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix. */
    transpose_3xpow2(a, R, C);
 #endif
-
    return 1;
 }

@ -209,30 +387,20 @@ inv_four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    mpd_uint_t kernel, w0, w1, wstep;
    mpd_uint_t *s, *p0, *p1, *p2;
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_size_t i, k;
-
-
    assert(n >= 48);
    assert(n <= 3*MPD_MAXTRANSFORM_2N);
-
-
 #if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix, producing an R*C matrix. */
    transpose_3xpow2(a, C, R);
 #endif
-
    /* Length C transform on the rows. */
    for (s = a; s < a+n; s += C) {
        if (!inv_six_step_fnt(s, C, modnum)) {
            return 0;
        }
    }
-
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, 1, modnum);
@ -249,13 +417,10 @@ inv_four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
            a[i*C+k+1] = x1;
        }
    }
-
    /* Length R transform on the columns. */
    _mpd_init_w3table(w3table, 1, modnum);
    for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {
-
        SIZE3_NTT(p0, p1, p2, w3table);
    }
-
    return 1;
 }
--- a/third_party/python/Modules/_decimal/libmpdec/fourstep.h
+++ b/third_party/python/Modules/_decimal/libmpdec/fourstep.h
@ -1,14 +1,8 @@
 #ifndef FOUR_STEP_H
 #define FOUR_STEP_H
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-/* clang-format off */

-/* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)
-
-int four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum);
-int inv_four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum);
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
+int four_step_fnt(mpd_uint_t *, mpd_size_t, int);
+int inv_four_step_fnt(mpd_uint_t *, mpd_size_t, int);

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/io.c
+++ b/third_party/python/Modules/_decimal/libmpdec/io.c
@ -70,7 +70,6 @@ _mpd_strneq(const char *s, const char *l, const char *u, size_t n)
        }
        s++; u++; l++;
    }
-
    return 1;
 }

@ -79,12 +78,10 @@ strtoexp(const char *s)
 {
    char *end;
    mpd_ssize_t retval;
-
    errno = 0;
    retval = mpd_strtossize(s, &end, 10);
    if (errno == 0 && !(*s != '\0' && *end == '\0'))
        errno = EINVAL;
-
    return retval;
 }

@ -206,11 +203,9 @@ mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
    const char *dpoint = NULL, *exp = NULL;
    size_t digits;
    uint8_t sign = MPD_POS;
-
    mpd_set_flags(dec, 0);
    dec->len = 0;
    dec->exp = 0;
-
    /* sign */
    if (*s == '+') {
        s++;
@ -220,7 +215,6 @@ mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
        sign = MPD_NEG;
        s++;
    }
-
    if (_mpd_strneq(s, "nan", "NAN", 3)) { /* NaN */
        s += 3;
        mpd_setspecial(dec, sign, MPD_NAN);
@ -265,7 +259,6 @@ mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
        /* scan for start of coefficient, decimal point, indicator, end */
        if ((coeff = scan_dpoint_exp(s, &dpoint, &exp, &end)) == NULL)
            goto conversion_error;
-
        /* numeric-value: [exponent-part] */
        if (exp) {
            /* exponent-part */
@ -273,17 +266,15 @@ mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
            dec->exp = strtoexp(exp);
            if (errno) {
                if (!(errno == ERANGE &&
-                     (dec->exp == MPD_SSIZE_MAX ||
-                      dec->exp == MPD_SSIZE_MIN)))
+                      (dec->exp == MPD_SSIZE_MAX ||
+                       dec->exp == MPD_SSIZE_MIN)))
                    goto conversion_error;
            }
        }
-
-            digits = end - coeff;
+        digits = end - coeff;
        if (dpoint) {
            size_t fracdigits = end-dpoint-1;
            if (dpoint > coeff) digits--;
-
            if (fracdigits > MPD_MAX_PREC) {
                goto conversion_error;
            }
@ -304,9 +295,7 @@ mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
            dec->exp = MPD_SSIZE_MIN+1;
        }
    }
-
    _mpd_idiv_word(&q, &r, (mpd_ssize_t)digits, MPD_RDIGITS);
-
    len = (r == 0) ? q : q+1;
    if (len == 0) {
        goto conversion_error; /* GCOV_NOT_REACHED */
@ -316,13 +305,10 @@ mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx,
        return;
    }
    dec->len = len;
-
    string_to_coeff(dec->data, coeff, dpoint, (int)r, len);
-
    mpd_setdigits(dec);
    mpd_qfinalize(dec, ctx, status);
    return;
-
 conversion_error:
    /* standard wants a positive NaN */
    mpd_seterror(dec, MPD_Conversion_syntax, status);
@ -336,7 +322,6 @@ static inline char *
 word_to_string(char *s, mpd_uint_t x, int n, char *dot)
 {
    switch(n) {
-#ifdef CONFIG_64
    case 20: EXTRACT_DIGIT(s, x, 10000000000000000000ULL, dot); /* GCOV_NOT_REACHED */
    case 19: EXTRACT_DIGIT(s, x, 1000000000000000000ULL, dot);
    case 18: EXTRACT_DIGIT(s, x, 100000000000000000ULL, dot);
@ -347,7 +332,6 @@ word_to_string(char *s, mpd_uint_t x, int n, char *dot)
    case 13: EXTRACT_DIGIT(s, x, 1000000000000ULL, dot);
    case 12: EXTRACT_DIGIT(s, x, 100000000000ULL, dot);
    case 11: EXTRACT_DIGIT(s, x, 10000000000ULL, dot);
-#endif
    case 10: EXTRACT_DIGIT(s, x, 1000000000UL, dot);
    case 9:  EXTRACT_DIGIT(s, x, 100000000UL, dot);
    case 8:  EXTRACT_DIGIT(s, x, 10000000UL, dot);
@ -359,7 +343,6 @@ word_to_string(char *s, mpd_uint_t x, int n, char *dot)
    case 2:  EXTRACT_DIGIT(s, x, 10UL, dot);
    default: if (s == dot) *s++ = '.'; *s++ = '0' + (char)x;
    }
-
    *s = '\0';
    return s;
 }
@ -369,13 +352,11 @@ static inline char *
 exp_to_string(char *s, mpd_ssize_t x)
 {
    char sign = '+';
-
    if (x < 0) {
        sign = '-';
        x = -x;
    }
    *s++ = sign;
-
    return word_to_string(s, x, mpd_word_digits(x), NULL);
 }

@ -572,7 +553,6 @@ _mpd_to_string(char **result, const mpd_t *dec, int flags, mpd_ssize_t dplace)
            return -1;
        }

-
        if (mpd_isnegative(dec)) {
            *cp++ = '-';
        }
@ -678,8 +658,6 @@ _mpd_copy_utf8(char dest[5], const char *s)
    const uchar *cp = (const uchar *)s;
    uchar lb, ub;
    int count, i;
-
-
    if (*cp == 0) {
        /* empty string */
        dest[0] = '\0';
@ -727,7 +705,6 @@ _mpd_copy_utf8(char dest[5], const char *s)
        /* invalid */
        goto error;
    }
-
    dest[0] = *cp++;
    if (*cp < lb || ub < *cp) {
        goto error;
@ -740,9 +717,7 @@ _mpd_copy_utf8(char dest[5], const char *s)
        dest[i] = *cp++;
    }
    dest[i] = '\0';
-
    return count;
-
 error:
    dest[0] = '\0';
    return -1;
@ -787,7 +762,6 @@ mpd_parse_fmt_str(mpd_spec_t *spec, const char *fmt, int caps)
    spec->sep = "";
    spec->grouping = "";

-
    /* presume that the first character is a UTF-8 fill character */
    if ((n = _mpd_copy_utf8(spec->fill, cp)) < 0) {
        return 0;
@ -910,9 +884,8 @@ typedef struct {
 static inline void
 _mpd_bcopy(char *dest, const char *src, mpd_ssize_t n)
 {
-    while (--n >= 0) {
-        dest[n] = src[n];
-    }
+    /* [jart] just use memmove */
+    memmove(dest, src, n);
 }

 static inline void
@ -921,7 +894,6 @@ _mbstr_copy_char(mpd_mbstr_t *dest, const char *src, mpd_ssize_t n)
    dest->nbytes += n;
    dest->nchars += (n > 0 ? 1 : 0);
    dest->cur -= n;
-
    if (dest->data != NULL) {
        _mpd_bcopy(dest->data+dest->cur, src, n);
    }
@ -933,7 +905,6 @@ _mbstr_copy_ascii(mpd_mbstr_t *dest, const char *src, mpd_ssize_t n)
    dest->nbytes += n;
    dest->nchars += n;
    dest->cur -= n;
-
    if (dest->data != NULL) {
        _mpd_bcopy(dest->data+dest->cur, src, n);
    }
@ -945,7 +916,6 @@ _mbstr_copy_pad(mpd_mbstr_t *dest, mpd_ssize_t n)
    dest->nbytes += n;
    dest->nchars += n;
    dest->cur -= n;
-
    if (dest->data != NULL) {
        char *cp = dest->data + dest->cur;
        while (--n >= 0) {
@ -1452,9 +1422,7 @@ mpd_snprint_flags(char *dest, int nmemb, uint32_t flags)
 {
    char *cp;
    int n, j;
-
    assert(nmemb >= MPD_MAX_FLAG_STRING);
-
    *dest = '\0'; cp = dest;
    for (j = 0; j < MPD_NUM_FLAGS; j++) {
        if (flags & (1U<<j)) {
@ -1463,11 +1431,9 @@ mpd_snprint_flags(char *dest, int nmemb, uint32_t flags)
            cp += n; nmemb -= n;
        }
    }
-
    if (cp != dest) {
        *(--cp) = '\0';
    }
-
    return (int)(cp-dest);
 }

@ -1477,17 +1443,14 @@ mpd_lsnprint_flags(char *dest, int nmemb, uint32_t flags, const char *flag_strin
 {
    char *cp;
    int n, j;
-
    assert(nmemb >= MPD_MAX_FLAG_LIST);
    if (flag_string == NULL) {
        flag_string = mpd_flag_string;
    }
-
    *dest = '[';
    *(dest+1) = '\0';
    cp = dest+1;
    --nmemb;
-
    for (j = 0; j < MPD_NUM_FLAGS; j++) {
        if (flags & (1U<<j)) {
            n = snprintf(cp, nmemb, "%s, ", flag_string[j]);
@ -1495,15 +1458,12 @@ mpd_lsnprint_flags(char *dest, int nmemb, uint32_t flags, const char *flag_strin
            cp += n; nmemb -= n;
        }
    }
-
    /* erase the last ", " */
    if (cp != dest+1) {
        cp -= 2;
    }
-
    *cp++ = ']';
    *cp = '\0';
-
    return (int)(cp-dest); /* strlen, without NUL terminator */
 }

@ -1514,17 +1474,14 @@ mpd_lsnprint_signals(char *dest, int nmemb, uint32_t flags, const char *signal_s
    char *cp;
    int n, j;
    int ieee_invalid_done = 0;
-
    assert(nmemb >= MPD_MAX_SIGNAL_LIST);
    if (signal_string == NULL) {
        signal_string = mpd_signal_string;
    }
-
    *dest = '[';
    *(dest+1) = '\0';
    cp = dest+1;
    --nmemb;
-
    for (j = 0; j < MPD_NUM_FLAGS; j++) {
        uint32_t f = flags & (1U<<j);
        if (f) {
@ -1539,15 +1496,12 @@ mpd_lsnprint_signals(char *dest, int nmemb, uint32_t flags, const char *signal_s
            cp += n; nmemb -= n;
        }
    }
-
    /* erase the last ", " */
    if (cp != dest+1) {
        cp -= 2;
    }
-
    *cp++ = ']';
    *cp = '\0';
-
    return (int)(cp-dest); /* strlen, without NUL terminator */
 }

@ -1556,7 +1510,6 @@ void
 mpd_fprint(FILE *file, const mpd_t *dec)
 {
    char *decstring;
-
    decstring = mpd_to_sci(dec, 1);
    if (decstring != NULL) {
        fprintf(file, "%s\n", decstring);
@ -1571,7 +1524,6 @@ void
 mpd_print(const mpd_t *dec)
 {
    char *decstring;
-
    decstring = mpd_to_sci(dec, 1);
    if (decstring != NULL) {
        printf("%s\n", decstring);
--- a/third_party/python/Modules/_decimal/libmpdec/mpalloc.h
+++ b/third_party/python/Modules/_decimal/libmpdec/mpalloc.h
@ -1,15 +1,9 @@
 #ifndef MPALLOC_H
 #define MPALLOC_H
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-/* clang-format off */

-/* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)
-
-int mpd_switch_to_dyn(mpd_t *result, mpd_ssize_t size, uint32_t *status);
-int mpd_switch_to_dyn_zero(mpd_t *result, mpd_ssize_t size, uint32_t *status);
-int mpd_realloc_dyn(mpd_t *result, mpd_ssize_t size, uint32_t *status);
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
+int mpd_switch_to_dyn(mpd_t *, mpd_ssize_t, uint32_t *);
+int mpd_switch_to_dyn_zero(mpd_t *, mpd_ssize_t, uint32_t *);
+int mpd_realloc_dyn(mpd_t *, mpd_ssize_t, uint32_t *);

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c
+++ b/third_party/python/Modules/_decimal/libmpdec/mpdecimal.c
--- a/third_party/python/Modules/_decimal/libmpdec/mpdecimal.h
+++ b/third_party/python/Modules/_decimal/libmpdec/mpdecimal.h
@ -5,85 +5,19 @@
 #include "libc/limits.h"
 #include "libc/stdio/stdio.h"
 #include "third_party/python/pyconfig.h"
+COSMOPOLITAN_C_START_
 /* clang-format off */

-#ifdef __cplusplus
-extern "C" {
-  #ifndef __STDC_LIMIT_MACROS
-    #define __STDC_LIMIT_MACROS
-    #define MPD_CLEAR_STDC_LIMIT_MACROS
-  #endif
-#endif
-
-#ifndef __GNUC_STDC_INLINE__
-#define __GNUC_STDC_INLINE__ 1
-#endif
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-#define UNUSED __attribute__((__unused__))
-#else
-#define UNUSED
-#endif
-#if (defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)) && \
-  defined(__GNUC__) && __GNUC__ >= 4 && !defined(__INTEL_COMPILER)
-#define MPD_PRAGMA(x) _Pragma(x)
-#define MPD_HIDE_SYMBOLS_START "GCC visibility push(hidden)"
-#define MPD_HIDE_SYMBOLS_END "GCC visibility pop"
-#else
-#define MPD_PRAGMA(x)
-#define MPD_HIDE_SYMBOLS_START
-#define MPD_HIDE_SYMBOLS_END
-#endif
-#define EXTINLINE
-
-
-/* This header file is internal for the purpose of building _decimal.so.
- * All symbols should have local scope in the DSO. */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)
-
-/******************************************************************************/
-/*                                  Version                                   */
-/******************************************************************************/
-
+#define MPD_VERSION "2.4.2"
 #define MPD_MAJOR_VERSION 2
 #define MPD_MINOR_VERSION 4
 #define MPD_MICRO_VERSION 2
-
-#define MPD_VERSION "2.4.2"
-
 #define MPD_VERSION_HEX ((MPD_MAJOR_VERSION << 24) | \
                         (MPD_MINOR_VERSION << 16) | \
                         (MPD_MICRO_VERSION <<  8))

 const char *mpd_version(void);

-
-/******************************************************************************/
-/*                              Configuration                                 */
-/******************************************************************************/
-
-#if defined(CONFIG_64) || defined(CONFIG_32)
-#error "cannot use CONFIG_64 or CONFIG_32 with UNIVERSAL."
-#endif
-#if defined(__ppc__)
-#define CONFIG_32
-#define ANSI
-#elif defined(__ppc64__)
-#define CONFIG_64
-#define ANSI
-#elif defined(__i386__)
-#define CONFIG_32
-#define ANSI
-#elif defined(__x86_64__)
-#define CONFIG_64
-#define ASM
-#else
-#error "unknown architecture for universal build."
-#endif
-
-
-/* BEGIN CONFIG_64 */
-#if defined(CONFIG_64)
-/* types for modular and base arithmetic */
 #define MPD_UINT_MAX UINT64_MAX
 #define MPD_BITS_PER_UINT 64
 typedef uint64_t mpd_uint_t;  /* unsigned mod type */
@ -97,7 +31,6 @@ typedef size_t mpd_size_t; /* unsigned size type */
 typedef int64_t mpd_ssize_t;
 #define _mpd_strtossize strtoll

-/* decimal arithmetic */
 #define MPD_RADIX 10000000000000000000ULL  /* 10**19 */
 #define MPD_RDIGITS 19
 #define MPD_MAX_POW10 19
@ -117,57 +50,6 @@ typedef int64_t mpd_ssize_t;
 /* conversion specifiers */
 #define PRI_mpd_uint_t PRIu64
 #define PRI_mpd_ssize_t PRIi64
-/* END CONFIG_64 */
-
-
-/* BEGIN CONFIG_32 */
-#elif defined(CONFIG_32)
-/* types for modular and base arithmetic */
-#define MPD_UINT_MAX UINT32_MAX
-#define MPD_BITS_PER_UINT 32
-typedef uint32_t mpd_uint_t;  /* unsigned mod type */
-
-#ifndef LEGACY_COMPILER
-#define MPD_UUINT_MAX UINT64_MAX
-typedef uint64_t mpd_uuint_t; /* double width unsigned mod type */
-#endif
-
-#define MPD_SIZE_MAX SIZE_MAX
-typedef size_t mpd_size_t; /* unsigned size type */
-
-/* type for dec->len, dec->exp, ctx->prec */
-#define MPD_SSIZE_MAX INT32_MAX
-#define MPD_SSIZE_MIN INT32_MIN
-typedef int32_t mpd_ssize_t;
-#define _mpd_strtossize strtol
-
-/* decimal arithmetic */
-#define MPD_RADIX 1000000000UL  /* 10**9 */
-#define MPD_RDIGITS 9
-#define MPD_MAX_POW10 9
-#define MPD_EXPDIGITS 10 /* MPD_EXPDIGITS <= MPD_RDIGITS+1 */
-
-#define MPD_MAXTRANSFORM_2N 33554432UL /* 2**25 */
-#define MPD_MAX_PREC 425000000L
-#define MPD_MAX_PREC_LOG2 32
-#define MPD_ELIMIT 425000001L
-#define MPD_MAX_EMAX 425000000L        /* ELIMIT-1 */
-#define MPD_MIN_EMIN (-425000000L)     /* -EMAX */
-#define MPD_MIN_ETINY (MPD_MIN_EMIN-(MPD_MAX_PREC-1))
-#define MPD_EXP_INF 1000000001L      /* allows for emax=999999999 in the tests */
-#define MPD_EXP_CLAMP (-2000000001L) /* allows for emin=-999999999 in the tests */
-#define MPD_MAXIMPORT 94444445L      /* ceil((2*MPD_MAX_PREC)/MPD_RDIGITS) */
-
-/* conversion specifiers */
-#define PRI_mpd_uint_t PRIu32
-#define PRI_mpd_ssize_t PRIi32
-/* END CONFIG_32 */
-
-#else
-  #error "define CONFIG_64 or CONFIG_32"
-#endif
-/* END CONFIG */
-

 #if MPD_SIZE_MAX != MPD_UINT_MAX
  #error "unsupported platform: need mpd_size_t == mpd_uint_t"
@ -256,39 +138,35 @@ typedef struct mpd_context_t {
 #define MPD_DECIMAL64 64
 #define MPD_DECIMAL128 128

-
 #define MPD_MINALLOC_MIN 2
 #define MPD_MINALLOC_MAX 64
+
 extern mpd_ssize_t MPD_MINALLOC;
 extern void (* mpd_traphandler)(mpd_context_t *);
 void mpd_dflt_traphandler(mpd_context_t *);
-
-void mpd_setminalloc(mpd_ssize_t n);
-void mpd_init(mpd_context_t *ctx, mpd_ssize_t prec);
-
-void mpd_maxcontext(mpd_context_t *ctx);
-void mpd_defaultcontext(mpd_context_t *ctx);
-void mpd_basiccontext(mpd_context_t *ctx);
-int mpd_ieee_context(mpd_context_t *ctx, int bits);
-
-mpd_ssize_t mpd_getprec(const mpd_context_t *ctx);
-mpd_ssize_t mpd_getemax(const mpd_context_t *ctx);
-mpd_ssize_t mpd_getemin(const mpd_context_t *ctx);
-int mpd_getround(const mpd_context_t *ctx);
-uint32_t mpd_gettraps(const mpd_context_t *ctx);
-uint32_t mpd_getstatus(const mpd_context_t *ctx);
-int mpd_getclamp(const mpd_context_t *ctx);
-int mpd_getcr(const mpd_context_t *ctx);
-
-int mpd_qsetprec(mpd_context_t *ctx, mpd_ssize_t prec);
-int mpd_qsetemax(mpd_context_t *ctx, mpd_ssize_t emax);
-int mpd_qsetemin(mpd_context_t *ctx, mpd_ssize_t emin);
-int mpd_qsetround(mpd_context_t *ctx, int newround);
-int mpd_qsettraps(mpd_context_t *ctx, uint32_t flags);
-int mpd_qsetstatus(mpd_context_t *ctx, uint32_t flags);
-int mpd_qsetclamp(mpd_context_t *ctx, int c);
-int mpd_qsetcr(mpd_context_t *ctx, int c);
-void mpd_addstatus_raise(mpd_context_t *ctx, uint32_t flags);
+void mpd_setminalloc(mpd_ssize_t);
+void mpd_init(mpd_context_t *, mpd_ssize_t);
+void mpd_maxcontext(mpd_context_t *);
+void mpd_defaultcontext(mpd_context_t *);
+void mpd_basiccontext(mpd_context_t *);
+int mpd_ieee_context(mpd_context_t *, int);
+mpd_ssize_t mpd_getprec(const mpd_context_t *);
+mpd_ssize_t mpd_getemax(const mpd_context_t *);
+mpd_ssize_t mpd_getemin(const mpd_context_t *);
+int mpd_getround(const mpd_context_t *);
+uint32_t mpd_gettraps(const mpd_context_t *);
+uint32_t mpd_getstatus(const mpd_context_t *);
+int mpd_getclamp(const mpd_context_t *);
+int mpd_getcr(const mpd_context_t *);
+int mpd_qsetprec(mpd_context_t *, mpd_ssize_t);
+int mpd_qsetemax(mpd_context_t *, mpd_ssize_t);
+int mpd_qsetemin(mpd_context_t *, mpd_ssize_t);
+int mpd_qsetround(mpd_context_t *, int);
+int mpd_qsettraps(mpd_context_t *, uint32_t);
+int mpd_qsetstatus(mpd_context_t *, uint32_t);
+int mpd_qsetclamp(mpd_context_t *, int);
+int mpd_qsetcr(mpd_context_t *, int);
+void mpd_addstatus_raise(mpd_context_t *, uint32_t);


 /******************************************************************************/
@ -308,7 +186,6 @@ void mpd_addstatus_raise(mpd_context_t *ctx, uint32_t flags);
 #define MPD_CONST_DATA          ((uint8_t)128)
 #define MPD_DATAFLAGS (MPD_STATIC_DATA|MPD_SHARED_DATA|MPD_CONST_DATA)

-/* mpd_t */
 typedef struct mpd_t {
    uint8_t flags;
    mpd_ssize_t exp;
@ -318,7 +195,6 @@ typedef struct mpd_t {
    mpd_uint_t *data;
 } mpd_t;

-
 typedef unsigned char uchar;


@ -340,388 +216,352 @@ typedef struct mpd_spec_t {
 } mpd_spec_t;

 /* output to a string */
-char *mpd_to_sci(const mpd_t *dec, int fmt);
-char *mpd_to_eng(const mpd_t *dec, int fmt);
-mpd_ssize_t mpd_to_sci_size(char **res, const mpd_t *dec, int fmt);
-mpd_ssize_t mpd_to_eng_size(char **res, const mpd_t *dec, int fmt);
-int mpd_validate_lconv(mpd_spec_t *spec);
-int mpd_parse_fmt_str(mpd_spec_t *spec, const char *fmt, int caps);
-char *mpd_qformat_spec(const mpd_t *dec, const mpd_spec_t *spec, const mpd_context_t *ctx, uint32_t *status);
-char *mpd_qformat(const mpd_t *dec, const char *fmt, const mpd_context_t *ctx, uint32_t *status);
+char *mpd_to_sci(const mpd_t *, int);
+char *mpd_to_eng(const mpd_t *, int);
+mpd_ssize_t mpd_to_sci_size(char **, const mpd_t *, int);
+mpd_ssize_t mpd_to_eng_size(char **, const mpd_t *, int);
+int mpd_validate_lconv(mpd_spec_t *);
+int mpd_parse_fmt_str(mpd_spec_t *, const char *, int);
+char *mpd_qformat_spec(const mpd_t *, const mpd_spec_t *, const mpd_context_t *, uint32_t *);
+char *mpd_qformat(const mpd_t *, const char *, const mpd_context_t *, uint32_t *);

 #define MPD_NUM_FLAGS 15
 #define MPD_MAX_FLAG_STRING 208
 #define MPD_MAX_FLAG_LIST (MPD_MAX_FLAG_STRING+18)
 #define MPD_MAX_SIGNAL_LIST 121
-int mpd_snprint_flags(char *dest, int nmemb, uint32_t flags);
-int mpd_lsnprint_flags(char *dest, int nmemb, uint32_t flags, const char *flag_string[]);
-int mpd_lsnprint_signals(char *dest, int nmemb, uint32_t flags, const char *signal_string[]);
+
+int mpd_snprint_flags(char *, int, uint32_t);
+int mpd_lsnprint_flags(char *, int, uint32_t, const char *[]);
+int mpd_lsnprint_signals(char *, int, uint32_t, const char *[]);

 /* output to a file */
-void mpd_fprint(FILE *file, const mpd_t *dec);
-void mpd_print(const mpd_t *dec);
+void mpd_fprint(FILE *, const mpd_t *);
+void mpd_print(const mpd_t *);

 /* assignment from a string */
-void mpd_qset_string(mpd_t *dec, const char *s, const mpd_context_t *ctx, uint32_t *status);
+void mpd_qset_string(mpd_t *, const char *s, const mpd_context_t *, uint32_t *);

 /* set to NaN with error flags */
-void mpd_seterror(mpd_t *result, uint32_t flags, uint32_t *status);
+void mpd_seterror(mpd_t *, uint32_t, uint32_t *);
 /* set a special with sign and type */
-void mpd_setspecial(mpd_t *dec, uint8_t sign, uint8_t type);
+void mpd_setspecial(mpd_t *, uint8_t, uint8_t);
 /* set coefficient to zero or all nines */
-void mpd_zerocoeff(mpd_t *result);
-void mpd_qmaxcoeff(mpd_t *result, const mpd_context_t *ctx, uint32_t *status);
+void mpd_zerocoeff(mpd_t *);
+void mpd_qmaxcoeff(mpd_t *, const mpd_context_t *, uint32_t *);

 /* quietly assign a C integer type to an mpd_t */
-void mpd_qset_ssize(mpd_t *result, mpd_ssize_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qset_i32(mpd_t *result, int32_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qset_uint(mpd_t *result, mpd_uint_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qset_u32(mpd_t *result, uint32_t a, const mpd_context_t *ctx, uint32_t *status);
-#ifndef LEGACY_COMPILER
-void mpd_qset_i64(mpd_t *result, int64_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qset_u64(mpd_t *result, uint64_t a, const mpd_context_t *ctx, uint32_t *status);
-#endif
+void mpd_qset_ssize(mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qset_i32(mpd_t *, int32_t, const mpd_context_t *, uint32_t *);
+void mpd_qset_uint(mpd_t *, mpd_uint_t, const mpd_context_t *, uint32_t *);
+void mpd_qset_u32(mpd_t *, uint32_t, const mpd_context_t *, uint32_t *);
+void mpd_qset_i64(mpd_t *, int64_t, const mpd_context_t *, uint32_t *);
+void mpd_qset_u64(mpd_t *, uint64_t, const mpd_context_t *, uint32_t *);

 /* quietly assign a C integer type to an mpd_t with a static coefficient */
-void mpd_qsset_ssize(mpd_t *result, mpd_ssize_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsset_i32(mpd_t *result, int32_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsset_uint(mpd_t *result, mpd_uint_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsset_u32(mpd_t *result, uint32_t a, const mpd_context_t *ctx, uint32_t *status);
+void mpd_qsset_ssize(mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qsset_i32(mpd_t *, int32_t, const mpd_context_t *, uint32_t *);
+void mpd_qsset_uint(mpd_t *, mpd_uint_t, const mpd_context_t *, uint32_t *);
+void mpd_qsset_u32(mpd_t *, uint32_t, const mpd_context_t *, uint32_t *);
+mpd_ssize_t mpd_qget_ssize(const mpd_t *, uint32_t *);
+mpd_uint_t mpd_qget_uint(const mpd_t *, uint32_t *);
+mpd_uint_t mpd_qabs_uint(const mpd_t *, uint32_t *);
+int32_t mpd_qget_i32(const mpd_t *, uint32_t *);
+uint32_t mpd_qget_u32(const mpd_t *, uint32_t *);
+int64_t mpd_qget_i64(const mpd_t *, uint32_t *);
+uint64_t mpd_qget_u64(const mpd_t *, uint32_t *);
+int mpd_qcheck_nan(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+int mpd_qcheck_nans(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qfinalize(mpd_t *, const mpd_context_t *, uint32_t *);
+const char *mpd_class(const mpd_t *, const mpd_context_t *);
+mpd_t *mpd_qncopy(const mpd_t *);
+int mpd_qcopy(mpd_t *, const mpd_t *,  uint32_t *);
+int mpd_qcopy_abs(mpd_t *, const mpd_t *, uint32_t *);
+int mpd_qcopy_negate(mpd_t *, const mpd_t *, uint32_t *);
+int mpd_qcopy_sign(mpd_t *, const mpd_t *, const mpd_t *, uint32_t *);
+void mpd_qand(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qinvert(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qlogb(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qor(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qscaleb(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qxor(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+int mpd_same_quantum(const mpd_t *, const mpd_t *);
+void mpd_qrotate(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+int mpd_qshiftl(mpd_t *, const mpd_t *, mpd_ssize_t, uint32_t *);
+mpd_uint_t mpd_qshiftr(mpd_t *, const mpd_t *, mpd_ssize_t, uint32_t *);
+mpd_uint_t mpd_qshiftr_inplace(mpd_t *, mpd_ssize_t);
+void mpd_qshift(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qshiftn(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+int mpd_qcmp(const mpd_t *, const mpd_t *, uint32_t *);
+int mpd_qcompare(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+int mpd_qcompare_signal(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+int mpd_cmp_total(const mpd_t *, const mpd_t *);
+int mpd_cmp_total_mag(const mpd_t *, const mpd_t *);
+int mpd_compare_total(mpd_t *, const mpd_t *, const mpd_t *);
+int mpd_compare_total_mag(mpd_t *, const mpd_t *, const mpd_t *);
+void mpd_qround_to_intx(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qround_to_int(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qtrunc(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qfloor(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qceil(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qabs(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qmax(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qmax_mag(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qmin(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qmin_mag(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qminus(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qplus(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qnext_minus(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qnext_plus(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qnext_toward(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qquantize(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qrescale(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qrescale_fmt(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qreduce(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qadd(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qadd_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qadd_i32(mpd_t *, const mpd_t *, int32_t, const mpd_context_t *, uint32_t *);
+void mpd_qadd_uint(mpd_t *, const mpd_t *, mpd_uint_t, const mpd_context_t *, uint32_t *);
+void mpd_qadd_u32(mpd_t *, const mpd_t *, uint32_t, const mpd_context_t *, uint32_t *);
+void mpd_qsub(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qsub_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qsub_i32(mpd_t *, const mpd_t *, int32_t, const mpd_context_t *, uint32_t *);
+void mpd_qsub_uint(mpd_t *, const mpd_t *, mpd_uint_t, const mpd_context_t *, uint32_t *);
+void mpd_qsub_u32(mpd_t *, const mpd_t *, uint32_t, const mpd_context_t *, uint32_t *);
+void mpd_qmul(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qmul_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qmul_i32(mpd_t *, const mpd_t *, int32_t, const mpd_context_t *, uint32_t *);
+void mpd_qmul_uint(mpd_t *, const mpd_t *, mpd_uint_t, const mpd_context_t *, uint32_t *);
+void mpd_qmul_u32(mpd_t *, const mpd_t *, uint32_t, const mpd_context_t *, uint32_t *);
+void mpd_qfma(mpd_t *, const mpd_t *, const mpd_t *, const mpd_t *c, const mpd_context_t *, uint32_t *);
+void mpd_qdiv(mpd_t *q, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qdiv_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, const mpd_context_t *, uint32_t *);
+void mpd_qdiv_i32(mpd_t *, const mpd_t *, int32_t, const mpd_context_t *, uint32_t *);
+void mpd_qdiv_uint(mpd_t *, const mpd_t *, mpd_uint_t, const mpd_context_t *, uint32_t *);
+void mpd_qdiv_u32(mpd_t *, const mpd_t *, uint32_t, const mpd_context_t *, uint32_t *);
+void mpd_qdivint(mpd_t *q, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qrem(mpd_t *r, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qrem_near(mpd_t *r, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qdivmod(mpd_t *q, mpd_t *r, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qpow(mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qpowmod(mpd_t *, const mpd_t *, const mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qexp(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qln10(mpd_t *, mpd_ssize_t, uint32_t *);
+void mpd_qln(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qlog10(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qsqrt(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qinvroot(mpd_t *, const mpd_t *, const mpd_context_t *, uint32_t *);
+void mpd_qadd_i64(mpd_t *, const mpd_t *, int64_t, const mpd_context_t *, uint32_t *);
+void mpd_qadd_u64(mpd_t *, const mpd_t *, uint64_t, const mpd_context_t *, uint32_t *);
+void mpd_qsub_i64(mpd_t *, const mpd_t *, int64_t, const mpd_context_t *, uint32_t *);
+void mpd_qsub_u64(mpd_t *, const mpd_t *, uint64_t, const mpd_context_t *, uint32_t *);
+void mpd_qmul_i64(mpd_t *, const mpd_t *, int64_t, const mpd_context_t *, uint32_t *);
+void mpd_qmul_u64(mpd_t *, const mpd_t *, uint64_t, const mpd_context_t *, uint32_t *);
+void mpd_qdiv_i64(mpd_t *, const mpd_t *, int64_t, const mpd_context_t *, uint32_t *);
+void mpd_qdiv_u64(mpd_t *, const mpd_t *, uint64_t, const mpd_context_t *, uint32_t *);

-/* quietly get a C integer type from an mpd_t */
-mpd_ssize_t mpd_qget_ssize(const mpd_t *dec, uint32_t *status);
-mpd_uint_t mpd_qget_uint(const mpd_t *dec, uint32_t *status);
-mpd_uint_t mpd_qabs_uint(const mpd_t *dec, uint32_t *status);
-
-int32_t mpd_qget_i32(const mpd_t *dec, uint32_t *status);
-uint32_t mpd_qget_u32(const mpd_t *dec, uint32_t *status);
-#ifndef LEGACY_COMPILER
-int64_t mpd_qget_i64(const mpd_t *dec, uint32_t *status);
-uint64_t mpd_qget_u64(const mpd_t *dec, uint32_t *status);
-#endif
-
-/* quiet functions */
-int mpd_qcheck_nan(mpd_t *nanresult, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-int mpd_qcheck_nans(mpd_t *nanresult, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qfinalize(mpd_t *result, const mpd_context_t *ctx, uint32_t *status);
-
-const char *mpd_class(const mpd_t *a, const mpd_context_t *ctx);
-
-int mpd_qcopy(mpd_t *result, const mpd_t *a,  uint32_t *status);
-mpd_t *mpd_qncopy(const mpd_t *a);
-int mpd_qcopy_abs(mpd_t *result, const mpd_t *a, uint32_t *status);
-int mpd_qcopy_negate(mpd_t *result, const mpd_t *a, uint32_t *status);
-int mpd_qcopy_sign(mpd_t *result, const mpd_t *a, const mpd_t *b, uint32_t *status);
-
-void mpd_qand(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qinvert(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qlogb(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qor(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qscaleb(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qxor(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-int mpd_same_quantum(const mpd_t *a, const mpd_t *b);
-
-void mpd_qrotate(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-int mpd_qshiftl(mpd_t *result, const mpd_t *a, mpd_ssize_t n, uint32_t *status);
-mpd_uint_t mpd_qshiftr(mpd_t *result, const mpd_t *a, mpd_ssize_t n, uint32_t *status);
-mpd_uint_t mpd_qshiftr_inplace(mpd_t *result, mpd_ssize_t n);
-void mpd_qshift(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qshiftn(mpd_t *result, const mpd_t *a, mpd_ssize_t n, const mpd_context_t *ctx, uint32_t *status);
-
-int mpd_qcmp(const mpd_t *a, const mpd_t *b, uint32_t *status);
-int mpd_qcompare(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-int mpd_qcompare_signal(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-int mpd_cmp_total(const mpd_t *a, const mpd_t *b);
-int mpd_cmp_total_mag(const mpd_t *a, const mpd_t *b);
-int mpd_compare_total(mpd_t *result, const mpd_t *a, const mpd_t *b);
-int mpd_compare_total_mag(mpd_t *result, const mpd_t *a, const mpd_t *b);
-
-void mpd_qround_to_intx(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qround_to_int(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qtrunc(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qfloor(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qceil(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-
-void mpd_qabs(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmax(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmax_mag(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmin(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmin_mag(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qminus(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qplus(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qnext_minus(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qnext_plus(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qnext_toward(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qquantize(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qrescale(mpd_t *result, const mpd_t *a, mpd_ssize_t exp, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qrescale_fmt(mpd_t *result, const mpd_t *a, mpd_ssize_t exp, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qreduce(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qadd(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qadd_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qadd_i32(mpd_t *result, const mpd_t *a, int32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qadd_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qadd_u32(mpd_t *result, const mpd_t *a, uint32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub_i32(mpd_t *result, const mpd_t *a, int32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub_u32(mpd_t *result, const mpd_t *a, uint32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul_i32(mpd_t *result, const mpd_t *a, int32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul_u32(mpd_t *result, const mpd_t *a, uint32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qfma(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_t *c, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv(mpd_t *q, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv_i32(mpd_t *result, const mpd_t *a, int32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv_u32(mpd_t *result, const mpd_t *a, uint32_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdivint(mpd_t *q, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qrem(mpd_t *r, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qrem_near(mpd_t *r, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdivmod(mpd_t *q, mpd_t *r, const mpd_t *a, const mpd_t *b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qpow(mpd_t *result, const mpd_t *base, const mpd_t *exp, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qpowmod(mpd_t *result, const mpd_t *base, const mpd_t *exp, const mpd_t *mod, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qexp(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qln10(mpd_t *result, mpd_ssize_t prec, uint32_t *status);
-void mpd_qln(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qlog10(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsqrt(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, uint32_t *status);
-
-#ifndef LEGACY_COMPILER
-void mpd_qadd_i64(mpd_t *result, const mpd_t *a, int64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qadd_u64(mpd_t *result, const mpd_t *a, uint64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub_i64(mpd_t *result, const mpd_t *a, int64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsub_u64(mpd_t *result, const mpd_t *a, uint64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul_i64(mpd_t *result, const mpd_t *a, int64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qmul_u64(mpd_t *result, const mpd_t *a, uint64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv_i64(mpd_t *result, const mpd_t *a, int64_t b, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qdiv_u64(mpd_t *result, const mpd_t *a, uint64_t b, const mpd_context_t *ctx, uint32_t *status);
-#endif
-
-
-size_t mpd_sizeinbase(const mpd_t *a, uint32_t base);
-void mpd_qimport_u16(mpd_t *result, const uint16_t *srcdata, size_t srclen,
-                     uint8_t srcsign, uint32_t srcbase,
-                     const mpd_context_t *ctx, uint32_t *status);
-void mpd_qimport_u32(mpd_t *result, const uint32_t *srcdata, size_t srclen,
-                     uint8_t srcsign, uint32_t srcbase,
-                     const mpd_context_t *ctx, uint32_t *status);
-size_t mpd_qexport_u16(uint16_t **rdata, size_t rlen, uint32_t base,
-                       const mpd_t *src, uint32_t *status);
-size_t mpd_qexport_u32(uint32_t **rdata, size_t rlen, uint32_t base,
-                       const mpd_t *src, uint32_t *status);
+size_t mpd_sizeinbase(const mpd_t *, uint32_t);
+void mpd_qimport_u16(mpd_t *, const uint16_t *, size_t, uint8_t, uint32_t, const mpd_context_t *, uint32_t *);
+void mpd_qimport_u32(mpd_t *, const uint32_t *, size_t, uint8_t, uint32_t, const mpd_context_t *, uint32_t *);
+size_t mpd_qexport_u16(uint16_t **, size_t, uint32_t, const mpd_t *, uint32_t *);
+size_t mpd_qexport_u32(uint32_t **, size_t, uint32_t, const mpd_t *, uint32_t *);


 /******************************************************************************/
 /*                           Signalling functions                             */
 /******************************************************************************/

-char *mpd_format(const mpd_t *dec, const char *fmt, mpd_context_t *ctx);
-void mpd_import_u16(mpd_t *result, const uint16_t *srcdata, size_t srclen, uint8_t srcsign, uint32_t base, mpd_context_t *ctx);
-void mpd_import_u32(mpd_t *result, const uint32_t *srcdata, size_t srclen, uint8_t srcsign, uint32_t base, mpd_context_t *ctx);
-size_t mpd_export_u16(uint16_t **rdata, size_t rlen, uint32_t base, const mpd_t *src, mpd_context_t *ctx);
-size_t mpd_export_u32(uint32_t **rdata, size_t rlen, uint32_t base, const mpd_t *src, mpd_context_t *ctx);
-void mpd_finalize(mpd_t *result, mpd_context_t *ctx);
-int mpd_check_nan(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-int mpd_check_nans(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_set_string(mpd_t *result, const char *s, mpd_context_t *ctx);
-void mpd_maxcoeff(mpd_t *result, mpd_context_t *ctx);
-void mpd_sset_ssize(mpd_t *result, mpd_ssize_t a, mpd_context_t *ctx);
-void mpd_sset_i32(mpd_t *result, int32_t a, mpd_context_t *ctx);
-void mpd_sset_uint(mpd_t *result, mpd_uint_t a, mpd_context_t *ctx);
-void mpd_sset_u32(mpd_t *result, uint32_t a, mpd_context_t *ctx);
-void mpd_set_ssize(mpd_t *result, mpd_ssize_t a, mpd_context_t *ctx);
-void mpd_set_i32(mpd_t *result, int32_t a, mpd_context_t *ctx);
-void mpd_set_uint(mpd_t *result, mpd_uint_t a, mpd_context_t *ctx);
-void mpd_set_u32(mpd_t *result, uint32_t a, mpd_context_t *ctx);
-#ifndef LEGACY_COMPILER
-void mpd_set_i64(mpd_t *result, int64_t a, mpd_context_t *ctx);
-void mpd_set_u64(mpd_t *result, uint64_t a, mpd_context_t *ctx);
-#endif
-mpd_ssize_t mpd_get_ssize(const mpd_t *a, mpd_context_t *ctx);
-mpd_uint_t mpd_get_uint(const mpd_t *a, mpd_context_t *ctx);
-mpd_uint_t mpd_abs_uint(const mpd_t *a, mpd_context_t *ctx);
-int32_t mpd_get_i32(const mpd_t *a, mpd_context_t *ctx);
-uint32_t mpd_get_u32(const mpd_t *a, mpd_context_t *ctx);
-#ifndef LEGACY_COMPILER
-int64_t mpd_get_i64(const mpd_t *a, mpd_context_t *ctx);
-uint64_t mpd_get_u64(const mpd_t *a, mpd_context_t *ctx);
-#endif
-void mpd_and(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_copy(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_canonical(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_copy_abs(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_copy_negate(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_copy_sign(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_invert(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_logb(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_or(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_rotate(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_scaleb(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_shiftl(mpd_t *result, const mpd_t *a, mpd_ssize_t n, mpd_context_t *ctx);
-mpd_uint_t mpd_shiftr(mpd_t *result, const mpd_t *a, mpd_ssize_t n, mpd_context_t *ctx);
-void mpd_shiftn(mpd_t *result, const mpd_t *a, mpd_ssize_t n, mpd_context_t *ctx);
-void mpd_shift(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_xor(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_abs(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-int mpd_cmp(const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-int mpd_compare(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-int mpd_compare_signal(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_add(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_add_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, mpd_context_t *ctx);
-void mpd_add_i32(mpd_t *result, const mpd_t *a, int32_t b, mpd_context_t *ctx);
-void mpd_add_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, mpd_context_t *ctx);
-void mpd_add_u32(mpd_t *result, const mpd_t *a, uint32_t b, mpd_context_t *ctx);
-void mpd_sub(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_sub_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, mpd_context_t *ctx);
-void mpd_sub_i32(mpd_t *result, const mpd_t *a, int32_t b, mpd_context_t *ctx);
-void mpd_sub_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, mpd_context_t *ctx);
-void mpd_sub_u32(mpd_t *result, const mpd_t *a, uint32_t b, mpd_context_t *ctx);
-void mpd_div(mpd_t *q, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_div_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, mpd_context_t *ctx);
-void mpd_div_i32(mpd_t *result, const mpd_t *a, int32_t b, mpd_context_t *ctx);
-void mpd_div_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, mpd_context_t *ctx);
-void mpd_div_u32(mpd_t *result, const mpd_t *a, uint32_t b, mpd_context_t *ctx);
-void mpd_divmod(mpd_t *q, mpd_t *r, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_divint(mpd_t *q, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_exp(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_fma(mpd_t *result, const mpd_t *a, const mpd_t *b, const mpd_t *c, mpd_context_t *ctx);
-void mpd_ln(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_log10(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_max(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_max_mag(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_min(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_min_mag(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_minus(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_mul(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_mul_ssize(mpd_t *result, const mpd_t *a, mpd_ssize_t b, mpd_context_t *ctx);
-void mpd_mul_i32(mpd_t *result, const mpd_t *a, int32_t b, mpd_context_t *ctx);
-void mpd_mul_uint(mpd_t *result, const mpd_t *a, mpd_uint_t b, mpd_context_t *ctx);
-void mpd_mul_u32(mpd_t *result, const mpd_t *a, uint32_t b, mpd_context_t *ctx);
-void mpd_next_minus(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_next_plus(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_next_toward(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_plus(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_pow(mpd_t *result, const mpd_t *base, const mpd_t *exp, mpd_context_t *ctx);
-void mpd_powmod(mpd_t *result, const mpd_t *base, const mpd_t *exp, const mpd_t *mod, mpd_context_t *ctx);
-void mpd_quantize(mpd_t *result, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_rescale(mpd_t *result, const mpd_t *a, mpd_ssize_t exp, mpd_context_t *ctx);
-void mpd_reduce(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_rem(mpd_t *r, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_rem_near(mpd_t *r, const mpd_t *a, const mpd_t *b, mpd_context_t *ctx);
-void mpd_round_to_intx(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_round_to_int(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_trunc(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_floor(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_ceil(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_sqrt(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-void mpd_invroot(mpd_t *result, const mpd_t *a, mpd_context_t *ctx);
-
-#ifndef LEGACY_COMPILER
-void mpd_add_i64(mpd_t *result, const mpd_t *a, int64_t b, mpd_context_t *ctx);
-void mpd_add_u64(mpd_t *result, const mpd_t *a, uint64_t b, mpd_context_t *ctx);
-void mpd_sub_i64(mpd_t *result, const mpd_t *a, int64_t b, mpd_context_t *ctx);
-void mpd_sub_u64(mpd_t *result, const mpd_t *a, uint64_t b, mpd_context_t *ctx);
-void mpd_div_i64(mpd_t *result, const mpd_t *a, int64_t b, mpd_context_t *ctx);
-void mpd_div_u64(mpd_t *result, const mpd_t *a, uint64_t b, mpd_context_t *ctx);
-void mpd_mul_i64(mpd_t *result, const mpd_t *a, int64_t b, mpd_context_t *ctx);
-void mpd_mul_u64(mpd_t *result, const mpd_t *a, uint64_t b, mpd_context_t *ctx);
-#endif
+char *mpd_format(const mpd_t *, const char *, mpd_context_t *);
+void mpd_import_u16(mpd_t *, const uint16_t *, size_t, uint8_t, uint32_t, mpd_context_t *);
+void mpd_import_u32(mpd_t *, const uint32_t *, size_t, uint8_t, uint32_t, mpd_context_t *);
+size_t mpd_export_u16(uint16_t **, size_t, uint32_t, const mpd_t *, mpd_context_t *);
+size_t mpd_export_u32(uint32_t **, size_t, uint32_t, const mpd_t *, mpd_context_t *);
+void mpd_finalize(mpd_t *, mpd_context_t *);
+int mpd_check_nan(mpd_t *, const mpd_t *, mpd_context_t *);
+int mpd_check_nans(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_set_string(mpd_t *, const char *, mpd_context_t *); /* result, s, ctx */
+void mpd_maxcoeff(mpd_t *, mpd_context_t *);
+void mpd_sset_ssize(mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_sset_i32(mpd_t *, int32_t, mpd_context_t *);
+void mpd_sset_uint(mpd_t *, mpd_uint_t, mpd_context_t *);
+void mpd_sset_u32(mpd_t *, uint32_t, mpd_context_t *);
+void mpd_set_ssize(mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_set_i32(mpd_t *, int32_t, mpd_context_t *);
+void mpd_set_uint(mpd_t *, mpd_uint_t, mpd_context_t *);
+void mpd_set_u32(mpd_t *, uint32_t, mpd_context_t *);
+void mpd_set_i64(mpd_t *, int64_t, mpd_context_t *);
+void mpd_set_u64(mpd_t *, uint64_t, mpd_context_t *);
+mpd_ssize_t mpd_get_ssize(const mpd_t *, mpd_context_t *);
+mpd_uint_t mpd_get_uint(const mpd_t *, mpd_context_t *);
+mpd_uint_t mpd_abs_uint(const mpd_t *, mpd_context_t *);
+int32_t mpd_get_i32(const mpd_t *, mpd_context_t *);
+uint32_t mpd_get_u32(const mpd_t *, mpd_context_t *);
+int64_t mpd_get_i64(const mpd_t *, mpd_context_t *);
+uint64_t mpd_get_u64(const mpd_t *, mpd_context_t *);
+void mpd_and(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_copy(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_canonical(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_copy_abs(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_copy_negate(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_copy_sign(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_invert(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_logb(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_or(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_rotate(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_scaleb(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_shiftl(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+mpd_uint_t mpd_shiftr(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_shiftn(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_shift(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_xor(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_abs(mpd_t *, const mpd_t *, mpd_context_t *);
+int mpd_cmp(const mpd_t *, const mpd_t *, mpd_context_t *);
+int mpd_compare(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+int mpd_compare_signal(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_add(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_add_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_add_i32(mpd_t *, const mpd_t *, int32_t, mpd_context_t *);
+void mpd_add_uint(mpd_t *, const mpd_t *, mpd_uint_t, mpd_context_t *);
+void mpd_add_u32(mpd_t *, const mpd_t *, uint32_t, mpd_context_t *);
+void mpd_sub(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_sub_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_sub_i32(mpd_t *, const mpd_t *, int32_t, mpd_context_t *);
+void mpd_sub_uint(mpd_t *, const mpd_t *, mpd_uint_t, mpd_context_t *);
+void mpd_sub_u32(mpd_t *, const mpd_t *, uint32_t, mpd_context_t *);
+void mpd_div(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_div_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_div_i32(mpd_t *, const mpd_t *, int32_t, mpd_context_t *);
+void mpd_div_uint(mpd_t *, const mpd_t *, mpd_uint_t, mpd_context_t *);
+void mpd_div_u32(mpd_t *, const mpd_t *, uint32_t, mpd_context_t *);
+void mpd_divmod(mpd_t *, mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *); /* q, r, a, b, ctx */
+void mpd_divint(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_exp(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_fma(mpd_t *, const mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *); /* result, a, b, c, ctx */
+void mpd_ln(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_log10(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_max(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_max_mag(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_min(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_min_mag(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_minus(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_mul(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_mul_ssize(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_mul_i32(mpd_t *, const mpd_t *, int32_t, mpd_context_t *);
+void mpd_mul_uint(mpd_t *, const mpd_t *, mpd_uint_t, mpd_context_t *);
+void mpd_mul_u32(mpd_t *, const mpd_t *, uint32_t, mpd_context_t *);
+void mpd_next_minus(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_next_plus(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_next_toward(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_plus(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_pow(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_powmod(mpd_t *, const mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_quantize(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_rescale(mpd_t *, const mpd_t *, mpd_ssize_t, mpd_context_t *);
+void mpd_reduce(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_rem(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *); /* r, a, b, ctx */
+void mpd_rem_near(mpd_t *, const mpd_t *, const mpd_t *, mpd_context_t *); /* r, a, b, ctx */
+void mpd_round_to_intx(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_round_to_int(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_trunc(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_floor(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_ceil(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_sqrt(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_invroot(mpd_t *, const mpd_t *, mpd_context_t *);
+void mpd_add_i64(mpd_t *, const mpd_t *, int64_t, mpd_context_t *);
+void mpd_add_u64(mpd_t *, const mpd_t *, uint64_t, mpd_context_t *);
+void mpd_sub_i64(mpd_t *, const mpd_t *, int64_t, mpd_context_t *);
+void mpd_sub_u64(mpd_t *, const mpd_t *, uint64_t, mpd_context_t *);
+void mpd_div_i64(mpd_t *, const mpd_t *, int64_t, mpd_context_t *);
+void mpd_div_u64(mpd_t *, const mpd_t *, uint64_t, mpd_context_t *);
+void mpd_mul_i64(mpd_t *, const mpd_t *, int64_t, mpd_context_t *);
+void mpd_mul_u64(mpd_t *, const mpd_t *, uint64_t, mpd_context_t *);


 /******************************************************************************/
 /*                          Configuration specific                            */
 /******************************************************************************/

-#ifdef CONFIG_64
-void mpd_qsset_i64(mpd_t *result, int64_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_qsset_u64(mpd_t *result, uint64_t a, const mpd_context_t *ctx, uint32_t *status);
-void mpd_sset_i64(mpd_t *result, int64_t a, mpd_context_t *ctx);
-void mpd_sset_u64(mpd_t *result, uint64_t a, mpd_context_t *ctx);
-#endif
+void mpd_qsset_i64(mpd_t *, int64_t, const mpd_context_t *, uint32_t *);
+void mpd_qsset_u64(mpd_t *, uint64_t, const mpd_context_t *, uint32_t *);
+void mpd_sset_i64(mpd_t *, int64_t, mpd_context_t *);
+void mpd_sset_u64(mpd_t *, uint64_t, mpd_context_t *);


 /******************************************************************************/
 /*                       Get attributes of a decimal                          */
 /******************************************************************************/

-EXTINLINE mpd_ssize_t mpd_adjexp(const mpd_t *dec);
-EXTINLINE mpd_ssize_t mpd_etiny(const mpd_context_t *ctx);
-EXTINLINE mpd_ssize_t mpd_etop(const mpd_context_t *ctx);
-EXTINLINE mpd_uint_t mpd_msword(const mpd_t *dec);
-EXTINLINE int mpd_word_digits(mpd_uint_t word);
+mpd_ssize_t mpd_adjexp(const mpd_t *);
+mpd_ssize_t mpd_etiny(const mpd_context_t *);
+mpd_ssize_t mpd_etop(const mpd_context_t *);
+mpd_uint_t mpd_msword(const mpd_t *);
+int mpd_word_digits(mpd_uint_t);
 /* most significant digit of a word */
-EXTINLINE mpd_uint_t mpd_msd(mpd_uint_t word);
+mpd_uint_t mpd_msd(mpd_uint_t);
 /* least significant digit of a word */
-EXTINLINE mpd_uint_t mpd_lsd(mpd_uint_t word);
+mpd_uint_t mpd_lsd(mpd_uint_t);
 /* coefficient size needed to store 'digits' */
-EXTINLINE mpd_ssize_t mpd_digits_to_size(mpd_ssize_t digits);
+mpd_ssize_t mpd_digits_to_size(mpd_ssize_t);
 /* number of digits in the exponent, undefined for MPD_SSIZE_MIN */
-EXTINLINE int mpd_exp_digits(mpd_ssize_t exp);
-EXTINLINE int mpd_iscanonical(const mpd_t *dec UNUSED);
-EXTINLINE int mpd_isfinite(const mpd_t *dec);
-EXTINLINE int mpd_isinfinite(const mpd_t *dec);
-EXTINLINE int mpd_isinteger(const mpd_t *dec);
-EXTINLINE int mpd_isnan(const mpd_t *dec);
-EXTINLINE int mpd_isnegative(const mpd_t *dec);
-EXTINLINE int mpd_ispositive(const mpd_t *dec);
-EXTINLINE int mpd_isqnan(const mpd_t *dec);
-EXTINLINE int mpd_issigned(const mpd_t *dec);
-EXTINLINE int mpd_issnan(const mpd_t *dec);
-EXTINLINE int mpd_isspecial(const mpd_t *dec);
-EXTINLINE int mpd_iszero(const mpd_t *dec);
+int mpd_exp_digits(mpd_ssize_t);
+int mpd_iscanonical(const mpd_t *);
+int mpd_isfinite(const mpd_t *);
+int mpd_isinfinite(const mpd_t *);
+int mpd_isinteger(const mpd_t *);
+int mpd_isnan(const mpd_t *);
+int mpd_isnegative(const mpd_t *);
+int mpd_ispositive(const mpd_t *);
+int mpd_isqnan(const mpd_t *);
+int mpd_issigned(const mpd_t *);
+int mpd_issnan(const mpd_t *);
+int mpd_isspecial(const mpd_t *);
+int mpd_iszero(const mpd_t *);
 /* undefined for special numbers */
-EXTINLINE int mpd_iszerocoeff(const mpd_t *dec);
-EXTINLINE int mpd_isnormal(const mpd_t *dec, const mpd_context_t *ctx);
-EXTINLINE int mpd_issubnormal(const mpd_t *dec, const mpd_context_t *ctx);
+int mpd_iszerocoeff(const mpd_t *);
+int mpd_isnormal(const mpd_t *, const mpd_context_t *);
+int mpd_issubnormal(const mpd_t *, const mpd_context_t *);
 /* odd word */
-EXTINLINE int mpd_isoddword(mpd_uint_t word);
+int mpd_isoddword(mpd_uint_t);
 /* odd coefficient */
-EXTINLINE int mpd_isoddcoeff(const mpd_t *dec);
+int mpd_isoddcoeff(const mpd_t *);
 /* odd decimal, only defined for integers */
-int mpd_isodd(const mpd_t *dec);
+int mpd_isodd(const mpd_t *);
 /* even decimal, only defined for integers */
-int mpd_iseven(const mpd_t *dec);
+int mpd_iseven(const mpd_t *);
 /* 0 if dec is positive, 1 if dec is negative */
-EXTINLINE uint8_t mpd_sign(const mpd_t *dec);
+uint8_t mpd_sign(const mpd_t *);
 /* 1 if dec is positive, -1 if dec is negative */
-EXTINLINE int mpd_arith_sign(const mpd_t *dec);
-EXTINLINE long mpd_radix(void);
-EXTINLINE int mpd_isdynamic(const mpd_t *dec);
-EXTINLINE int mpd_isstatic(const mpd_t *dec);
-EXTINLINE int mpd_isdynamic_data(const mpd_t *dec);
-EXTINLINE int mpd_isstatic_data(const mpd_t *dec);
-EXTINLINE int mpd_isshared_data(const mpd_t *dec);
-EXTINLINE int mpd_isconst_data(const mpd_t *dec);
-EXTINLINE mpd_ssize_t mpd_trail_zeros(const mpd_t *dec);
+int mpd_arith_sign(const mpd_t *);
+long mpd_radix(void);
+int mpd_isdynamic(const mpd_t *);
+int mpd_isstatic(const mpd_t *);
+int mpd_isdynamic_data(const mpd_t *);
+int mpd_isstatic_data(const mpd_t *);
+int mpd_isshared_data(const mpd_t *);
+int mpd_isconst_data(const mpd_t *);
+mpd_ssize_t mpd_trail_zeros(const mpd_t *);


 /******************************************************************************/
 /*                       Set attributes of a decimal                          */
 /******************************************************************************/

-/* set number of decimal digits in the coefficient */
-EXTINLINE void mpd_setdigits(mpd_t *result);
-EXTINLINE void mpd_set_sign(mpd_t *result, uint8_t sign);
-/* copy sign from another decimal */
-EXTINLINE void mpd_signcpy(mpd_t *result, const mpd_t *a);
-EXTINLINE void mpd_set_infinity(mpd_t *result);
-EXTINLINE void mpd_set_qnan(mpd_t *result);
-EXTINLINE void mpd_set_snan(mpd_t *result);
-EXTINLINE void mpd_set_negative(mpd_t *result);
-EXTINLINE void mpd_set_positive(mpd_t *result);
-EXTINLINE void mpd_set_dynamic(mpd_t *result);
-EXTINLINE void mpd_set_static(mpd_t *result);
-EXTINLINE void mpd_set_dynamic_data(mpd_t *result);
-EXTINLINE void mpd_set_static_data(mpd_t *result);
-EXTINLINE void mpd_set_shared_data(mpd_t *result);
-EXTINLINE void mpd_set_const_data(mpd_t *result);
-EXTINLINE void mpd_clear_flags(mpd_t *result);
-EXTINLINE void mpd_set_flags(mpd_t *result, uint8_t flags);
-EXTINLINE void mpd_copy_flags(mpd_t *result, const mpd_t *a);
+void mpd_setdigits(mpd_t *);
+void mpd_set_sign(mpd_t *, uint8_t);
+void mpd_signcpy(mpd_t *, const mpd_t *);
+void mpd_set_infinity(mpd_t *);
+void mpd_set_qnan(mpd_t *);
+void mpd_set_snan(mpd_t *);
+void mpd_set_negative(mpd_t *);
+void mpd_set_positive(mpd_t *);
+void mpd_set_dynamic(mpd_t *);
+void mpd_set_static(mpd_t *);
+void mpd_set_dynamic_data(mpd_t *);
+void mpd_set_static_data(mpd_t *);
+void mpd_set_shared_data(mpd_t *);
+void mpd_set_const_data(mpd_t *);
+void mpd_clear_flags(mpd_t *);
+void mpd_set_flags(mpd_t *, uint8_t);
+void mpd_copy_flags(mpd_t *, const mpd_t *);


 /******************************************************************************/
@ -743,45 +583,30 @@ EXTINLINE void mpd_copy_flags(mpd_t *result, const mpd_t *a);
 /*                            Memory handling                                 */
 /******************************************************************************/

-extern void *(* mpd_mallocfunc)(size_t size);
-extern void *(* mpd_callocfunc)(size_t nmemb, size_t size);
-extern void *(* mpd_reallocfunc)(void *ptr, size_t size);
-extern void (* mpd_free)(void *ptr);
+extern void *(*mpd_mallocfunc)(size_t);
+extern void *(*mpd_callocfunc)(size_t, size_t);
+extern void *(*mpd_reallocfunc)(void *, size_t);
+extern void (*mpd_free)(void *);

-void *mpd_callocfunc_em(size_t nmemb, size_t size);
+void *mpd_callocfunc_em(size_t, size_t);

-void *mpd_alloc(mpd_size_t nmemb, mpd_size_t size);
-void *mpd_calloc(mpd_size_t nmemb, mpd_size_t size);
-void *mpd_realloc(void *ptr, mpd_size_t nmemb, mpd_size_t size, uint8_t *err);
-void *mpd_sh_alloc(mpd_size_t struct_size, mpd_size_t nmemb, mpd_size_t size);
+void *mpd_alloc(mpd_size_t, mpd_size_t);
+void *mpd_calloc(mpd_size_t, mpd_size_t);
+void *mpd_realloc(void *, mpd_size_t, mpd_size_t, uint8_t *);
+void *mpd_sh_alloc(mpd_size_t, mpd_size_t, mpd_size_t);

 mpd_t *mpd_qnew(void);
-mpd_t *mpd_new(mpd_context_t *ctx);
-mpd_t *mpd_qnew_size(mpd_ssize_t size);
-EXTINLINE void mpd_del(mpd_t *dec);
+mpd_t *mpd_new(mpd_context_t *);
+mpd_t *mpd_qnew_size(mpd_ssize_t);
+void mpd_del(mpd_t *);

-EXTINLINE void mpd_uint_zero(mpd_uint_t *dest, mpd_size_t len);
-EXTINLINE int mpd_qresize(mpd_t *result, mpd_ssize_t size, uint32_t *status);
-EXTINLINE int mpd_qresize_zero(mpd_t *result, mpd_ssize_t size, uint32_t *status);
-EXTINLINE void mpd_minalloc(mpd_t *result);
-
-int mpd_resize(mpd_t *result, mpd_ssize_t size, mpd_context_t *ctx);
-int mpd_resize_zero(mpd_t *result, mpd_ssize_t size, mpd_context_t *ctx);
-
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */
-
-
-#ifdef __cplusplus
-  #ifdef MPD_CLEAR_STDC_LIMIT_MACROS
-    #undef MPD_CLEAR_STDC_LIMIT_MACROS
-    #undef __STDC_LIMIT_MACROS
-  #endif
-} /* END extern "C" */
-#endif
+void mpd_uint_zero(mpd_uint_t *, mpd_size_t);
+int mpd_qresize(mpd_t *, mpd_ssize_t, uint32_t *);
+int mpd_qresize_zero(mpd_t *, mpd_ssize_t, uint32_t *);
+void mpd_minalloc(mpd_t *);

+int mpd_resize(mpd_t *, mpd_ssize_t, mpd_context_t *);
+int mpd_resize_zero(mpd_t *, mpd_ssize_t, mpd_context_t *);

+COSMOPOLITAN_C_END_
 #endif /* MPDECIMAL_H */
-
-
-
--- a/third_party/python/Modules/_decimal/libmpdec/numbertheory.c
+++ b/third_party/python/Modules/_decimal/libmpdec/numbertheory.c
@ -38,10 +38,8 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-
 /* Bignum: Initialize the Number Theoretic Transform. */

-
 /*
 * Return the nth root of unity in F(p). This corresponds to e**((2*pi*i)/n)
 * in the Fourier transform. We have w**n == 1 (mod p).
@ -53,16 +51,10 @@ mpd_uint_t
 _mpd_getkernel(mpd_uint_t n, int sign, int modnum)
 {
    mpd_uint_t umod, p, r, xi;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
-
    SETMODULUS(modnum);
    r = mpd_roots[modnum]; /* primitive root of F(p) */
    p = umod;
    xi = (p-1) / n;
-
    if (sign == -1)
        return POWMOD(r, (p-1-xi));
    else
@ -80,38 +72,28 @@ _mpd_init_fnt_params(mpd_size_t n, int sign, int modnum)
 {
    struct fnt_params *tparams;
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t kernel, w;
    mpd_uint_t i;
    mpd_size_t nhalf;
-
    assert(ispower2(n));
    assert(sign == -1 || sign == 1);
    assert(P1 <= modnum && modnum <= P3);
-
    nhalf = n/2;
    tparams = mpd_sh_alloc(sizeof *tparams, nhalf, sizeof (mpd_uint_t));
    if (tparams == NULL) {
        return NULL;
    }
-
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, sign, modnum);
-
    tparams->modnum = modnum;
    tparams->modulus = umod;
    tparams->kernel = kernel;
-
    /* wtable[] := w**0, w**1, ..., w**(nhalf-1) */
    w = 1;
    for (i = 0; i < nhalf; i++) {
        tparams->wtable[i] = w;
        w = MULMOD(w, kernel);
    }
-
    return tparams;
 }

@ -120,15 +102,9 @@ void
 _mpd_init_w3table(mpd_uint_t w3table[3], int sign, int modnum)
 {
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t kernel;
-
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(3, sign, modnum);
-
    w3table[0] = 1;
    w3table[1] = kernel;
    w3table[2] = POWMOD(kernel, 2);
--- a/third_party/python/Modules/_decimal/libmpdec/numbertheory.h
+++ b/third_party/python/Modules/_decimal/libmpdec/numbertheory.h
@ -2,40 +2,20 @@
 #define NUMBER_THEORY_H
 #include "third_party/python/Modules/_decimal/libmpdec/constants.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
-/* clang-format off */

-/* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)
-
-/* transform parameters */
 struct fnt_params {
-    int modnum;
-    mpd_uint_t modulus;
-    mpd_uint_t kernel;
-    mpd_uint_t wtable[];
+  int modnum;           /* which modulus is in use; P1 <= modnum <= P3 */
+  mpd_uint_t modulus;   /* umod as set by SETMODULUS(modnum) */
+  mpd_uint_t kernel;    /* _mpd_getkernel(n, sign, modnum) for this transform */
+  mpd_uint_t wtable[];  /* kernel**0 .. kernel**(n/2 - 1) */
 };

-mpd_uint_t _mpd_getkernel(mpd_uint_t n, int sign, int modnum);
-struct fnt_params *_mpd_init_fnt_params(mpd_size_t n, int sign, int modnum);
-void _mpd_init_w3table(mpd_uint_t w3table[3], int sign, int modnum);
+mpd_uint_t _mpd_getkernel(mpd_uint_t, int, int);
+struct fnt_params *_mpd_init_fnt_params(mpd_size_t, int, int);
+void _mpd_init_w3table(mpd_uint_t[3], int, int);

-#ifdef PPRO
-static inline void
-ppro_setmodulus(int modnum, mpd_uint_t *umod, double *dmod, uint32_t dinvmod[3])
-{
-    *dmod = *umod =  mpd_moduli[modnum];
-    dinvmod[0] = mpd_invmoduli[modnum][0];
-    dinvmod[1] = mpd_invmoduli[modnum][1];
-    dinvmod[2] = mpd_invmoduli[modnum][2];
+static inline void std_setmodulus(int modnum, mpd_uint_t *umod) {
+  *umod = mpd_moduli[modnum]; /* store the modulus selected by modnum */
 }
-#else
-static inline void
-std_setmodulus(int modnum, mpd_uint_t *umod)
-{
-    *umod =  mpd_moduli[modnum];
-}
-#endif
-
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/sixstep.c
+++ b/third_party/python/Modules/_decimal/libmpdec/sixstep.c
@ -41,10 +41,66 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

+/*
+                Cache Efficient Matrix Fourier Transform
+                         for arrays of length 2ⁿ

-/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
-   form 2**n (See literature/six-step.txt). */

+The Six Step Transform
+══════════════════════
+
+In libmpdec, the six-step transform is the Matrix Fourier Transform in
+disguise. It is called the six-step transform after a variant that appears
+in [1]. The algorithm requires that the input array can be viewed as an
+R×C matrix.
+
+
+Algorithm six-step (forward transform)
+──────────────────────────────────────
+
+  1a) Transpose the matrix.
+
+  1b) Apply a length R FNT to each row.
+
+  1c) Transpose the matrix.
+
+  2) Multiply each matrix element (addressed by j×C+m) by r**(j×m).
+
+  3) Apply a length C FNT to each row.
+
+  4) Transpose the matrix. (Compiled out below: unordered output suffices.)
+
+Note that steps 1a) - 1c) are exactly equivalent to step 1) of the Matrix
+Fourier Transform. For large R, it is faster to transpose twice and do
+a transform on the rows than to perform a column transpose directly.
+
+
+Algorithm six-step (inverse transform)
+──────────────────────────────────────
+
+  0) View the matrix as a C×R matrix.
+
+  1) Transpose the matrix, producing an R×C matrix. (Compiled out below.)
+
+  2) Apply a length C FNT to each row.
+
+  3) Multiply each matrix element (addressed by i×C+n) by r**(i×n).
+
+  4a) Transpose the matrix.
+
+  4b) Apply a length R FNT to each row.
+
+  4c) Transpose the matrix.
+
+Again, steps 4a) - 4c) are equivalent to step 4) of the Matrix Fourier
+Transform.
+
+
+──
+
+  [1] David H. Bailey: FFTs in External or Hierarchical Memory
+      http://crd.lbl.gov/~dhbailey/dhbpapers/
+*/

 /* forward transform with sign = -1 */
 int
@ -54,28 +110,18 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    mpd_size_t log2n, C, R;
    mpd_uint_t kernel;
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t *x, w0, w1, wstep;
    mpd_size_t i, k;
-
-
    assert(ispower2(n));
    assert(n >= 16);
    assert(n <= MPD_MAXTRANSFORM_2N);
-
    log2n = mpd_bsr(n);
    C = ((mpd_size_t)1) << (log2n / 2);  /* number of columns */
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
-
-
    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        return 0;
    }
-
    /* Length R transform on the rows. */
    if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
        return 0;
@ -83,13 +129,11 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    for (x = a; x < a+n; x += R) {
        fnt_dif2(x, R, tparams);
    }
-
    /* Transpose the matrix. */
    if (!transpose_pow2(a, C, R)) {
        mpd_free(tparams);
        return 0;
    }
-
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, -1, modnum);
@ -106,7 +150,6 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
            a[i*C+k+1] = x1;
        }
    }
-
    /* Length C transform on the rows. */
    if (C != R) {
        mpd_free(tparams);
@ -118,7 +161,6 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
        fnt_dif2(x, C, tparams);
    }
    mpd_free(tparams);
-
 #if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix. */
@ -126,11 +168,9 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
        return 0;
    }
 #endif
-
    return 1;
 }

-
 /* reverse transform, sign = 1 */
 int
 inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
@ -139,23 +179,14 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    mpd_size_t log2n, C, R;
    mpd_uint_t kernel;
    mpd_uint_t umod;
-#ifdef PPRO
-    double dmod;
-    uint32_t dinvmod[3];
-#endif
    mpd_uint_t *x, w0, w1, wstep;
    mpd_size_t i, k;
-
-
    assert(ispower2(n));
    assert(n >= 16);
    assert(n <= MPD_MAXTRANSFORM_2N);
-
    log2n = mpd_bsr(n);
    C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
    R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
-
-
 #if 0
    /* An unordered transform is sufficient for convolution. */
    /* Transpose the matrix, producing an R*C matrix. */
@ -163,7 +194,6 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
        return 0;
    }
 #endif
-
    /* Length C transform on the rows. */
    if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
        return 0;
@ -171,7 +201,6 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
    for (x = a; x < a+n; x += C) {
        fnt_dif2(x, C, tparams);
    }
-
    /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
    SETMODULUS(modnum);
    kernel = _mpd_getkernel(n, 1, modnum);
@ -188,13 +217,11 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
            a[i*C+k+1] = x1;
        }
    }
-
    /* Transpose the matrix. */
    if (!transpose_pow2(a, R, C)) {
        mpd_free(tparams);
        return 0;
    }
-
    /* Length R transform on the rows. */
    if (R != C) {
        mpd_free(tparams);
@ -206,11 +233,9 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
        fnt_dif2(x, R, tparams);
    }
    mpd_free(tparams);
-
    /* Transpose the matrix. */
    if (!transpose_pow2(a, C, R)) {
        return 0;
    }
-
    return 1;
 }
--- a/third_party/python/Modules/_decimal/libmpdec/sixstep.h
+++ b/third_party/python/Modules/_decimal/libmpdec/sixstep.h
@ -4,11 +4,9 @@
 /* clang-format off */

 /* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)

 int six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum);
 int inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum);

-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/transpose.c
+++ b/third_party/python/Modules/_decimal/libmpdec/transpose.c
@ -39,22 +39,18 @@ libmpdec (BSD-2)\\n\
 Copyright 2008-2016 Stefan Krah\"");
 asm(".include \"libc/disclaimer.inc\"");

-
 #define BUFSIZE 4096
 #define SIDE 128

-
 /* Bignum: The transpose functions are used for very large transforms
   in sixstep.c and fourstep.c. */

-
 /* Definition of the matrix transpose */
 void
 std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols)
 {
    mpd_size_t idest, isrc;
    mpd_size_t r, c;
-
    for (r = 0; r < rows; r++) {
        isrc = r * cols;
        idest = r;
@ -83,10 +79,7 @@ swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir
    mpd_size_t m, r=0;
    mpd_size_t offset;
    mpd_size_t next;
-
-
    assert(cols == mul_size_t(2, rows));
-
    if (dir == FORWARD_CYCLE) {
        r = rows;
    }
@ -96,52 +89,36 @@ swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir
    else {
        abort(); /* GCOV_NOT_REACHED */
    }
-
    m = cols - 1;
    hmax = rows; /* cycles start at odd halfrows */
    dbits = 8 * sizeof *done;
    if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) {
        return 0;
    }
-
    for (hn = 1; hn <= hmax; hn += 2) {
-
        if (done[hn/dbits] & mpd_bits[hn%dbits]) {
            continue;
        }
-
        readbuf = buf1; writebuf = buf2;
-
        for (offset = 0; offset < cols/2; offset += b) {
-
            stride = (offset + b < cols/2) ? b : cols/2-offset;
-
            hp = matrix + hn*cols/2;
            memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
            pointerswap(&readbuf, &writebuf);
-
            next = mulmod_size_t(hn, r, m);
            hp = matrix + next*cols/2;
-
            while (next != hn) {
-
                memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
                memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
                pointerswap(&readbuf, &writebuf);
-
                done[next/dbits] |= mpd_bits[next%dbits];
-
                next = mulmod_size_t(next, r, m);
                    hp = matrix + next*cols/2;
-
            }
-
            memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
-
            done[hn/dbits] |= mpd_bits[hn%dbits];
        }
    }
-
    mpd_free(done);
    return 1;
 }
@ -153,7 +130,6 @@ squaretrans(mpd_uint_t *buf, mpd_size_t cols)
    mpd_uint_t tmp;
    mpd_size_t idest, isrc;
    mpd_size_t r, c;
-
    for (r = 0; r < cols; r++) {
        c = r+1;
        isrc = r*cols + c;
@ -182,13 +158,9 @@ squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
    mpd_size_t b = size;
    mpd_size_t r, c;
    mpd_size_t i;
-
    while (b > SIDE) b >>= 1;
-
    for (r = 0; r < size; r += b) {
-
        for (c = r; c < size; c += b) {
-
            from = matrix + r*size + c;
            to = buf1;
            for (i = 0; i < b; i++) {
@ -197,7 +169,6 @@ squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
                to += b;
            }
            squaretrans(buf1, b);
-
            if (r == c) {
                to = matrix + r*size + c;
                from = buf1;
@ -217,7 +188,6 @@ squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
                    to += b;
                }
                squaretrans(buf2, b);
-
                to = matrix + c*size + r;
                from = buf1;
                for (i = 0; i < b; i++) {
@ -225,7 +195,6 @@ squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
                    from += b;
                    to += size;
                }
-
                to = matrix + r*size + c;
                from = buf2;
                for (i = 0; i < b; i++) {
@ -236,7 +205,6 @@ squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
            }
        }
    }
-
 }

 /*
@ -247,10 +215,8 @@ int
 transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
 {
    mpd_size_t size = mul_size_t(rows, cols);
-
    assert(ispower2(rows));
    assert(ispower2(cols));
-
    if (cols == rows) {
        squaretrans_pow2(matrix, rows);
    }
@ -269,8 +235,7 @@ transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
        }
    }
    else {
-        abort(); /* GCOV_NOT_REACHED */
+        unreachable;
    }
-
    return 1;
 }
--- a/third_party/python/Modules/_decimal/libmpdec/transpose.h
+++ b/third_party/python/Modules/_decimal/libmpdec/transpose.h
@ -4,7 +4,6 @@
 /* clang-format off */

 /* Internal header file: all symbols have local scope in the DSO */
-MPD_PRAGMA(MPD_HIDE_SYMBOLS_START)

 enum {FORWARD_CYCLE, BACKWARD_CYCLE};

@ -20,6 +19,5 @@ static inline void pointerswap(mpd_uint_t **a, mpd_uint_t **b)
    *a = tmp;
 }

-MPD_PRAGMA(MPD_HIDE_SYMBOLS_END) /* restore previous scope rules */

 #endif
--- a/third_party/python/Modules/_decimal/libmpdec/typearith.h
+++ b/third_party/python/Modules/_decimal/libmpdec/typearith.h
@ -4,68 +4,57 @@
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 /* clang-format off */

+#if defined(__GNUC__) && defined(__x86_64__) && !defined(__STRICT_ANSI__)

-/*****************************************************************************/
-/*                 Low level native arithmetic on basic types                */
-/*****************************************************************************/
-
-
-/** ------------------------------------------------------------
- **           Double width multiplication and division
- ** ------------------------------------------------------------
- */
-
-#if defined(CONFIG_64)
-#if defined(ANSI)
-#if defined(HAVE_UINT128_T)
 static inline void
 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
 {
-    __uint128_t hl;
-
-    hl = (__uint128_t)a * b;
-
-    *hi = hl >> 64;
-    *lo = (mpd_uint_t)hl;
+    mpd_uint_t h, l;
+    asm ( "mulq %3\n\t"
+          : "=d" (h), "=a" (l)
+          : "%a" (a), "rm" (b)
+          : "cc"
+    );
+    *hi = h;
+    *lo = l;
 }

 static inline void
 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
               mpd_uint_t d)
 {
-    __uint128_t hl;
-
-    hl = ((__uint128_t)hi<<64) + lo;
-    *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
-    *r = (mpd_uint_t)(hl - (__uint128_t)(*q) * d);
+    mpd_uint_t qq, rr;
+    asm ( "divq %4\n\t"
+          : "=a" (qq), "=d" (rr)
+          : "a" (lo), "d" (hi), "rm" (d)
+          : "cc"
+    );
+    *q = qq;
+    *r = rr;
 }
+
 #else
+
 static inline void
 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
 {
    uint32_t w[4], carry;
    uint32_t ah, al, bh, bl;
    uint64_t hl;
-
    ah = (uint32_t)(a>>32); al = (uint32_t)a;
    bh = (uint32_t)(b>>32); bl = (uint32_t)b;
-
    hl = (uint64_t)al * bl;
    w[0] = (uint32_t)hl;
    carry = (uint32_t)(hl>>32);
-
    hl = (uint64_t)ah * bl + carry;
    w[1] = (uint32_t)hl;
    w[2] = (uint32_t)(hl>>32);
-
    hl = (uint64_t)al * bh + w[1];
    w[1] = (uint32_t)hl;
    carry = (uint32_t)(hl>>32);
-
    hl = ((uint64_t)ah * bh + w[2]) + carry;
    w[2] = (uint32_t)hl;
    w[3] = (uint32_t)(hl>>32);
-
    *hi = ((uint64_t)w[3]<<32) + w[2];
    *lo = ((uint64_t)w[1]<<32) + w[0];
 }
@ -82,9 +71,7 @@ static inline int
 nlz(uint64_t x)
 {
    int n;
-
    if (x == 0) return(64);
-
    n = 0;
    if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;}
    if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;}
@ -92,7 +79,6 @@ nlz(uint64_t x)
    if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;}
    if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;}
    if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;}
-
    return n;
 }

@ -107,21 +93,16 @@ _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
               un32, un21, un10,
               rhat, t;
    int s;
-
    assert(u1 < v);
-
    s = nlz(v);
    v = v << s;
    vn1 = v >> 32;
    vn0 = v & 0xFFFFFFFF;
-
    t = (s == 0) ? 0 : u0 >> (64 - s);
    un32 = (u1 << s) | t;
    un10 = u0 << s;
-
    un1 = un10 >> 32;
    un0 = un10 & 0xFFFFFFFF;
-
    q1 = un32 / vn1;
    rhat = un32 - q1*vn1;
 again1:
@ -130,7 +111,6 @@ again1:
        rhat = rhat + vn1;
        if (rhat < b) goto again1;
    }
-
    /*
     *  Before again1 we had:
     *      (1) q1*vn1   + rhat         = un32
@ -157,7 +137,6 @@ again1:
     *  on the result.
     */
    un21 = un32*b + un1 - q1*v;
-
    q0 = un21 / vn1;
    rhat = un21 - q0*vn1;
 again2:
@ -166,55 +145,18 @@ again2:
        rhat = rhat + vn1;
        if (rhat < b) goto again2;
    }
-
    *q = q1*b + q0;
    *r = (un21*b + un0 - q0*v) >> s;
 }
-#endif

-/* END ANSI */
-#elif defined(ASM)
-static inline void
-_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
-{
-    mpd_uint_t h, l;
-
-    __asm__ ( "mulq %3\n\t"
-              : "=d" (h), "=a" (l)
-              : "%a" (a), "rm" (b)
-              : "cc"
-    );
-
-    *hi = h;
-    *lo = l;
-}
-
-static inline void
-_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
-               mpd_uint_t d)
-{
-    mpd_uint_t qq, rr;
-
-    __asm__ ( "divq %4\n\t"
-              : "=a" (qq), "=d" (rr)
-              : "a" (lo), "d" (hi), "rm" (d)
-              : "cc"
-    );
-
-    *q = qq;
-    *r = rr;
-}
-/* END GCC ASM */
-#else
-  #error "need platform specific 128 bit multiplication and division"
-#endif
+#endif /* __GNUC__ && __x86_64__ && !__STRICT_ANSI__ */

 #define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
+
 static inline void
 _mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
 {
    assert(exp <= 19);
-
    if (exp <= 9) {
        if (exp <= 4) {
            switch (exp) {
@ -251,240 +193,13 @@ _mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
            case 16: DIVMOD(q, r, v, 10000000000000000ULL); break;
            case 17: DIVMOD(q, r, v, 100000000000000000ULL); break;
            case 18: DIVMOD(q, r, v, 1000000000000000000ULL); break;
-            case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break; /* GCOV_NOT_REACHED */
+            case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break;
+            default: unreachable;
            }
        }
    }
 }

-/* END CONFIG_64 */
-#elif defined(CONFIG_32)
-#if defined(ANSI)
-#if !defined(LEGACY_COMPILER)
-static inline void
-_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
-{
-    mpd_uuint_t hl;
-
-    hl = (mpd_uuint_t)a * b;
-
-    *hi = hl >> 32;
-    *lo = (mpd_uint_t)hl;
-}
-
-static inline void
-_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
-               mpd_uint_t d)
-{
-    mpd_uuint_t hl;
-
-    hl = ((mpd_uuint_t)hi<<32) + lo;
-    *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
-    *r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d);
-}
-/* END ANSI + uint64_t */
-#else
-static inline void
-_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
-{
-    uint16_t w[4], carry;
-    uint16_t ah, al, bh, bl;
-    uint32_t hl;
-
-    ah = (uint16_t)(a>>16); al = (uint16_t)a;
-    bh = (uint16_t)(b>>16); bl = (uint16_t)b;
-
-    hl = (uint32_t)al * bl;
-    w[0] = (uint16_t)hl;
-    carry = (uint16_t)(hl>>16);
-
-    hl = (uint32_t)ah * bl + carry;
-    w[1] = (uint16_t)hl;
-    w[2] = (uint16_t)(hl>>16);
-
-    hl = (uint32_t)al * bh + w[1];
-    w[1] = (uint16_t)hl;
-    carry = (uint16_t)(hl>>16);
-
-    hl = ((uint32_t)ah * bh + w[2]) + carry;
-    w[2] = (uint16_t)hl;
-    w[3] = (uint16_t)(hl>>16);
-
-    *hi = ((uint32_t)w[3]<<16) + w[2];
-    *lo = ((uint32_t)w[1]<<16) + w[0];
-}
-
-/*
- * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
- * http://www.hackersdelight.org/permissions.htm:
- * "You are free to use, copy, and distribute any of the code on this web
- *  site, whether modified by you or not. You need not give attribution."
- *
- * Slightly modified, comments are mine.
- */
-static inline int
-nlz(uint32_t x)
-{
-    int n;
-
-    if (x == 0) return(32);
-
-    n = 0;
-    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
-    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
-    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
-    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
-    if (x <= 0x7FFFFFFF) {n = n + 1;}
-
-    return n;
-}
-
-static inline void
-_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
-               mpd_uint_t v)
-{
-    const mpd_uint_t b = 65536;
-    mpd_uint_t un1, un0,
-               vn1, vn0,
-               q1, q0,
-               un32, un21, un10,
-               rhat, t;
-    int s;
-
-    assert(u1 < v);
-
-    s = nlz(v);
-    v = v << s;
-    vn1 = v >> 16;
-    vn0 = v & 0xFFFF;
-
-    t = (s == 0) ? 0 : u0 >> (32 - s);
-    un32 = (u1 << s) | t;
-    un10 = u0 << s;
-
-    un1 = un10 >> 16;
-    un0 = un10 & 0xFFFF;
-
-    q1 = un32 / vn1;
-    rhat = un32 - q1*vn1;
-again1:
-    if (q1 >= b || q1*vn0 > b*rhat + un1) {
-        q1 = q1 - 1;
-        rhat = rhat + vn1;
-        if (rhat < b) goto again1;
-    }
-
-    /*
-     *  Before again1 we had:
-     *      (1) q1*vn1   + rhat         = un32
-     *      (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
-     *
-     *  The statements inside the if-clause do not change the value
-     *  of the left-hand side of (2), and the loop is only exited
-     *  if q1*vn0 <= rhat*b + un1, so:
-     *
-     *      (3) q1*vn1*b + q1*vn0 <= un32*b + un1
-     *      (4)              q1*v <= un32*b + un1
-     *      (5)                 0 <= un32*b + un1 - q1*v
-     *
-     *  By (5) we are certain that the possible add-back step from
-     *  Knuth's algorithm D is never required.
-     *
-     *  Since the final quotient is less than 2**32, the following
-     *  must be true:
-     *
-     *      (6) un32*b + un1 - q1*v <= UINT32_MAX
-     *
-     *  This means that in the following line, the high words
-     *  of un32*b and q1*v can be discarded without any effect
-     *  on the result.
-     */
-    un21 = un32*b + un1 - q1*v;
-
-    q0 = un21 / vn1;
-    rhat = un21 - q0*vn1;
-again2:
-    if (q0 >= b || q0*vn0 > b*rhat + un0) {
-        q0 = q0 - 1;
-        rhat = rhat + vn1;
-        if (rhat < b) goto again2;
-    }
-
-    *q = q1*b + q0;
-    *r = (un21*b + un0 - q0*v) >> s;
-}
-#endif /* END ANSI + LEGACY_COMPILER */
-
-/* END ANSI */
-#elif defined(ASM)
-static inline void
-_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
-{
-    mpd_uint_t h, l;
-
-    __asm__ ( "mull %3\n\t"
-              : "=d" (h), "=a" (l)
-              : "%a" (a), "rm" (b)
-              : "cc"
-    );
-
-    *hi = h;
-    *lo = l;
-}
-
-static inline void
-_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
-               mpd_uint_t d)
-{
-    mpd_uint_t qq, rr;
-
-    __asm__ ( "divl %4\n\t"
-              : "=a" (qq), "=d" (rr)
-              : "a" (lo), "d" (hi), "rm" (d)
-              : "cc"
-    );
-
-    *q = qq;
-    *r = rr;
-}
-/* END GCC ASM */
-#else
-  #error "need platform specific 64 bit multiplication and division"
-#endif
-
-#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
-static inline void
-_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
-{
-    assert(exp <= 9);
-
-    if (exp <= 4) {
-        switch (exp) {
-        case 0: *q = v; *r = 0; break;
-        case 1: DIVMOD(q, r, v, 10UL); break;
-        case 2: DIVMOD(q, r, v, 100UL); break;
-        case 3: DIVMOD(q, r, v, 1000UL); break;
-        case 4: DIVMOD(q, r, v, 10000UL); break;
-        }
-    }
-    else {
-        switch (exp) {
-        case 5: DIVMOD(q, r, v, 100000UL); break;
-        case 6: DIVMOD(q, r, v, 1000000UL); break;
-        case 7: DIVMOD(q, r, v, 10000000UL); break;
-        case 8: DIVMOD(q, r, v, 100000000UL); break;
-        case 9: DIVMOD(q, r, v, 1000000000UL); break; /* GCOV_NOT_REACHED */
-        }
-    }
-}
-/* END CONFIG_32 */
-
-/* NO CONFIG */
-#else
-  #error "define CONFIG_64 or CONFIG_32"
-#endif /* CONFIG */
-
-
 static inline void
 _mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d)
 {
@ -499,7 +214,6 @@ _mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d)
    *r = v - *q * d;
 }

-
 /** ------------------------------------------------------------
 **              Arithmetic with overflow checking
 ** ------------------------------------------------------------
@ -537,7 +251,6 @@ static inline mpd_size_t
 mul_size_t(mpd_size_t a, mpd_size_t b)
 {
    mpd_uint_t hi, lo;
-
    _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
    if (hi) {
        mpd_err_fatal("mul_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
@ -549,7 +262,6 @@ static inline mpd_size_t
 add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
 {
    mpd_size_t ret;
-
    *overflow = 0;
    ret = a + b;
    if (ret < a) *overflow = 1;
@ -560,7 +272,6 @@ static inline mpd_size_t
 mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
 {
    mpd_uint_t lo;
-
    _mpd_mul_words((mpd_uint_t *)overflow, &lo, (mpd_uint_t)a,
                   (mpd_uint_t)b);
    return lo;
@ -578,15 +289,9 @@ mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m)
 {
    mpd_uint_t hi, lo;
    mpd_uint_t q, r;
-
    _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
    _mpd_div_words(&q, &r, hi, lo, (mpd_uint_t)m);
-
    return r;
 }

-
 #endif /* TYPEARITH_H */
-
-
-
--- a/third_party/python/Modules/_decimal/libmpdec/umodarith.h
+++ b/third_party/python/Modules/_decimal/libmpdec/umodarith.h
@ -1,20 +1,14 @@
 #ifndef UMODARITH_H
 #define UMODARITH_H
+#include "libc/log/libfatal.internal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/constants.h"
 #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h"
 #include "third_party/python/Modules/_decimal/libmpdec/typearith.h"
 /* clang-format off */

-
 /* Bignum: Low level routines for unsigned modular arithmetic. These are
   used in the fast convolution functions for very large coefficients. */

-
-/**************************************************************************/
-/*                        ANSI modular arithmetic                         */
-/**************************************************************************/
-
-
 /*
 * Restrictions: a < m and b < m
 * ACL2 proof: umodarith.lisp: addmod-correct
@ -23,11 +17,9 @@ static inline mpd_uint_t
 addmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
 {
    mpd_uint_t s;
-
    s = a + b;
    s = (s < a) ? s - m : s;
    s = (s >= m) ? s - m : s;
-
    return s;
 }

@ -39,10 +31,8 @@ static inline mpd_uint_t
 submod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
 {
    mpd_uint_t d;
-
    d = a - b;
    d = (a < b) ? d + m : d;
-
    return d;
 }

@ -54,13 +44,10 @@ static inline mpd_uint_t
 ext_submod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
 {
    mpd_uint_t d;
-
    a = (a >= m) ? a - m : a;
    b = (b >= m) ? b - m : b;
-
    d = a - b;
    d = (a < b) ? d + m : d;
-
    return d;
 }

@ -73,10 +60,8 @@ static inline mpd_uint_t
 dw_reduce(mpd_uint_t hi, mpd_uint_t lo, mpd_uint_t m)
 {
    mpd_uint_t r1, r2, w;
-
    _mpd_div_word(&w, &r1, hi, m);
    _mpd_div_words(&w, &r2, r1, lo, m);
-
    return r2;
 }

@ -89,142 +74,213 @@ static inline mpd_uint_t
 dw_submod(mpd_uint_t a, mpd_uint_t hi, mpd_uint_t lo, mpd_uint_t m)
 {
    mpd_uint_t d, r;
-
    r = dw_reduce(hi, lo, m);
    d = a - r;
    d = (a < r) ? d + m : d;
-
    return d;
 }

-#ifdef CONFIG_64
-
-/**************************************************************************/
-/*                        64-bit modular arithmetic                       */
-/**************************************************************************/
-
-/*
- * A proof of the algorithm is in literature/mulmod-64.txt. An ACL2
- * proof is in umodarith.lisp: section "Fast modular reduction".
+/**
+ * Calculates (a × b) % 𝑝 where 𝑝 is special
 *
- * Algorithm: calculate (a * b) % p:
+ * In the whole comment, "⩭" stands for "is congruent with".
 *
- *   a) hi, lo <- a * b       # Calculate a * b.
+ * Result of a × b in terms of high/low words:
 *
- *   b) hi, lo <-  R(hi, lo)  # Reduce modulo p.
+ *    (1) hi × 2⁶⁴ + lo = a × b
 *
- *   c) Repeat step b) until 0 <= hi * 2**64 + lo < 2*p.
+ * Special primes:
 *
- *   d) If the result is less than p, return lo. Otherwise return lo - p.
+ *    (2) 𝑝 = 2⁶⁴ - z + 1, where z = 2ⁿ
+ *
+ *        i.e. 0xfffffffffffffc01
+ *             0xfffffffffffff001
+ *             0xffffffffff000001
+ *             0xffffffff00000001
+ *             0xfffffffc00000001
+ *             0xffffff0000000001
+ *
+ *
+ * Single step modular reduction:
+ *
+ *    (3) R(hi, lo) = hi × z - hi + lo
+ *
+ *
+ * Strategy
+ * --------
+ *
+ *    a) Set (hi, lo) to the result of a × b.
+ *
+ *    b) Set (hi′, lo′) to the result of R(hi, lo).
+ *
+ *    c) Repeat step b) until 0 ≤ hi′ × 2⁶⁴ + lo′ < 𝟸×𝑝.
+ *
+ *    d) If the result is less than 𝑝, return lo′. Otherwise return lo′ - 𝑝.
+ *
+ *
+ * The reduction step b) preserves congruence
+ * ------------------------------------------
+ *
+ *     hi × 2⁶⁴ + lo ⩭ hi × z - hi + lo   (mod 𝑝)
+ *
+ *     Proof:
+ *     ~~~~~~
+ *
+ *        hi × 2⁶⁴ + lo = (2⁶⁴ - z + 1) × hi + z × hi - hi + lo
+ *
+ *                      = 𝑝 × hi             + z × hi - hi + lo
+ *
+ *                      ⩭ z × hi - hi + lo   (mod 𝑝)
+ *
+ *
+ * Maximum number of repetitions of step b)
+ * ----------------------------------------
+ *
+ * To avoid unnecessary formalism, define:
+ *
+ *     def R(hi, lo, z):
+ *          return divmod(hi * z - hi + lo, 2**64)
+ *
+ * For simplicity, assume hi=2⁶⁴-1, lo=2⁶⁴-1 after the
+ * initial multiplication a × b. This is of course impossible
+ * but certainly covers all cases.
+ *
+ * Then, for p1:
+ *
+ *     z  = 2³²
+ *     hi = 2⁶⁴-1
+ *     lo = 2⁶⁴-1
+ *     p1 = 2⁶⁴ - z + 1
+ *     hi, lo = R(hi, lo, z)    # First reduction
+ *     hi, lo = R(hi, lo, z)    # Second reduction
+ *     hi × 2⁶⁴ + lo < 2 × p1   # True
+ *
+ * For p2:
+ *
+ *     z  = 2³⁴
+ *     hi = 2⁶⁴-1
+ *     lo = 2⁶⁴-1
+ *     p2 = 2⁶⁴ - z + 1
+ *     hi, lo = R(hi, lo, z)    # First reduction
+ *     hi, lo = R(hi, lo, z)    # Second reduction
+ *     hi, lo = R(hi, lo, z)    # Third reduction
+ *     hi × 2⁶⁴ + lo < 2 × p2   # True
+ *
+ * For p3:
+ *
+ *     z  = 2⁴⁰
+ *     hi = 2⁶⁴-1
+ *     lo = 2⁶⁴-1
+ *     p3 = 2⁶⁴ - z + 1
+ *     hi, lo = R(hi, lo, z)    # First reduction
+ *     hi, lo = R(hi, lo, z)    # Second reduction
+ *     hi, lo = R(hi, lo, z)    # Third reduction
+ *     hi × 2⁶⁴ + lo < 2 × p3   # True
+ *
+ * Step d) preserves congruence and yields a result < 𝑝
+ * ----------------------------------------------------
+ *
+ * Case hi = 0:
+ *
+ *   Case lo < 𝑝: trivial.
+ *
+ *   Case lo ≥ 𝑝:
+ *
+ *     lo ⩭ lo - 𝑝   (mod 𝑝)             # result is congruent
+ *
+ *     𝑝 ≤ lo < 𝟸×𝑝  →  0 ≤ lo - 𝑝 < 𝑝   # result is in the correct range
+ *
+ * Case hi = 1:
+ *
+ *     𝑝 < 2⁶⁴ ∧ 2⁶⁴ + lo < 𝟸×𝑝  →  lo < 𝑝   # lo is always less than 𝑝
+ *
+ *     2⁶⁴ + lo ⩭ 2⁶⁴ + (lo - 𝑝)    (mod 𝑝)  # result is congruent
+ *
+ *              = lo - 𝑝   # exactly the same value as the previous RHS
+ *                         # in uint64_t arithmetic.
+ *
+ *     𝑝 < 2⁶⁴ + lo < 𝟸×𝑝  →  0 < 2⁶⁴ + (lo - 𝑝) < 𝑝  # correct range
+ *
+ *
+ * [1] http://www.apfloat.org/apfloat/2.40/apfloat.pdf
 */
-
 static inline mpd_uint_t
 x64_mulmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
 {
    mpd_uint_t hi, lo, x, y;
-
-
    _mpd_mul_words(&hi, &lo, a, b);
-
    if (m & (1ULL<<32)) { /* P1 */
-
        /* first reduction */
        x = y = hi;
        hi >>= 32;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 32;
        lo = y + x;
        if (lo < y) hi++;
-
        /* second reduction */
        x = y = hi;
        hi >>= 32;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 32;
        lo = y + x;
        if (lo < y) hi++;
-
-        return (hi || lo >= m ? lo - m : lo);
+        return hi || lo >= m ? lo - m : lo;
    }
    else if (m & (1ULL<<34)) { /* P2 */
-
        /* first reduction */
        x = y = hi;
        hi >>= 30;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 34;
        lo = y + x;
        if (lo < y) hi++;
-
        /* second reduction */
        x = y = hi;
        hi >>= 30;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 34;
        lo = y + x;
        if (lo < y) hi++;
-
        /* third reduction */
        x = y = hi;
        hi >>= 30;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 34;
        lo = y + x;
        if (lo < y) hi++;
-
-        return (hi || lo >= m ? lo - m : lo);
+        return hi || lo >= m ? lo - m : lo;
    }
    else { /* P3 */
-
        /* first reduction */
        x = y = hi;
        hi >>= 24;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 40;
        lo = y + x;
        if (lo < y) hi++;
-
        /* second reduction */
        x = y = hi;
        hi >>= 24;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 40;
        lo = y + x;
        if (lo < y) hi++;
-
        /* third reduction */
        x = y = hi;
        hi >>= 24;
-
        x = lo - x;
        if (x > lo) hi--;
-
        y <<= 40;
        lo = y + x;
        if (lo < y) hi++;
-
-        return (hi || lo >= m ? lo - m : lo);
+        return hi || lo >= m ? lo - m : lo;
    }
 }

@ -247,375 +303,13 @@ static inline mpd_uint_t
 x64_powmod(mpd_uint_t base, mpd_uint_t exp, mpd_uint_t umod)
 {
    mpd_uint_t r = 1;
-
    while (exp > 0) {
        if (exp & 1)
            r = x64_mulmod(r, base, umod);
        base = x64_mulmod(base, base, umod);
        exp >>= 1;
    }
-
    return r;
 }

-/* END CONFIG_64 */
-#else /* CONFIG_32 */
-
-
-/**************************************************************************/
-/*                        32-bit modular arithmetic                       */
-/**************************************************************************/
-
-#if defined(ANSI)
-#if !defined(LEGACY_COMPILER)
-/* HAVE_UINT64_T */
-static inline mpd_uint_t
-std_mulmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
-{
-    return ((mpd_uuint_t) a * b) % m;
-}
-
-static inline void
-std_mulmod2c(mpd_uint_t *a, mpd_uint_t *b, mpd_uint_t w, mpd_uint_t m)
-{
-    *a = ((mpd_uuint_t) *a * w) % m;
-    *b = ((mpd_uuint_t) *b * w) % m;
-}
-
-static inline void
-std_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
-            mpd_uint_t m)
-{
-    *a0 = ((mpd_uuint_t) *a0 * b0) % m;
-    *a1 = ((mpd_uuint_t) *a1 * b1) % m;
-}
-/* END HAVE_UINT64_T */
-#else
-/* LEGACY_COMPILER */
-static inline mpd_uint_t
-std_mulmod(mpd_uint_t a, mpd_uint_t b, mpd_uint_t m)
-{
-    mpd_uint_t hi, lo, q, r;
-    _mpd_mul_words(&hi, &lo, a, b);
-    _mpd_div_words(&q, &r, hi, lo, m);
-    return r;
-}
-
-static inline void
-std_mulmod2c(mpd_uint_t *a, mpd_uint_t *b, mpd_uint_t w, mpd_uint_t m)
-{
-    *a = std_mulmod(*a, w, m);
-    *b = std_mulmod(*b, w, m);
-}
-
-static inline void
-std_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
-            mpd_uint_t m)
-{
-    *a0 = std_mulmod(*a0, b0, m);
-    *a1 = std_mulmod(*a1, b1, m);
-}
-/* END LEGACY_COMPILER */
-#endif
-
-static inline mpd_uint_t
-std_powmod(mpd_uint_t base, mpd_uint_t exp, mpd_uint_t umod)
-{
-    mpd_uint_t r = 1;
-
-    while (exp > 0) {
-        if (exp & 1)
-            r = std_mulmod(r, base, umod);
-        base = std_mulmod(base, base, umod);
-        exp >>= 1;
-    }
-
-    return r;
-}
-#endif /* ANSI CONFIG_32 */
-
-
-/**************************************************************************/
-/*                    Pentium Pro modular arithmetic                      */
-/**************************************************************************/
-
-/*
- * A proof of the algorithm is in literature/mulmod-ppro.txt. The FPU
- * control word must be set to 64-bit precision and truncation mode
- * prior to using these functions.
- *
- * Algorithm: calculate (a * b) % p:
- *
- *   p    := prime < 2**31
- *   pinv := (long double)1.0 / p (precalculated)
- *
- *   a) n = a * b              # Calculate exact product.
- *   b) qest = n * pinv        # Calculate estimate for q = n / p.
- *   c) q = (qest+2**63)-2**63 # Truncate qest to the exact quotient.
- *   d) r = n - q * p          # Calculate remainder.
- *
- * Remarks:
- *
- *   - p = dmod and pinv = dinvmod.
- *   - dinvmod points to an array of three uint32_t, which is interpreted
- *     as an 80 bit long double by fldt.
- *   - Intel compilers prior to version 11 do not seem to handle the
- *     __GNUC__ inline assembly correctly.
- *   - random tests are provided in tests/extended/ppro_mulmod.c
- */
-
-#if defined(PPRO)
-#if defined(ASM)
-
-/* Return (a * b) % dmod */
-static inline mpd_uint_t
-ppro_mulmod(mpd_uint_t a, mpd_uint_t b, double *dmod, uint32_t *dinvmod)
-{
-    mpd_uint_t retval;
-
-    __asm__ (
-            "fildl  %2\n\t"
-            "fildl  %1\n\t"
-            "fmulp  %%st, %%st(1)\n\t"
-            "fldt   (%4)\n\t"
-            "fmul   %%st(1), %%st\n\t"
-            "flds   %5\n\t"
-            "fadd   %%st, %%st(1)\n\t"
-            "fsubrp %%st, %%st(1)\n\t"
-            "fldl   (%3)\n\t"
-            "fmulp  %%st, %%st(1)\n\t"
-            "fsubrp %%st, %%st(1)\n\t"
-            "fistpl %0\n\t"
-            : "=m" (retval)
-            : "m" (a), "m" (b), "r" (dmod), "r" (dinvmod), "m" (MPD_TWO63)
-            : "st", "memory"
-    );
-
-    return retval;
-}
-
-/*
- * Two modular multiplications in parallel:
- *      *a0 = (*a0 * w) % dmod
- *      *a1 = (*a1 * w) % dmod
- */
-static inline void
-ppro_mulmod2c(mpd_uint_t *a0, mpd_uint_t *a1, mpd_uint_t w,
-              double *dmod, uint32_t *dinvmod)
-{
-    __asm__ (
-            "fildl  %2\n\t"
-            "fildl  (%1)\n\t"
-            "fmul   %%st(1), %%st\n\t"
-            "fxch   %%st(1)\n\t"
-            "fildl  (%0)\n\t"
-            "fmulp  %%st, %%st(1) \n\t"
-            "fldt   (%4)\n\t"
-            "flds   %5\n\t"
-            "fld    %%st(2)\n\t"
-            "fmul   %%st(2)\n\t"
-            "fadd   %%st(1)\n\t"
-            "fsub   %%st(1)\n\t"
-            "fmull  (%3)\n\t"
-            "fsubrp %%st, %%st(3)\n\t"
-            "fxch   %%st(2)\n\t"
-            "fistpl (%0)\n\t"
-            "fmul   %%st(2)\n\t"
-            "fadd   %%st(1)\n\t"
-            "fsubp  %%st, %%st(1)\n\t"
-            "fmull  (%3)\n\t"
-            "fsubrp %%st, %%st(1)\n\t"
-            "fistpl (%1)\n\t"
-            : : "r" (a0), "r" (a1), "m" (w),
-                "r" (dmod), "r" (dinvmod),
-                "m" (MPD_TWO63)
-            : "st", "memory"
-    );
-}
-
-/*
- * Two modular multiplications in parallel:
- *      *a0 = (*a0 * b0) % dmod
- *      *a1 = (*a1 * b1) % dmod
- */
-static inline void
-ppro_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
-             double *dmod, uint32_t *dinvmod)
-{
-    __asm__ (
-            "fildl  %3\n\t"
-            "fildl  (%2)\n\t"
-            "fmulp  %%st, %%st(1)\n\t"
-            "fildl  %1\n\t"
-            "fildl  (%0)\n\t"
-            "fmulp  %%st, %%st(1)\n\t"
-            "fldt   (%5)\n\t"
-            "fld    %%st(2)\n\t"
-            "fmul   %%st(1), %%st\n\t"
-            "fxch   %%st(1)\n\t"
-            "fmul   %%st(2), %%st\n\t"
-            "flds   %6\n\t"
-            "fldl   (%4)\n\t"
-            "fxch   %%st(3)\n\t"
-            "fadd   %%st(1), %%st\n\t"
-            "fxch   %%st(2)\n\t"
-            "fadd   %%st(1), %%st\n\t"
-            "fxch   %%st(2)\n\t"
-            "fsub   %%st(1), %%st\n\t"
-            "fxch   %%st(2)\n\t"
-            "fsubp  %%st, %%st(1)\n\t"
-            "fxch   %%st(1)\n\t"
-            "fmul   %%st(2), %%st\n\t"
-            "fxch   %%st(1)\n\t"
-            "fmulp  %%st, %%st(2)\n\t"
-            "fsubrp %%st, %%st(3)\n\t"
-            "fsubrp %%st, %%st(1)\n\t"
-            "fxch   %%st(1)\n\t"
-            "fistpl (%2)\n\t"
-            "fistpl (%0)\n\t"
-            : : "r" (a0), "m" (b0), "r" (a1), "m" (b1),
-                "r" (dmod), "r" (dinvmod),
-                "m" (MPD_TWO63)
-            : "st", "memory"
-    );
-}
-/* END PPRO GCC ASM */
-#elif defined(MASM)
-
-/* Return (a * b) % dmod */
-static inline mpd_uint_t __cdecl
-ppro_mulmod(mpd_uint_t a, mpd_uint_t b, double *dmod, uint32_t *dinvmod)
-{
-    mpd_uint_t retval;
-
-    __asm {
-        mov     eax, dinvmod
-        mov     edx, dmod
-        fild    b
-        fild    a
-        fmulp   st(1), st
-        fld     TBYTE PTR [eax]
-        fmul    st, st(1)
-        fld     MPD_TWO63
-        fadd    st(1), st
-        fsubp   st(1), st
-        fld     QWORD PTR [edx]
-        fmulp   st(1), st
-        fsubp   st(1), st
-        fistp   retval
-    }
-
-    return retval;
-}
-
-/*
- * Two modular multiplications in parallel:
- *      *a0 = (*a0 * w) % dmod
- *      *a1 = (*a1 * w) % dmod
- */
-static inline mpd_uint_t __cdecl
-ppro_mulmod2c(mpd_uint_t *a0, mpd_uint_t *a1, mpd_uint_t w,
-              double *dmod, uint32_t *dinvmod)
-{
-    __asm {
-        mov     ecx, dmod
-        mov     edx, a1
-        mov     ebx, dinvmod
-        mov     eax, a0
-        fild    w
-        fild    DWORD PTR [edx]
-        fmul    st, st(1)
-        fxch    st(1)
-        fild    DWORD PTR [eax]
-        fmulp   st(1), st
-        fld     TBYTE PTR [ebx]
-        fld     MPD_TWO63
-        fld     st(2)
-        fmul    st, st(2)
-        fadd    st, st(1)
-        fsub    st, st(1)
-        fmul    QWORD PTR [ecx]
-        fsubp   st(3), st
-        fxch    st(2)
-        fistp   DWORD PTR [eax]
-        fmul    st, st(2)
-        fadd    st, st(1)
-        fsubrp  st(1), st
-        fmul    QWORD PTR [ecx]
-        fsubp   st(1), st
-        fistp   DWORD PTR [edx]
-    }
-}
-
-/*
- * Two modular multiplications in parallel:
- *      *a0 = (*a0 * b0) % dmod
- *      *a1 = (*a1 * b1) % dmod
- */
-static inline void __cdecl
-ppro_mulmod2(mpd_uint_t *a0, mpd_uint_t b0, mpd_uint_t *a1, mpd_uint_t b1,
-             double *dmod, uint32_t *dinvmod)
-{
-    __asm {
-        mov     ecx, dmod
-        mov     edx, a1
-        mov     ebx, dinvmod
-        mov     eax, a0
-        fild    b1
-        fild    DWORD PTR [edx]
-        fmulp   st(1), st
-        fild    b0
-        fild    DWORD PTR [eax]
-        fmulp   st(1), st
-        fld     TBYTE PTR [ebx]
-        fld     st(2)
-        fmul    st, st(1)
-        fxch    st(1)
-        fmul    st, st(2)
-        fld     DWORD PTR MPD_TWO63
-        fld     QWORD PTR [ecx]
-        fxch    st(3)
-        fadd    st, st(1)
-        fxch    st(2)
-        fadd    st, st(1)
-        fxch    st(2)
-        fsub    st, st(1)
-        fxch    st(2)
-        fsubrp  st(1), st
-        fxch    st(1)
-        fmul    st, st(2)
-        fxch    st(1)
-        fmulp   st(2), st
-        fsubp   st(3), st
-        fsubp   st(1), st
-        fxch    st(1)
-        fistp   DWORD PTR [edx]
-        fistp   DWORD PTR [eax]
-    }
-}
-#endif /* PPRO MASM (_MSC_VER) */
-
-
-/* Return (base ** exp) % dmod */
-static inline mpd_uint_t
-ppro_powmod(mpd_uint_t base, mpd_uint_t exp, double *dmod, uint32_t *dinvmod)
-{
-    mpd_uint_t r = 1;
-
-    while (exp > 0) {
-        if (exp & 1)
-            r = ppro_mulmod(r, base, dmod, dinvmod);
-        base = ppro_mulmod(base, base, dmod, dinvmod);
-        exp >>= 1;
-    }
-
-    return r;
-}
-#endif /* PPRO */
-#endif /* CONFIG_32 */
-
-
 #endif /* UMODARITH_H */
-
-
-