mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
23e235b7a5
This change integrates e58abc1110b335a3341e8ad5821ad8e3880d9bb2 from https://github.com/ahgamut/musl-cross-make/ which fixes the issues we were having with our C language extension for symbolic constants. This change also performs some code cleanup and bug fixes to getaddrinfo(). It's now possible to compile projects like ncurses, readline and python without needing to patch anything upstream, except maybe a line or two. Pretty soon it should be possible to build a Linux distro on Cosmo.
164 lines
5.9 KiB
C
164 lines
5.9 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/nexgen32e/x86feature.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/str/str.h"
|
|
#include "third_party/mbedtls/bignum_internal.h"
|
|
#include "third_party/mbedtls/math.h"
|
|
#include "third_party/mbedtls/platform.h"
|
|
|
|
forceinline int Cmp(uint64_t *a, uint64_t *b, size_t n) {
|
|
size_t i;
|
|
uint64_t x, y;
|
|
while (n--) {
|
|
x = a[n];
|
|
y = b[n];
|
|
if (x != y) {
|
|
return x > y ? 1 : -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
forceinline bool Sub(uint64_t *C, uint64_t *A, uint64_t *B, size_t n) {
|
|
bool cf;
|
|
uint64_t c, i;
|
|
#ifdef __x86_64__
|
|
asm volatile("xor\t%1,%1\n\t"
|
|
".align\t16\n1:\t"
|
|
"mov\t(%5,%3,8),%1\n\t"
|
|
"sbb\t(%6,%3,8),%1\n\t"
|
|
"mov\t%1,(%4,%3,8)\n\t"
|
|
"lea\t1(%3),%3\n\t"
|
|
"dec\t%2\n\t"
|
|
"jnz\t1b"
|
|
: "=@ccb"(cf), "=&r"(c), "+c"(n), "=r"(i)
|
|
: "r"(C), "r"(A), "r"(B), "3"(0)
|
|
: "cc", "memory");
|
|
#else
|
|
for (cf = false, c = i = 0; i < n; ++i) {
|
|
SBB(C[i], A[i], B[i], cf, cf);
|
|
}
|
|
#endif
|
|
return cf;
|
|
}
|
|
|
|
forceinline bool Add(uint64_t *C, uint64_t *A, uint64_t *B, size_t n) {
|
|
bool cf;
|
|
uint64_t c, i;
|
|
#ifdef __x86_64__
|
|
asm volatile("xor\t%1,%1\n\t"
|
|
".align\t16\n1:\t"
|
|
"mov\t(%5,%3,8),%1\n\t"
|
|
"adc\t(%6,%3,8),%1\n\t"
|
|
"mov\t%1,(%4,%3,8)\n\t"
|
|
"lea\t1(%3),%3\n\t"
|
|
"dec\t%2\n\t"
|
|
"jnz\t1b"
|
|
: "=@ccc"(cf), "=&r"(c), "+c"(n), "=r"(i)
|
|
: "r"(C), "r"(A), "r"(B), "3"(0)
|
|
: "cc", "memory");
|
|
#else
|
|
for (cf = false, c = i = 0; i < n; ++i) {
|
|
ADC(C[i], A[i], B[i], cf, cf);
|
|
}
|
|
#endif
|
|
return cf;
|
|
}
|
|
|
|
/**
|
|
* Multiplies huge numbers faster.
|
|
*
|
|
* For 4096 bit numbers it's twice as fast.
|
|
* For 16384 bit numbers it's thrice as fast.
|
|
*/
|
|
void Karatsuba(uint64_t *C, uint64_t *A, uint64_t *B, size_t n, uint64_t *K) {
|
|
int q, r;
|
|
size_t i;
|
|
uint64_t c, t;
|
|
uint64_t *x, *y;
|
|
if (n == 8) {
|
|
#ifdef __x86_64__
|
|
if (X86_HAVE(BMI2) && X86_HAVE(ADX)) {
|
|
Mul8x8Adx(C, A, B);
|
|
return;
|
|
}
|
|
#endif
|
|
Mul(C, A, 8, B, 8);
|
|
return;
|
|
}
|
|
switch (Cmp(A, A + n / 2, n / 2) * 3 + Cmp(B + n / 2, B, n / 2)) {
|
|
case -1 * 3 + +0:
|
|
case +0 * 3 + -1:
|
|
case +0 * 3 + +0:
|
|
case +0 * 3 + +1:
|
|
case +1 * 3 + +0:
|
|
Karatsuba(C, A, B, n / 2, K + n * 2);
|
|
Karatsuba(C + n, A + n / 2, B + n / 2, n / 2, K + n * 2);
|
|
c = Add(K, C, C + n, n);
|
|
c += Add(C + n / 2, C + n / 2, K, n);
|
|
break;
|
|
case -1 * 3 + -1:
|
|
Sub(K, A + n / 2, A, n / 2);
|
|
Sub(K + n / 2, B, B + n / 2, n / 2);
|
|
Karatsuba(K + n, K, K + n / 2, n / 2, K + n * 2);
|
|
Karatsuba(C, A, B, n / 2, K + n * 2);
|
|
Karatsuba(C + n, A + n / 2, B + n / 2, n / 2, K + n * 2);
|
|
c = Add(K, C, C + n, n);
|
|
c += Add(K + n, K, K + n, n);
|
|
c += Add(C + n / 2, C + n / 2, K + n, n);
|
|
break;
|
|
case -1 * 3 + +1:
|
|
Sub(K, A + n / 2, A, n / 2);
|
|
Sub(K + n / 2, B + n / 2, B, n / 2);
|
|
Karatsuba(K + n, K, K + n / 2, n / 2, K + n * 2);
|
|
Karatsuba(C, A, B, n / 2, K + n * 2);
|
|
Karatsuba(C + n, A + n / 2, B + n / 2, n / 2, K + n * 2);
|
|
c = Add(K, C, C + n, n);
|
|
c -= Sub(K + n, K, K + n, n);
|
|
c += Add(C + n / 2, C + n / 2, K + n, n);
|
|
break;
|
|
case +1 * 3 + -1:
|
|
Sub(K, A, A + n / 2, n / 2);
|
|
Sub(K + n / 2, B, B + n / 2, n / 2);
|
|
Karatsuba(K + n, K, K + n / 2, n / 2, K + n * 2);
|
|
Karatsuba(C, A, B, n / 2, K + n * 2);
|
|
Karatsuba(C + n, A + n / 2, B + n / 2, n / 2, K + n * 2);
|
|
c = Add(K, C, C + n, n);
|
|
c -= Sub(K + n, K, K + n, n);
|
|
c += Add(C + n / 2, C + n / 2, K + n, n);
|
|
break;
|
|
case +1 * 3 + +1:
|
|
Sub(K, A, A + n / 2, n / 2);
|
|
Sub(K + n / 2, B + n / 2, B, n / 2);
|
|
Karatsuba(K + n, K, K + n / 2, n / 2, K + n * 2);
|
|
Karatsuba(C, A, B, n / 2, K + n * 2);
|
|
Karatsuba(C + n, A + n / 2, B + n / 2, n / 2, K + n * 2);
|
|
c = Add(K, C, C + n, n);
|
|
c += Add(K + n, K, K + n, n);
|
|
c += Add(C + n / 2, C + n / 2, K + n, n);
|
|
break;
|
|
default:
|
|
__builtin_unreachable();
|
|
}
|
|
for (i = n / 2 + n; c && i < n + n; i++) {
|
|
t = C[i];
|
|
c = (C[i] = t + c) < t;
|
|
}
|
|
}
|