Improve new C23 checked arithmetic feature

This commit is contained in:
Justine Tunney 2023-06-16 15:32:18 -07:00
parent 2a1c588826
commit 4eebd6b9dc
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
10 changed files with 341 additions and 15 deletions

63
libc/intrin/divmodti4.c Normal file
View file

@ -0,0 +1,63 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/compiler_rt/int_lib.h"
/**
* Divides 128-bit signed integers w/ remainder.
*
* @param a is numerator
* @param b is denominator
* @param opt_out_rem receives euclidean division remainder if not null
* @return quotient or result of division
* @note rounds towards zero
*/
COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, tu_int *opt_out_rem) {
int k;
tu_int r;
ti_int sa, sb, sq, sr, x, y, q;
k = sizeof(ti_int) * CHAR_BIT - 1;
if (b == -1 && a == ((ti_int)1 << k)) {
volatile int x = 0;
x = 1 / x; // raise(SIGFPE)
}
sa = a >> k; // sa = a < 0 ? -1 : 0
sb = b >> k; // sb = b < 0 ? -1 : 0
x = (a ^ sa) - sa; // negate if sa == -1
y = (b ^ sb) - sb; // negate if sb == -1
sq = sa ^ sb; // sign of quotient
sr = sa; // sign of remainder
q = __udivmodti4(x, y, &r); // unsigned divide
q = (q ^ sq) - sq; // fix quotient sign
r = (r ^ sr) - sr; // fix remainder sign
if (opt_out_rem) *opt_out_rem = r;
return q;
}
/*
Intel Kabylake i9-9900 @ 3.10GHz Client Grade
idiv32 l: 27𝑐 9𝑛𝑠
idiv64 l: 27𝑐 9𝑛𝑠
divmodti4 small / small l: 42𝑐 14𝑛𝑠
divmodti4 small / large l: 14𝑐 5𝑛𝑠
divmodti4 large / small l: 92𝑐 30𝑛𝑠
divmodti4 large / large l: 209𝑐 68𝑛𝑠
Intel Kabylake i3-8100 @ 3.60GHz Client Grade
idiv32 l: 51𝑐 14𝑛𝑠
idiv64 l: 51𝑐 14𝑛𝑠
divmodti4 small / small l: 83𝑐 23𝑛𝑠
divmodti4 small / large l: 26𝑐 7𝑛𝑠
divmodti4 large / small l: 175𝑐 48𝑛𝑠
divmodti4 large / large l: 389𝑐 107𝑛𝑠
*/

22
libc/intrin/divti3.c Normal file
View file

@ -0,0 +1,22 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/compiler_rt/int_lib.h"
/**
* Divides 128-bit signed integers.
*
* @param a is numerator
* @param b is denominator
* @return quotient or result of division
* @note rounds towards zero
*/
COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
return __divmodti4(a, b, NULL);
}

59
libc/intrin/mulodi4.c Normal file
View file

@ -0,0 +1,59 @@
/* clang-format off */
/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file implements __mulodi4 for the compiler_rt library.
*
* ===----------------------------------------------------------------------===
*/
#include "third_party/compiler_rt/int_lib.h"
/* Returns: a * b */
/* Effects: sets *overflow to 1 if a * b overflows */
COMPILER_RT_ABI di_int
__mulodi4(di_int a, di_int b, int* overflow)
{
const int N = (int)(sizeof(di_int) * CHAR_BIT);
const di_int MIN = (du_int)1 << (N-1);
const di_int MAX = ~MIN;
*overflow = 0;
di_int result = (du_int)a * (du_int)b;
if (a == MIN)
{
if (b != 0 && b != 1)
*overflow = 1;
return result;
}
if (b == MIN)
{
if (a != 0 && a != 1)
*overflow = 1;
return result;
}
di_int sa = a >> (N - 1);
di_int abs_a = (a ^ sa) - sa;
di_int sb = b >> (N - 1);
di_int abs_b = (b ^ sb) - sb;
if (abs_a < 2 || abs_b < 2)
return result;
if (sa == sb)
{
if (abs_a > MAX / abs_b)
*overflow = 1;
}
else
{
if (abs_a > MIN / -abs_b)
*overflow = 1;
}
return result;
}

59
libc/intrin/mulosi4.c Normal file
View file

@ -0,0 +1,59 @@
/* clang-format off */
/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file implements __mulosi4 for the compiler_rt library.
*
* ===----------------------------------------------------------------------===
*/
#include "third_party/compiler_rt/int_lib.h"
/* Returns: a * b */
/* Effects: sets *overflow to 1 if a * b overflows */
COMPILER_RT_ABI si_int
__mulosi4(si_int a, si_int b, int* overflow)
{
const int N = (int)(sizeof(si_int) * CHAR_BIT);
const si_int MIN = (su_int)1 << (N-1);
const si_int MAX = ~MIN;
*overflow = 0;
si_int result = (su_int)a * (su_int)b;
if (a == MIN)
{
if (b != 0 && b != 1)
*overflow = 1;
return result;
}
if (b == MIN)
{
if (a != 0 && a != 1)
*overflow = 1;
return result;
}
si_int sa = a >> (N - 1);
si_int abs_a = (a ^ sa) - sa;
si_int sb = b >> (N - 1);
si_int abs_b = (b ^ sb) - sb;
if (abs_a < 2 || abs_b < 2)
return result;
if (sa == sb)
{
if (abs_a > MAX / abs_b)
*overflow = 1;
}
else
{
if (abs_a > MIN / -abs_b)
*overflow = 1;
}
return result;
}

63
libc/intrin/muloti4.c Normal file
View file

@ -0,0 +1,63 @@
/* clang-format off */
/*===-- muloti4.c - Implement __muloti4 -----------------------------------===
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
* ===----------------------------------------------------------------------===
*
* This file implements __muloti4 for the compiler_rt library.
*
* ===----------------------------------------------------------------------===
*/
#include "third_party/compiler_rt/int_lib.h"
#ifdef CRT_HAS_128BIT
/* Returns: a * b */
/* Effects: sets *overflow to 1 if a * b overflows */
COMPILER_RT_ABI ti_int
__muloti4(ti_int a, ti_int b, int* overflow)
{
const int N = (int)(sizeof(ti_int) * CHAR_BIT);
const ti_int MIN = (tu_int)1 << (N-1);
const ti_int MAX = ~MIN;
*overflow = 0;
ti_int result = (tu_int)a * (tu_int)b;
if (a == MIN)
{
if (b != 0 && b != 1)
*overflow = 1;
return result;
}
if (b == MIN)
{
if (a != 0 && a != 1)
*overflow = 1;
return result;
}
ti_int sa = a >> (N - 1);
ti_int abs_a = (a ^ sa) - sa;
ti_int sb = b >> (N - 1);
ti_int abs_b = (b ^ sb) - sb;
if (abs_a < 2 || abs_b < 2)
return result;
if (sa == sb)
{
if (abs_a > MAX / abs_b)
*overflow = 1;
}
else
{
if (abs_a > MIN / -abs_b)
*overflow = 1;
}
return result;
}
#endif /* CRT_HAS_128BIT */

137
libc/intrin/udivmodti4.c Normal file
View file

@ -0,0 +1,137 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to division,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "third_party/compiler_rt/int_lib.h"
/**
* Returns 128 bit division result by 64 bit.
*
* Result must fit in 64 bits. Remainder is stored in r.
*
* @see libdivide libdivide_128_div_64_to_64() division fallback
* @see Knuth, Volume 2, section 4.3.1, Algorithm D for correctness proof
* @see https://danlark.org/2020/06/14/128-bit-division/
*/
forceinline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
du_int *r) {
const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
const du_int b = 1ULL << (n_udword_bits / 2); // Number base (32 bits)
du_int un1, un0; // Norm. dividend LSD's
du_int vn1, vn0; // Norm. divisor digits
du_int q1, q0; // Quotient digits
du_int un64, un21, un10; // Dividend digit pairs
du_int rhat; // Remainder
si_int s; // Normalization shift
s = __builtin_clzll(v);
if (s > 0) {
// Normalize the divisor.
v = v << s;
un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
un10 = u0 << s; // Shift dividend left
} else {
// Avoid undefined behavior of (u0 >> 64).
un64 = u1;
un10 = u0;
}
// Break divisor up into two 32-bit digits.
vn1 = v >> (n_udword_bits / 2);
vn0 = v & 0xFFFFFFFF;
// Break right half of dividend into two digits.
un1 = un10 >> (n_udword_bits / 2);
un0 = un10 & 0xFFFFFFFF;
// Compute the first quotient digit, q1.
q1 = un64 / vn1;
rhat = un64 - q1 * vn1;
// q1 has at most error 2. No more than 2 iterations.
while (q1 >= b || q1 * vn0 > b * rhat + un1) {
q1 = q1 - 1;
rhat = rhat + vn1;
if (rhat >= b) break;
}
un21 = un64 * b + un1 - q1 * v;
// Compute the second quotient digit.
q0 = un21 / vn1;
rhat = un21 - q0 * vn1;
// q0 has at most error 2. No more than 2 iterations.
while (q0 >= b || q0 * vn0 > b * rhat + un0) {
q0 = q0 - 1;
rhat = rhat + vn1;
if (rhat >= b) break;
}
*r = (un21 * b + un0 - q0 * v) >> s;
return q1 * b + q0;
}
forceinline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, du_int *r) {
#ifdef __x86_64__
du_int result;
asm("div\t%2" : "=a"(result), "=d"(*r) : "r"(v), "0"(u0), "1"(u1) : "cc");
return result;
#else
return udiv128by64to64default(u1, u0, v, r);
#endif
}
/**
* Performs 128-bit unsigned division and remainder.
*
* @param a is dividend
* @param b is divisor
* @param rem receives remainder if not NULL
*/
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
utwords dividend, divisor, quotient, remainder;
si_int shift;
dividend.all = a;
divisor.all = b;
if (divisor.all > dividend.all) {
if (rem) *rem = dividend.all;
return 0;
}
// When the divisor fits in 64 bits, we can use an optimized path.
if (divisor.s.high == 0) {
remainder.s.high = 0;
if (dividend.s.high < divisor.s.low) {
// The result fits in 64 bits.
quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
divisor.s.low, &remainder.s.low);
quotient.s.high = 0;
} else {
// First, divide with the high part to get the remainder in
// dividend.s.high. After that dividend.s.high < divisor.s.low.
quotient.s.high = dividend.s.high / divisor.s.low;
dividend.s.high = dividend.s.high % divisor.s.low;
quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
divisor.s.low, &remainder.s.low);
}
if (rem) *rem = remainder.all;
return quotient.all;
}
// 0 <= shift <= 63.
shift = __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
divisor.all <<= shift;
quotient.s.high = 0;
quotient.s.low = 0;
for (; shift >= 0; --shift) {
quotient.s.low <<= 1;
// Branch free version of.
// if (dividend.all >= divisor.all)
// {
// dividend.all -= divisor.all;
// carry = 1;
// }
ti_int s = (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
quotient.s.low |= s & 1;
dividend.all -= divisor.all & s;
divisor.all >>= 1;
}
if (rem) *rem = dividend.all;
return quotient.all;
}

View file

@ -1,8 +1,93 @@
#ifndef COSMOPOLITAN_LIBC_STDCKDINT_H_
#define COSMOPOLITAN_LIBC_STDCKDINT_H_
#if !defined(MODE_DBG) && \
((defined(__GNUC__) && __GNUC__ >= 5 && !defined(__ICC)) || \
(defined(__has_builtin) && (__has_builtin(__builtin_add_overflow) && \
__has_builtin(__builtin_sub_overflow) && \
__has_builtin(__builtin_mul_overflow))))
#define ckd_add(R, A, B) __builtin_add_overflow((A), (B), (R))
#define ckd_sub(R, A, B) __builtin_sub_overflow((A), (B), (R))
#define ckd_mul(R, A, B) __builtin_mul_overflow((A), (B), (R))
#define ckd_add(res, x, y) __builtin_add_overflow((x), (y), (res))
#define ckd_sub(res, x, y) __builtin_sub_overflow((x), (y), (res))
#define ckd_mul(res, x, y) __builtin_mul_overflow((x), (y), (res))
#else
#define ckd_add(res, x, y) __ckd_arithmetic(add, res, x, y)
#define ckd_sub(res, x, y) __ckd_arithmetic(sub, res, x, y)
#define ckd_mul(res, x, y) __ckd_arithmetic(mul, res, x, y)
#if defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 8 && \
((defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 406) || \
defined(__llvm__)) && \
!defined(__STRICT_ANSI__)
#define __ckd_dword __int128
#else
#define __ckd_dword long long
#endif
#define __ckd_arithmetic(op, res, x, y) \
(sizeof(*(res)) == sizeof(int) ? __ckd_##op((int *)(res), (x), (y)) \
: sizeof(*(res)) == sizeof(long) ? __ckd_##op##l((long *)(res), (x), (y)) \
: sizeof(*(res)) == sizeof(__ckd_dword) \
? __ckd_##op##ll((__ckd_dword *)(res), (x), (y)) \
: __ckd_trap())
__funline int __ckd_trap(void) {
volatile int __x = 0;
return 1 / __x;
}
__funline int __ckd_add(int *__z, int __x, int __y) {
unsigned int __a, __b, __c;
*__z = __c = (__a = __x) + (__b = __y);
return ((__c ^ __a) & (__c ^ __b)) >> (sizeof(int) * CHAR_BIT - 1);
}
__funline int __ckd_addl(long *__z, long __x, long __y) {
unsigned long __a, __b, __c;
*__z = __c = (__a = __x) + (__b = __y);
return ((__c ^ __a) & (__c ^ __b)) >> (sizeof(long) * CHAR_BIT - 1);
}
__funline int __ckd_addll(__ckd_dword *__z, __ckd_dword __x, __ckd_dword __y) {
unsigned __ckd_dword __a, __b, __c;
*__z = __c = (__a = __x) + (__b = __y);
return ((__c ^ __a) & (__c ^ __b)) >> (sizeof(__ckd_dword) * CHAR_BIT - 1);
}
__funline int __ckd_sub(int *__z, int __x, int __y) {
unsigned int __a, __b, __c;
*__z = __c = (__a = __x) - (__b = __y);
return ((__a ^ __b) & (__c ^ __a)) >> (sizeof(int) * CHAR_BIT - 1);
}
__funline int __ckd_subl(long *__z, long __x, long __y) {
unsigned long __a, __b, __c;
*__z = __c = (__a = __x) - (__b = __y);
return ((__a ^ __b) & (__c ^ __a)) >> (sizeof(long) * CHAR_BIT - 1);
}
__funline int __ckd_subll(__ckd_dword *__z, __ckd_dword __x, __ckd_dword __y) {
unsigned __ckd_dword __a, __b, __c;
*__z = __c = (__a = __x) - (__b = __y);
return ((__a ^ __b) & (__c ^ __a)) >> (sizeof(__ckd_dword) * CHAR_BIT - 1);
}
int __mulosi4(int, int, int *);
long __mulodi4(long, long, int *);
__ckd_dword __muloti4(__ckd_dword, __ckd_dword, int *);
__funline int __ckd_mul(int *__z, int __x, int __y) {
int __o;
*__z = __mulosi4(__x, __y, &__o);
return __o;
}
__funline int __ckd_mull(long *__z, long __x, long __y) {
int __o;
*__z = __mulodi4(__x, __y, &__o);
return __o;
}
__funline int __ckd_mulll(__ckd_dword *__z, __ckd_dword __x, __ckd_dword __y) {
int __o;
*__z = __muloti4(__x, __y, &__o);
return __o;
}
#endif
#endif /* COSMOPOLITAN_LIBC_STDCKDINT_H_ */

View file

@ -17,6 +17,8 @@
unsigned long: "unsigned long", \
long long: "long long", \
unsigned long long: "unsigned long long", \
__int128: "__int128", \
unsigned __int128: "unsigned __int128", \
float: "float", \
double: "double", \
long double: "long double")