Implement more bf16/fp16 compiler runtimes

Fixes #1259
This commit is contained in:
Justine Tunney 2024-09-13 05:06:34 -07:00
parent 6b10f4d0b6
commit b5fcb59a85
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
11 changed files with 209 additions and 178 deletions

View file

@ -65,6 +65,26 @@ typedef __UINT64_TYPE__ uint64_t;
typedef __INTMAX_TYPE__ intmax_t; typedef __INTMAX_TYPE__ intmax_t;
typedef __UINTMAX_TYPE__ uintmax_t; typedef __UINTMAX_TYPE__ uintmax_t;
/* TODO(jart): re-import compiler-rt once they have it */
/* On x86-64, when the compiler predefines __FLT128_MAX_10_EXP__, remove the
   whole family of predefined __FLT128_* limit macros listed below.
   NOTE(review): presumably this keeps code that feature-tests on these
   macros from using binary128 until the runtime can back it; confirm. */
#if defined(__x86_64__) && defined(__FLT128_MAX_10_EXP__)
#undef __FLT128_MAX_10_EXP__
#undef __FLT128_DENORM_MIN__
#undef __FLT128_MIN_EXP__
#undef __FLT128_MIN_10_EXP__
#undef __FLT128_MANT_DIG__
#undef __FLT128_HAS_INFINITY__
#undef __FLT128_EPSILON__
#undef __FLT128_MAX_EXP__
#undef __FLT128_HAS_DENORM__
#undef __FLT128_DIG__
#undef __FLT128_MIN__
#undef __FLT128_MAX__
#undef __FLT128_NORM_MAX__
#undef __FLT128_HAS_QUIET_NAN__
#undef __FLT128_IS_IEC_60559__
#undef __FLT128_DECIMAL_DIG__
#endif
#define __DEFINED_max_align_t #define __DEFINED_max_align_t
typedef long double max_align_t; typedef long double max_align_t;

View file

@ -17,12 +17,53 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
__bf16 __truncsfbf2(float f) { /**
* @fileoverview bf16 compiler runtime
*/
_Float32 __extendbfsf2(__bf16 f) {
union { union {
float f; __bf16 f;
unsigned i; uint16_t i;
} ub = {f};
// convert brain16 to binary32
uint32_t x = (uint32_t)ub.i << 16;
// force nan to quiet
if ((x & 0x7fffffff) > 0x7f800000)
x |= 0x00400000;
// pun to _Float32
union {
uint32_t i;
_Float32 f;
} uf = {x};
return uf.f;
}
/**
 * Widens brain16 to binary64 by way of binary32.
 *
 * Every brain16 value fits in binary32, so the second widening step
 * cannot lose information.
 */
_Float64 __extendbfdf2(__bf16 f) {
  _Float32 single = __extendbfsf2(f);
  return single;
}
#ifdef __x86_64__
/**
 * Widens brain16 to __float80 by way of binary32.
 */
__float80 __extendbfxf2(__bf16 f) {
  _Float32 single = __extendbfsf2(f);
  return single;
}
#endif
#ifdef __aarch64__
/**
 * Widens brain16 to binary128 by way of binary32.
 */
_Float128 __extendbftf2(__bf16 f) {
  _Float32 single = __extendbfsf2(f);
  return single;
}
#endif
__bf16 __truncsfbf2(_Float32 f) {
union {
_Float32 f;
uint32_t i;
} uf = {f}; } uf = {f};
unsigned x = uf.i; uint32_t x = uf.i;
if ((x & 0x7fffffff) > 0x7f800000) if ((x & 0x7fffffff) > 0x7f800000)
// force nan to quiet // force nan to quiet
@ -33,8 +74,24 @@ __bf16 __truncsfbf2(float f) {
// pun to bf16 // pun to bf16
union { union {
unsigned short i; uint16_t i;
__bf16 f; __bf16 f;
} ub = {x}; } ub = {x};
return ub.f; return ub.f;
} }
// Defines NAME(x), which narrows a value of floating type T to _Float32
// using round-to-odd instead of round-to-nearest.
//
// Passing a wide value straight to __truncsfbf2() rounds twice: once in
// the implicit conversion to _Float32 and once more going to brain16.
// That goes wrong just above a brain16 tie. For example 1 + 2^-8 + 2^-40
// becomes exactly 1 + 2^-8 in binary32, which then ties to even as 0x3f80
// even though 0x3f81 is the nearest brain16.
//
// Round-to-odd avoids this. When the conversion is inexact, the result
// is the neighbouring binary32 whose low significand bit is set, so the
// discarded bits stay visible to the final rounding as a sticky bit.
// The helper starts from the ordinary conversion. If that was inexact
// and landed on an even bit pattern, it steps one ulp toward x. NaN
// (x != x) and exact conversions pass through untouched.
#define BF16_DEFINE_ROUND_ODD(NAME, T)                  \
  static inline _Float32 NAME(T x) {                    \
    union {                                             \
      _Float32 f;                                       \
      uint32_t i;                                       \
    } u = {(_Float32)x};                                \
    if (x == x && (T)u.f != x && !(u.i & 1)) {          \
      if (x < 0 ? x < (T)u.f : x > (T)u.f) {            \
        ++u.i; /* fell short of x: step away from 0 */  \
      } else {                                          \
        --u.i; /* overshot x: step back toward 0 */     \
      }                                                 \
    }                                                   \
    return u.f;                                         \
  }

BF16_DEFINE_ROUND_ODD(bf16_round_odd_df, _Float64)

/**
 * Narrows binary64 to brain16 with a single effective rounding.
 */
__bf16 __truncdfbf2(_Float64 f) {
  return __truncsfbf2(bf16_round_odd_df(f));
}

#ifdef __x86_64__
BF16_DEFINE_ROUND_ODD(bf16_round_odd_xf, __float80)

/**
 * Narrows __float80 to brain16 with a single effective rounding.
 */
__bf16 __truncxfbf2(__float80 f) {
  return __truncsfbf2(bf16_round_odd_xf(f));
}
#endif

#ifdef __aarch64__
BF16_DEFINE_ROUND_ODD(bf16_round_odd_tf, _Float128)

/**
 * Narrows binary128 to brain16 with a single effective rounding.
 */
__bf16 __trunctfbf2(_Float128 f) {
  return __truncsfbf2(bf16_round_odd_tf(f));
}
#endif

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
/**
 * Widens brain16 to binary32.
 *
 * The sixteen brain16 bits become the upper half of the binary32
 * pattern. Any NaN additionally gets its quiet bit set.
 */
float __extendbfsf2(__bf16 f) {
  union {
    __bf16 value;
    unsigned short bits;
  } narrow;
  union {
    unsigned bits;
    float value;
  } wide;
  narrow.value = f;
  wide.bits = (unsigned)narrow.bits << 16;
  // exponent all ones with a nonzero fraction means nan
  if ((wide.bits & 0x7fffffffu) > 0x7f800000u) {
    wide.bits |= 0x00400000u;
  }
  return wide.value;
}

View file

@ -8,8 +8,6 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// //
__static_yoink("huge_compiler_rt_license");
#define QUAD_PRECISION #define QUAD_PRECISION
#include "third_party/compiler_rt/fp_lib.inc" #include "third_party/compiler_rt/fp_lib.inc"

View file

@ -21,22 +21,135 @@
* @fileoverview fp16 compiler runtime * @fileoverview fp16 compiler runtime
*/ */
#define asint(x) ((union pun){x}).i #define isnan16(x) (((x) & 0x7fff) > 0x7c00)
#define isnan(x) (((x) & 0x7fff) > 0x7c00)
union pun { static inline _Float16 tofloat16(int x) {
union {
uint16_t i;
_Float16 f; _Float16 f;
unsigned short i; } u = {x};
}; return u.f;
}
// Reinterprets a binary16 value as its 16-bit pattern, widened to int.
static inline int fromfloat16(_Float16 x) {
  union {
    _Float16 value;
    uint16_t bits;
  } pun;
  pun.value = x;
  return pun.bits;
}
// Reinterprets a 32-bit pattern as the binary32 value it encodes.
static inline _Float32 tofloat32(uint32_t w) {
  union {
    uint32_t bits;
    _Float32 value;
  } pun = {w};
  return pun.value;
}
// Reinterprets a binary32 value as its 32-bit pattern.
static inline uint32_t fromfloat32(_Float32 f) {
  union {
    _Float32 value;
    uint32_t bits;
  } pun = {f};
  return pun.bits;
}
// Returns the magnitude of x by clearing the sign bit of its pattern.
static inline _Float32 fabs32(_Float32 x) {
  uint32_t magnitude = fromfloat32(x) & 0x7fffffffu;
  return tofloat32(magnitude);
}
int __eqhf2(_Float16 fx, _Float16 fy) { int __eqhf2(_Float16 fx, _Float16 fy) {
int x = asint(fx); int x = fromfloat16(fx);
int y = asint(fy); int y = fromfloat16(fy);
return (x == y) & !isnan(x) & !isnan(y); return (x == y) & !isnan16(x) & !isnan16(y);
} }
int __nehf2(_Float16 fx, _Float16 fy) { int __nehf2(_Float16 fx, _Float16 fy) {
int x = asint(fx); int x = fromfloat16(fx);
int y = asint(fy); int y = fromfloat16(fy);
return (x != y) & !isnan(x) & !isnan(y); return (x != y) & !isnan16(x) & !isnan16(y);
} }
// Widens binary16 to binary32 using integer shifts plus one float operation
// per path. A normal-path and a subnormal-path result are both computed and
// one of them is selected at the end.
_Float32 __extendhfsf2(_Float16 f) {
uint16_t h = fromfloat16(f);
// park the sixteen half bits in the upper half of a 32-bit word
const uint32_t w = (uint32_t)h << 16;
const uint32_t sign = w & 0x80000000u;
// doubling shifts the sign out: the half exponent is now bits 31..27 and
// the half mantissa bits 26..17
const uint32_t two_w = w + w;
// normal path: (two_w >> 4) lines the half exponent up with the low five
// bits of the binary32 exponent field. Adding 0xE0 (224) to that field and
// then scaling by 2^-112 nets +112, the bias difference 127 - 15. A half
// exponent of 31 (inf/nan) lands on 255, which the multiply leaves inf/nan.
const uint32_t exp_offset = 0xE0u << 23;
// hex float literals need C99 or GNU C; 0x7800000 is the pattern of 2^-112
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \
defined(__GNUC__) && !defined(__STRICT_ANSI__)
const _Float32 exp_scale = 0x1.0p-112f;
#else
const _Float32 exp_scale = tofloat32(0x7800000u);
#endif
const _Float32 normalized_value =
tofloat32((two_w >> 4) + exp_offset) * exp_scale;
// subnormal path: (two_w >> 17) drops the half mantissa m into the low ten
// bits. OR-ing in the exponent of 0.5 builds 0.5 + m * 2^-24, so subtracting
// 0.5 leaves m * 2^-24, which is the value of a subnormal half.
const uint32_t magic_mask = 126u << 23;
const _Float32 magic_bias = 0.5f;
const _Float32 denormalized_value =
tofloat32((two_w >> 17) | magic_mask) - magic_bias;
// two_w below 2^27 means the half exponent field is zero
const uint32_t denormalized_cutoff = 1u << 27;
const uint32_t result =
sign | (two_w < denormalized_cutoff ? fromfloat32(denormalized_value)
: fromfloat32(normalized_value));
return tofloat32(result);
}
/**
 * Widens binary16 to binary64 by way of binary32.
 *
 * Every binary16 value fits in binary32, so the second widening step
 * cannot lose information.
 */
_Float64 __extendhfdf2(_Float16 f) {
  _Float32 single = __extendhfsf2(f);
  return single;
}
#ifdef __x86_64__
/**
 * Widens binary16 to __float80 by way of binary32.
 */
__float80 __extendhfxf2(_Float16 f) {
  _Float32 single = __extendhfsf2(f);
  return single;
}
#endif
#ifdef __aarch64__
/**
 * Widens binary16 to binary128 by way of binary32.
 */
_Float128 __extendhftf2(_Float16 f) {
  _Float32 single = __extendhfsf2(f);
  return single;
}
#endif
// Narrows binary32 to binary16. The rounding itself is delegated to a float
// addition; the half exponent and mantissa are then picked out of the sum's
// bit pattern.
_Float16 __truncsfhf2(_Float32 f) {
// 2^112 and 2^-110, spelled as bit patterns when hex float literals are
// unavailable
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || \
defined(__GNUC__) && !defined(__STRICT_ANSI__)
const _Float32 scale_to_inf = 0x1.0p+112f;
const _Float32 scale_to_zero = 0x1.0p-110f;
#else
const _Float32 scale_to_inf = tofloat32(0x77800000u);
const _Float32 scale_to_zero = tofloat32(0x08800000u);
#endif
// magnitudes of 2^16 and up overflow binary32 in the first multiply;
// anything that survives it ends up scaled by a net 2^2
_Float32 base = (fabs32(f) * scale_to_inf) * scale_to_zero;
const uint32_t w = fromfloat32(f);
// doubling drops the sign, leaving the 8-bit exponent field in the top byte
const uint32_t shl1_w = w + w;
const uint32_t sign = w & 0x80000000u;
uint32_t bias = shl1_w & 0xFF000000u;
// clamp the exponent field to at least 0x71 (113, i.e. 2^-14)
if (bias < 0x71000000u)
bias = 0x71000000u;
// the addend is a power of two whose exponent field is the clamped one plus
// 15. That is 13 binades above the scaled input, so the sum cannot keep the
// low 13 of its 23 mantissa bits and the addition rounds them away.
base = tofloat32((bias >> 1) + 0x07800000u) + base;
const uint32_t bits = fromfloat32(base);
// low five bits of the sum's exponent field, moved to bits 14..10
const uint32_t exp_bits = (bits >> 13) & 0x00007C00u;
// low twelve bits of the sum; whatever sits above the ten-bit half mantissa
// adds into the exponent field below
const uint32_t mantissa_bits = bits & 0x00000FFFu;
const uint32_t nonsign = exp_bits + mantissa_bits;
// shl1_w above 0xFF000000 means exponent all ones with a nonzero mantissa,
// i.e. nan, which is returned as 0x7E00 plus the sign
return tofloat16((sign >> 16) | (shl1_w > 0xFF000000u ? 0x7E00u : nonsign));
}
// Defines NAME(x), which narrows a value of floating type T to _Float32
// using round-to-odd instead of round-to-nearest.
//
// Passing a wide value straight to __truncsfhf2() rounds twice: once in
// the implicit conversion to _Float32 and once more going to binary16.
// That goes wrong just above a binary16 tie. For example
// 1 + 2^-11 + 2^-40 becomes exactly 1 + 2^-11 in binary32, which then
// ties to even as 0x3c00 even though 0x3c01 is the nearest binary16.
//
// Round-to-odd avoids this. When the conversion is inexact, the result
// is the neighbouring binary32 whose low significand bit is set, so the
// discarded bits stay visible to the final rounding as a sticky bit.
// The helper starts from the ordinary conversion. If that was inexact
// and landed on an even bit pattern, it steps one ulp toward x. NaN
// (x != x) and exact conversions pass through untouched.
#define FP16_DEFINE_ROUND_ODD(NAME, T)                  \
  static inline _Float32 NAME(T x) {                    \
    union {                                             \
      _Float32 f;                                       \
      uint32_t i;                                       \
    } u = {(_Float32)x};                                \
    if (x == x && (T)u.f != x && !(u.i & 1)) {          \
      if (x < 0 ? x < (T)u.f : x > (T)u.f) {            \
        ++u.i; /* fell short of x: step away from 0 */  \
      } else {                                          \
        --u.i; /* overshot x: step back toward 0 */     \
      }                                                 \
    }                                                   \
    return u.f;                                         \
  }

FP16_DEFINE_ROUND_ODD(fp16_round_odd_df, _Float64)

/**
 * Narrows binary64 to binary16 with a single effective rounding.
 */
_Float16 __truncdfhf2(_Float64 f) {
  return __truncsfhf2(fp16_round_odd_df(f));
}

#ifdef __x86_64__
FP16_DEFINE_ROUND_ODD(fp16_round_odd_xf, __float80)

/**
 * Narrows __float80 to binary16 with a single effective rounding.
 */
_Float16 __truncxfhf2(__float80 f) {
  return __truncsfhf2(fp16_round_odd_xf(f));
}
#endif

#ifdef __aarch64__
FP16_DEFINE_ROUND_ODD(fp16_round_odd_tf, _Float128)

/**
 * Narrows binary128 to binary16 with a single effective rounding.
 */
_Float16 __trunctfhf2(_Float128 f) {
  return __truncsfhf2(fp16_round_odd_tf(f));
}
#endif

View file

@ -1,24 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
__bf16 __truncsfbf2(float);

/**
 * Narrows binary64 to brain16 with a single effective rounding.
 *
 * Handing the double straight to __truncsfbf2() rounds twice, once to
 * binary32 and once to brain16, which goes wrong just above a brain16
 * tie. For example 1 + 2^-8 + 2^-40 becomes exactly 1 + 2^-8 in
 * binary32 and then ties to even as 0x3f80, even though 0x3f81 is the
 * nearest brain16.
 *
 * The intermediate is therefore rounded to odd. When the conversion to
 * binary32 is inexact and lands on an even bit pattern, it is moved
 * one ulp toward the input, so the discarded bits stay visible to the
 * final rounding as a sticky bit. NaN and exact conversions pass
 * through untouched.
 */
__bf16 __truncdfbf2(double f) {
  union {
    float f;
    unsigned i;
  } u = {(float)f};
  if (f == f && (double)u.f != f && !(u.i & 1)) {
    if (f < 0 ? f < (double)u.f : f > (double)u.f) {
      ++u.i;  // fell short of f: step away from zero
    } else {
      --u.i;  // overshot f: step back toward zero
    }
  }
  return __truncsfbf2(u.f);
}

View file

@ -7,8 +7,6 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
__static_yoink("huge_compiler_rt_license");
#define QUAD_PRECISION #define QUAD_PRECISION
#include "third_party/compiler_rt/fp_lib.inc" #include "third_party/compiler_rt/fp_lib.inc"

View file

@ -1,17 +0,0 @@
//===-- lib/extendhfdf2.c - half -> double conversion -------------*- C -*-===//
//
// The Cosmopolitan Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#define SRC_HALF
#define DST_DOUBLE
#include "third_party/compiler_rt/fp16_extend_impl.inc"
// Exported entry point that forwards to __extendXfYf2__().
COMPILER_RT_ABI dst_t __extendhfdf2(src_t a) {
  dst_t widened = __extendXfYf2__(a);
  return widened;
}

View file

@ -1,27 +0,0 @@
//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#define SRC_HALF
#define DST_SINGLE
#include "fp16_extend_impl.inc"
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
COMPILER_RT_ABI NOINLINE float __extendhfsf2(src_t a) {
  float widened = __extendXfYf2__(a);
  return widened;
}
// Second exported name for the same conversion; forwards to __extendhfsf2().
COMPILER_RT_ABI float __gnu_h2f_ieee(src_t a) {
  float widened = __extendhfsf2(a);
  return widened;
}
// On ARM EABI targets the conversion is also exported as __aeabi_h2f: as a
// real forwarding function when COMPILER_RT_ARMHF_TARGET is defined, and via
// COMPILER_RT_ALIAS otherwise.
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI float __aeabi_h2f(src_t a) { return __extendhfsf2(a); }
#else
COMPILER_RT_ALIAS(__extendhfsf2, __aeabi_h2f)
#endif
#endif

View file

@ -1,21 +0,0 @@
//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#define SRC_DOUBLE
#define DST_HALF
#include "fp16_trunc_impl.inc"
// Exported entry point that forwards to __truncXfYf2__().
COMPILER_RT_ABI dst_t __truncdfhf2(double a) {
  dst_t narrowed = __truncXfYf2__(a);
  return narrowed;
}
// On ARM EABI targets the conversion is also exported as __aeabi_d2h: as a
// real forwarding function when COMPILER_RT_ARMHF_TARGET is defined, and via
// COMPILER_RT_ALIAS otherwise.
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI dst_t __aeabi_d2h(double a) { return __truncdfhf2(a); }
#else
COMPILER_RT_ALIAS(__truncdfhf2, __aeabi_d2h)
#endif
#endif

View file

@ -1,27 +0,0 @@
//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#define SRC_SINGLE
#define DST_HALF
#include "fp16_trunc_impl.inc"
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
COMPILER_RT_ABI NOINLINE dst_t __truncsfhf2(float a) {
  dst_t narrowed = __truncXfYf2__(a);
  return narrowed;
}
// Second exported name for the same conversion; forwards to __truncsfhf2().
COMPILER_RT_ABI dst_t __gnu_f2h_ieee(float a) {
  dst_t narrowed = __truncsfhf2(a);
  return narrowed;
}
// On ARM EABI targets the conversion is also exported as __aeabi_f2h: as a
// real forwarding function when COMPILER_RT_ARMHF_TARGET is defined, and via
// COMPILER_RT_ALIAS otherwise.
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI dst_t __aeabi_f2h(float a) { return __truncsfhf2(a); }
#else
COMPILER_RT_ALIAS(__truncsfhf2, __aeabi_f2h)
#endif
#endif