Release Cosmopolitan v3.3

This change upgrades to GCC 12.3 and GNU binutils 2.42. The GNU linker
appears to have changed things so that only a single de-duplicated string
table is present in the binary, and it gets placed wherever the linker
wants, regardless of what the linker script says. To cope with that we
need to stop using .ident to embed licenses. As such, this change does
significant work to revamp how third party licenses are defined in the
codebase, using `.section .notice,"aR",@progbits`.

This new GCC 12.3 toolchain has support for GNU indirect functions. It
lets us support __target_clones__ for the first time. This is used for
optimizing the performance of libc string functions such as strlen and
friends so far on x86, by ensuring AVX systems favor a second codepath
that uses VEX encoding. It shaves some latency off certain operations.
It's a useful feature to have for scientific computing for the reasons
explained by the test/libcxx/openmp_test.cc example which compiles for
fifteen different microarchitectures. Thanks to the upgrades, it's now
also possible to use newer instruction sets, such as AVX512FP16, VNNI.

Cosmo now uses the %gs register on x86 by default for TLS. Doing so is
helpful for any program that links `cosmo_dlopen()`. Such programs had
to recompile their binaries at startup to change the TLS instructions.
That's not great, since it means every page in the executable needs to
be faulted. The work of rewriting TLS-related x86 opcodes is moved to
fixupobj.com instead. This is great news for MacOS x86 users, since we
previously needed to morph the binary every time for that platform but
now that's no longer necessary. The only platforms where we need fixup
of TLS x86 opcodes at runtime are now Windows, OpenBSD, and NetBSD. On
Windows we morph TLS to point deeper into the TIB, based on a TlsAlloc
assignment, and on OpenBSD/NetBSD we morph %gs back into %fs since the
kernels do not allow us to specify a value for the %gs register.

OpenBSD users are now required to use APE Loader to run Cosmo binaries
and assimilation is no longer possible. The OpenBSD kernel needs to change
to allow programs to specify a value for the %gs register, or it needs
to stop marking executable pages loaded by the kernel as mimmutable().

This release fixes __constructor__, .ctor, .init_array, and lastly the
.preinit_array so they behave the exact same way as glibc.

We no longer use hex constants to define math.h symbols like M_PI.
This commit is contained in:
Justine Tunney 2024-02-20 11:12:09 -08:00
parent d3ff48c63f
commit 957c61cbbf
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
736 changed files with 13726 additions and 9445 deletions

View file

@ -1,12 +1,45 @@
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _GCC_ARM_ACLE_H
#define _GCC_ARM_ACLE_H
#pragma GCC aarch64 "arm_acle.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Stamp out a rotate-right intrinsic called NAME operating on TYPE.
   The requested rotate count is first reduced modulo the bit width of
   TYPE.  The complementary left-shift count is reduced the same way, so
   a count of zero (or any multiple of the width) never turns into a
   shift by the full width of TYPE.  */
#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \
  __extension__ extern __inline TYPE \
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
  NAME (TYPE __value, uint32_t __rotate) \
  { \
    const size_t __bits = sizeof (TYPE) * __CHAR_BIT__; \
    const size_t __right = __rotate % __bits; \
    const size_t __left = (__bits - __right) % __bits; \
    return (__value >> __right) | (__value << __left); \
  }
_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t)
_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long)
_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t)
/* The generator macro is private to this header.  */
#undef _GCC_ARM_ACLE_ROR_FN
/* Define the one-argument intrinsic __NAME as a thin inline wrapper:
   it accepts an ITYPE, hands it to __builtin_BUILTIN, and returns the
   builtin's result as an RTYPE.  */
#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, ITYPE, RTYPE) \
  __extension__ extern __inline RTYPE \
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
  __##NAME (ITYPE __value) \
  { \
    return __builtin_##BUILTIN (__value); \
  }
/* Wrappers over the generic clz / clrsb builtins.  */
_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t, unsigned int)
_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long, unsigned int)
_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t, unsigned int)
_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t, unsigned int)
_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long, unsigned int)
_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t, unsigned int)
/* Wrappers over the AArch64-specific rev16 / rbit builtins.  */
_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t, uint32_t)
_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long, unsigned long)
_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t, uint64_t)
_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t, uint32_t)
_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long, unsigned long)
_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t, uint64_t)
/* Wrappers over the generic bswap builtins.  */
_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t, int16_t)
_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t, uint32_t)
_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t)
/* The generator macro is private to this header.  */
#undef _GCC_ARM_ACLE_DATA_FN
/* Byte-reverse an unsigned long by dispatching on its width: the 32-bit
   __rev is used unless unsigned long is eight bytes wide, in which case
   the 64-bit __revll is used.  The size test is a compile-time constant.  */
__extension__ extern __inline unsigned long
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__revl (unsigned long __value)
{
  if (sizeof (unsigned long) != 8)
    return __rev (__value);
  return __revll (__value);
}
#pragma GCC push_options
#pragma GCC target ("arch=armv8.3-a")
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__jcvt (double __a)
{
return __builtin_aarch64_jcvtzs (__a);
@ -14,42 +47,50 @@ __jcvt (double __a)
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a")
__funline float
__extension__ extern __inline float
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint32zf (float __a)
{
return __builtin_aarch64_frint32zsf (__a);
}
__funline double
__extension__ extern __inline double
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint32z (double __a)
{
return __builtin_aarch64_frint32zdf (__a);
}
__funline float
__extension__ extern __inline float
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint64zf (float __a)
{
return __builtin_aarch64_frint64zsf (__a);
}
__funline double
__extension__ extern __inline double
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint64z (double __a)
{
return __builtin_aarch64_frint64zdf (__a);
}
__funline float
__extension__ extern __inline float
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint32xf (float __a)
{
return __builtin_aarch64_frint32xsf (__a);
}
__funline double
__extension__ extern __inline double
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint32x (double __a)
{
return __builtin_aarch64_frint32xdf (__a);
}
__funline float
__extension__ extern __inline float
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint64xf (float __a)
{
return __builtin_aarch64_frint64xsf (__a);
}
__funline double
__extension__ extern __inline double
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rint64x (double __a)
{
return __builtin_aarch64_frint64xdf (__a);
@ -57,42 +98,50 @@ __rint64x (double __a)
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("+nothing+crc")
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32b (uint32_t __a, uint8_t __b)
{
return __builtin_aarch64_crc32b (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32cb (uint32_t __a, uint8_t __b)
{
return __builtin_aarch64_crc32cb (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32ch (uint32_t __a, uint16_t __b)
{
return __builtin_aarch64_crc32ch (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32cw (uint32_t __a, uint32_t __b)
{
return __builtin_aarch64_crc32cw (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32cd (uint32_t __a, uint64_t __b)
{
return __builtin_aarch64_crc32cx (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32h (uint32_t __a, uint16_t __b)
{
return __builtin_aarch64_crc32h (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32w (uint32_t __a, uint32_t __b)
{
return __builtin_aarch64_crc32w (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__crc32d (uint32_t __a, uint64_t __b)
{
return __builtin_aarch64_crc32x (__a, __b);
@ -112,36 +161,72 @@ __crc32d (uint32_t __a, uint64_t __b)
#define _TMFAILURE_DBG 0x00400000u
#define _TMFAILURE_INT 0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__tstart (void)
{
return __builtin_aarch64_tstart ();
}
__funline void
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__tcommit (void)
{
__builtin_aarch64_tcommit ();
}
__funline void
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__tcancel (const uint64_t __reason)
{
__builtin_aarch64_tcancel (__reason);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__ttest (void)
{
return __builtin_aarch64_ttest ();
}
#pragma GCC pop_options
#endif
/* LS64 intrinsics.  This whole section is compiled only when the
   compiler defines __ARM_FEATURE_LS64.  The target pragma below enables
   "+ls64" for the definitions that follow; the matching pop_options
   restores the previous target options.  */
#ifdef __ARM_FEATURE_LS64
#pragma GCC push_options
#pragma GCC target ("+nothing+ls64")
/* Public name for the compiler-provided __arm_data512_t type
   (presumably a 512-bit / 64-byte payload, going by the name -- confirm
   against the ACLE specification).  */
typedef __arm_data512_t data512_t;
/* Return the data512_t produced by __builtin_aarch64_ld64b for __addr.  */
__extension__ extern __inline data512_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_ld64b (const void *__addr)
{
return __builtin_aarch64_ld64b (__addr);
}
/* Pass __addr and __value to __builtin_aarch64_st64b; no result.  */
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_st64b (void *__addr, data512_t __value)
{
__builtin_aarch64_st64b (__addr, __value);
}
/* Pass __addr and __value to __builtin_aarch64_st64bv and return its
   uint64_t result (the meaning of that value is not visible here).  */
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_st64bv (void *__addr, data512_t __value)
{
return __builtin_aarch64_st64bv (__addr, __value);
}
/* Same shape as __arm_st64bv, but forwards to __builtin_aarch64_st64bv0.  */
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_st64bv0 (void *__addr, data512_t __value)
{
return __builtin_aarch64_st64bv0 (__addr, __value);
}
#pragma GCC pop_options
#endif
#pragma GCC push_options
#pragma GCC target ("+nothing+rng")
__funline int
__extension__ extern __inline int
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rndr (uint64_t *__res)
{
return __builtin_aarch64_rndr (__res);
}
__funline int
__extension__ extern __inline int
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__rndrrs (uint64_t *__res)
{
return __builtin_aarch64_rndrrs (__res);

View file

@ -4,447 +4,536 @@
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+fp16")
typedef __fp16 float16_t;
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabsh_f16 (float16_t __a)
{
return __builtin_aarch64_abshf (__a);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgezh_f16 (float16_t __a)
{
return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgtzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vclezh_f16 (float16_t __a)
{
return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcltzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_f16_s16 (int16_t __a)
{
return __builtin_aarch64_floathihf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_f16_s32 (int32_t __a)
{
return __builtin_aarch64_floatsihf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_f16_s64 (int64_t __a)
{
return __builtin_aarch64_floatdihf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_f16_u16 (uint16_t __a)
{
return __builtin_aarch64_floatunshihf_us (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_f16_u32 (uint32_t __a)
{
return __builtin_aarch64_floatunssihf_us (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_f16_u64 (uint64_t __a)
{
return __builtin_aarch64_floatunsdihf_us (__a);
}
__funline int16_t
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_s16_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfhi (__a);
}
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_s32_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfsi (__a);
}
__funline int64_t
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_s64_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfdi (__a);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_u16_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfhi_us (__a);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_u32_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfsi_us (__a);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_u64_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfdi_us (__a);
}
__funline int16_t
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfhi (__a);
}
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfsi (__a);
}
__funline int64_t
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfdi (__a);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfhi_us (__a);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfsi_us (__a);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfdi_us (__a);
}
__funline int16_t
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtmh_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfhi (__a);
}
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtmh_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfsi (__a);
}
__funline int64_t
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtmh_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfdi (__a);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtmh_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfhi_us (__a);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtmh_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfsi_us (__a);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtmh_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfdi_us (__a);
}
__funline int16_t
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtnh_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfhi (__a);
}
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtnh_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfsi (__a);
}
__funline int64_t
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtnh_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfdi (__a);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtnh_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfhi_us (__a);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtnh_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfsi_us (__a);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtnh_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfdi_us (__a);
}
__funline int16_t
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtph_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfhi (__a);
}
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtph_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfsi (__a);
}
__funline int64_t
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtph_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfdi (__a);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtph_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfhi_us (__a);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtph_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfsi_us (__a);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtph_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfdi_us (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vnegh_f16 (float16_t __a)
{
return __builtin_aarch64_neghf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrecpeh_f16 (float16_t __a)
{
return __builtin_aarch64_frecpehf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrecpxh_f16 (float16_t __a)
{
return __builtin_aarch64_frecpxhf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndh_f16 (float16_t __a)
{
return __builtin_aarch64_btrunchf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndah_f16 (float16_t __a)
{
return __builtin_aarch64_roundhf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndih_f16 (float16_t __a)
{
return __builtin_aarch64_nearbyinthf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndmh_f16 (float16_t __a)
{
return __builtin_aarch64_floorhf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndnh_f16 (float16_t __a)
{
return __builtin_aarch64_frintnhf (__a);
return __builtin_aarch64_roundevenhf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndph_f16 (float16_t __a)
{
return __builtin_aarch64_ceilhf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrndxh_f16 (float16_t __a)
{
return __builtin_aarch64_rinthf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsqrteh_f16 (float16_t __a)
{
return __builtin_aarch64_rsqrtehf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsqrth_f16 (float16_t __a)
{
return __builtin_aarch64_sqrthf (__a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vaddh_f16 (float16_t __a, float16_t __b)
{
return __a + __b;
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vabdh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fabdhf (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcageh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgehf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcagth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgthf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcaleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclehf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcalth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclthf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vceqh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmeqhf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgeh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgehf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcgth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgthf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlehf_uss (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vclth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlthf_uss (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_f16_s16 (int16_t __a, const int __b)
{
return __builtin_aarch64_scvtfhi (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_f16_s32 (int32_t __a, const int __b)
{
return __builtin_aarch64_scvtfsihf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_f16_s64 (int64_t __a, const int __b)
{
return __builtin_aarch64_scvtfdihf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_f16_u16 (uint16_t __a, const int __b)
{
return __builtin_aarch64_ucvtfhi_sus (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_f16_u32 (uint32_t __a, const int __b)
{
return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_f16_u64 (uint64_t __a, const int __b)
{
return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
}
__funline int16_t
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_s16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshf (__a, __b);
}
__funline int32_t
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_s32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfsi (__a, __b);
}
__funline int64_t
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_s64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfdi (__a, __b);
}
__funline uint16_t
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_u16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
}
__funline uint32_t
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_u32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
}
__funline uint64_t
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_n_u64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdivh_f16 (float16_t __a, float16_t __b)
{
return __a / __b;
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmaxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmaxnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vminh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vminnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulh_f16 (float16_t __a, float16_t __b)
{
return __a * __b;
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmulxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmulxhf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrecpsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_frecpshf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrsqrtsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_rsqrtshf (__a, __b);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsubh_f16 (float16_t __a, float16_t __b)
{
return __a - __b;
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vfmah_f16 (float16_t __a, float16_t __b, float16_t __c)
{
return __builtin_aarch64_fmahf (__b, __c, __a);
}
__funline float16_t
__extension__ extern __inline float16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c)
{
return __builtin_aarch64_fnmahf (__b, __c, __a);

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,6 @@
typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;
#pragma GCC aarch64 "third_party/aarch64/arm_sve.internal.h"
#pragma GCC aarch64 "arm_sve.h"
#endif
#endif

View file

@ -13,17 +13,16 @@
# 3. You should fix up the `#pragma GCC aarch64` things.
#
s=/opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
s=/opt/goodies/include
d=third_party/aarch64
FILES='
acc_prof
arm_acle
arm_bf16
arm_fp16
arm_neon
acc_prof
arm_bf16
arm_sve
acc_prof
openacc
'