Upgrade to Cosmopolitan GCC 11.2.0 for aarch64

This commit is contained in:
Justine Tunney 2023-06-05 02:07:28 -07:00
parent 39f20dbb13
commit 9cc3e37263
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
63 changed files with 30429 additions and 22750 deletions

165
third_party/aarch64/acc_prof.internal.h vendored Normal file
View file

@ -0,0 +1,165 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _ACC_PROF_H
#define _ACC_PROF_H 1
#include "third_party/aarch64/openacc.internal.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum acc_event_t
{
acc_ev_none = 0,
acc_ev_device_init_start,
acc_ev_device_init_end,
acc_ev_device_shutdown_start,
acc_ev_device_shutdown_end,
acc_ev_runtime_shutdown,
acc_ev_create,
acc_ev_delete,
acc_ev_alloc,
acc_ev_free,
acc_ev_enter_data_start,
acc_ev_enter_data_end,
acc_ev_exit_data_start,
acc_ev_exit_data_end,
acc_ev_update_start,
acc_ev_update_end,
acc_ev_compute_construct_start,
acc_ev_compute_construct_end,
acc_ev_enqueue_launch_start,
acc_ev_enqueue_launch_end,
acc_ev_enqueue_upload_start,
acc_ev_enqueue_upload_end,
acc_ev_enqueue_download_start,
acc_ev_enqueue_download_end,
acc_ev_wait_start,
acc_ev_wait_end,
acc_ev_last
} acc_event_t;
typedef signed long int _acc_prof_ssize_t;
typedef unsigned long int _acc_prof_size_t;
typedef int _acc_prof_int_t;
#define _ACC_PROF_VALID_BYTES_STRUCT(_struct, _lastfield, _valid_bytes_lastfield) offsetof (_struct, _lastfield) + (_valid_bytes_lastfield)
#if 0
#define _ACC_PROF_VALID_BYTES_TYPE_N(_type, _n, _valid_bytes_type) ((_n - 1) * sizeof (_type) + (_valid_bytes_type))
#endif
#define _ACC_PROF_VALID_BYTES_BASICTYPE(_basictype) (sizeof (_basictype))
typedef struct acc_prof_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
_acc_prof_int_t version;
acc_device_t device_type;
_acc_prof_int_t device_number;
_acc_prof_int_t thread_id;
_acc_prof_ssize_t async;
_acc_prof_ssize_t async_queue;
const char *src_file;
const char *func_name;
_acc_prof_int_t line_no, end_line_no;
_acc_prof_int_t func_line_no, func_end_line_no;
#define _ACC_PROF_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_prof_info, func_end_line_no, _ACC_PROF_VALID_BYTES_BASICTYPE (_acc_prof_int_t))
} acc_prof_info;
#define _ACC_PROF_INFO_VERSION 201711
typedef enum acc_construct_t
{
acc_construct_parallel = 0,
acc_construct_kernels,
acc_construct_loop,
acc_construct_data,
acc_construct_enter_data,
acc_construct_exit_data,
acc_construct_host_data,
acc_construct_atomic,
acc_construct_declare,
acc_construct_init,
acc_construct_shutdown,
acc_construct_set,
acc_construct_update,
acc_construct_routine,
acc_construct_wait,
acc_construct_runtime_api,
acc_construct_serial
} acc_construct_t;
typedef struct acc_data_event_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
acc_construct_t parent_construct;
_acc_prof_int_t implicit;
void *tool_info;
const char *var_name;
_acc_prof_size_t bytes;
const void *host_ptr;
const void *device_ptr;
#define _ACC_DATA_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_data_event_info, device_ptr, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
} acc_data_event_info;
typedef struct acc_launch_event_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
acc_construct_t parent_construct;
_acc_prof_int_t implicit;
void *tool_info;
const char *kernel_name;
_acc_prof_size_t num_gangs, num_workers, vector_length;
#define _ACC_LAUNCH_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_launch_event_info, vector_length, _ACC_PROF_VALID_BYTES_BASICTYPE (_acc_prof_size_t))
} acc_launch_event_info;
typedef struct acc_other_event_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
acc_construct_t parent_construct;
_acc_prof_int_t implicit;
void *tool_info;
#define _ACC_OTHER_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_other_event_info, tool_info, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
} acc_other_event_info;
typedef union acc_event_info
{
acc_event_t event_type;
acc_data_event_info data_event;
acc_launch_event_info launch_event;
acc_other_event_info other_event;
} acc_event_info;
typedef enum acc_device_api
{
acc_device_api_none = 0,
acc_device_api_cuda,
acc_device_api_opencl,
acc_device_api_coi,
acc_device_api_other
} acc_device_api;
typedef struct acc_api_info
{
acc_device_api device_api;
_acc_prof_int_t valid_bytes;
acc_device_t device_type;
_acc_prof_int_t vendor;
const void *device_handle;
const void *context_handle;
const void *async_handle;
#define _ACC_API_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_api_info, async_handle, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
} acc_api_info;
typedef void (*acc_prof_callback) (acc_prof_info *, acc_event_info *,
acc_api_info *);
typedef enum acc_register_t
{
acc_reg = 0,
acc_toggle = 1,
acc_toggle_per_thread = 2
} acc_register_t;
typedef void (*acc_prof_reg) (acc_event_t, acc_prof_callback, acc_register_t);
extern void acc_prof_register (acc_event_t, acc_prof_callback,
acc_register_t) __GOACC_NOTHROW;
extern void acc_prof_unregister (acc_event_t, acc_prof_callback,
acc_register_t) __GOACC_NOTHROW;
typedef void (*acc_query_fn) ();
typedef acc_query_fn (*acc_prof_lookup_func) (const char *);
extern acc_query_fn acc_prof_lookup (const char *) __GOACC_NOTHROW;
extern void acc_register_library (acc_prof_reg, acc_prof_reg,
acc_prof_lookup_func);
#ifdef __cplusplus
}
#endif
#endif
#endif

View file

@ -1,63 +0,0 @@
#ifndef _GCC_ARM_ACLE_H
#define _GCC_ARM_ACLE_H
#ifdef __aarch64__
#include "libc/inttypes.h"
#include "libc/limits.h"
#include "libc/literal.h"
#pragma GCC push_options
#pragma GCC target("+nothing+crc")
#ifdef __cplusplus
extern "C" {
#endif
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32b(uint32_t __a, uint8_t __b) {
return __builtin_aarch64_crc32b(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32cb(uint32_t __a, uint8_t __b) {
return __builtin_aarch64_crc32cb(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32ch(uint32_t __a, uint16_t __b) {
return __builtin_aarch64_crc32ch(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32cw(uint32_t __a, uint32_t __b) {
return __builtin_aarch64_crc32cw(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32cd(uint32_t __a, uint64_t __b) {
return __builtin_aarch64_crc32cx(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32h(uint32_t __a, uint16_t __b) {
return __builtin_aarch64_crc32h(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32w(uint32_t __a, uint32_t __b) {
return __builtin_aarch64_crc32w(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32d(uint32_t __a, uint64_t __b) {
return __builtin_aarch64_crc32x(__a, __b);
}
#ifdef __cplusplus
}
#endif
#pragma GCC pop_options
#endif /* __aarch64__ */
#endif

164
third_party/aarch64/arm_acle.internal.h vendored Normal file
View file

@ -0,0 +1,164 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _GCC_ARM_ACLE_H
#define _GCC_ARM_ACLE_H
#ifdef __cplusplus
extern "C" {
#endif
#pragma GCC push_options
#pragma GCC target ("arch=armv8.3-a")
__funline int32_t
__jcvt (double __a)
{
return __builtin_aarch64_jcvtzs (__a);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a")
__funline float
__rint32zf (float __a)
{
return __builtin_aarch64_frint32zsf (__a);
}
__funline double
__rint32z (double __a)
{
return __builtin_aarch64_frint32zdf (__a);
}
__funline float
__rint64zf (float __a)
{
return __builtin_aarch64_frint64zsf (__a);
}
__funline double
__rint64z (double __a)
{
return __builtin_aarch64_frint64zdf (__a);
}
__funline float
__rint32xf (float __a)
{
return __builtin_aarch64_frint32xsf (__a);
}
__funline double
__rint32x (double __a)
{
return __builtin_aarch64_frint32xdf (__a);
}
__funline float
__rint64xf (float __a)
{
return __builtin_aarch64_frint64xsf (__a);
}
__funline double
__rint64x (double __a)
{
return __builtin_aarch64_frint64xdf (__a);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("+nothing+crc")
__funline uint32_t
__crc32b (uint32_t __a, uint8_t __b)
{
return __builtin_aarch64_crc32b (__a, __b);
}
__funline uint32_t
__crc32cb (uint32_t __a, uint8_t __b)
{
return __builtin_aarch64_crc32cb (__a, __b);
}
__funline uint32_t
__crc32ch (uint32_t __a, uint16_t __b)
{
return __builtin_aarch64_crc32ch (__a, __b);
}
__funline uint32_t
__crc32cw (uint32_t __a, uint32_t __b)
{
return __builtin_aarch64_crc32cw (__a, __b);
}
__funline uint32_t
__crc32cd (uint32_t __a, uint64_t __b)
{
return __builtin_aarch64_crc32cx (__a, __b);
}
__funline uint32_t
__crc32h (uint32_t __a, uint16_t __b)
{
return __builtin_aarch64_crc32h (__a, __b);
}
__funline uint32_t
__crc32w (uint32_t __a, uint32_t __b)
{
return __builtin_aarch64_crc32w (__a, __b);
}
__funline uint32_t
__crc32d (uint32_t __a, uint64_t __b)
{
return __builtin_aarch64_crc32x (__a, __b);
}
#pragma GCC pop_options
#ifdef __ARM_FEATURE_TME
#pragma GCC push_options
#pragma GCC target ("+nothing+tme")
#define _TMFAILURE_REASON 0x00007fffu
#define _TMFAILURE_RTRY 0x00008000u
#define _TMFAILURE_CNCL 0x00010000u
#define _TMFAILURE_MEM 0x00020000u
#define _TMFAILURE_IMP 0x00040000u
#define _TMFAILURE_ERR 0x00080000u
#define _TMFAILURE_SIZE 0x00100000u
#define _TMFAILURE_NEST 0x00200000u
#define _TMFAILURE_DBG 0x00400000u
#define _TMFAILURE_INT 0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u
__funline uint64_t
__tstart (void)
{
return __builtin_aarch64_tstart ();
}
__funline void
__tcommit (void)
{
__builtin_aarch64_tcommit ();
}
__funline void
__tcancel (const uint64_t __reason)
{
__builtin_aarch64_tcancel (__reason);
}
__funline uint64_t
__ttest (void)
{
return __builtin_aarch64_ttest ();
}
#pragma GCC pop_options
#endif
#pragma GCC push_options
#pragma GCC target ("+nothing+rng")
__funline int
__rndr (uint64_t *__res)
{
return __builtin_aarch64_rndr (__res);
}
__funline int
__rndrrs (uint64_t *__res)
{
return __builtin_aarch64_rndrrs (__res);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a+memtag")
#define __arm_mte_create_random_tag(__ptr, __u64_mask) __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
#define __arm_mte_exclude_tag(__ptr, __u64_excluded) __builtin_aarch64_memtag_gmi(__ptr, __u64_excluded)
#define __arm_mte_ptrdiff(__ptr_a, __ptr_b) __builtin_aarch64_memtag_subp(__ptr_a, __ptr_b)
#define __arm_mte_increment_tag(__ptr, __u_offset) __builtin_aarch64_memtag_inc_tag(__ptr, __u_offset)
#define __arm_mte_set_tag(__tagged_address) __builtin_aarch64_memtag_set_tag(__tagged_address)
#define __arm_mte_get_tag(__address) __builtin_aarch64_memtag_get_tag(__address)
#pragma GCC pop_options
#ifdef __cplusplus
}
#endif
#endif
#endif

23
third_party/aarch64/arm_bf16.internal.h vendored Normal file
View file

@ -0,0 +1,23 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _AARCH64_BF16_H_
#define _AARCH64_BF16_H_
typedef __bf16 bfloat16_t;
typedef float float32_t;
#pragma GCC push_options
#pragma GCC target ("+nothing+bf16+nosimd")
__extension__ extern __inline bfloat16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_bf16_f32 (float32_t __a)
{
return __builtin_aarch64_bfcvtbf (__a);
}
__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_f32_bf16 (bfloat16_t __a)
{
return __builtin_aarch64_bfcvtsf (__a);
}
#pragma GCC pop_options
#endif
#endif

View file

@ -1,373 +0,0 @@
#ifndef _AARCH64_FP16_H_
#define _AARCH64_FP16_H_
#ifdef __aarch64__
#include "libc/inttypes.h"
#include "libc/limits.h"
#include "libc/literal.h"
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+fp16")
typedef __fp16 float16_t;
__funline float16_t vabsh_f16(float16_t __a) {
return __builtin_aarch64_abshf(__a);
}
__funline uint16_t vceqzh_f16(float16_t __a) {
return __builtin_aarch64_cmeqhf_uss(__a, 0.0f);
}
__funline uint16_t vcgezh_f16(float16_t __a) {
return __builtin_aarch64_cmgehf_uss(__a, 0.0f);
}
__funline uint16_t vcgtzh_f16(float16_t __a) {
return __builtin_aarch64_cmgthf_uss(__a, 0.0f);
}
__funline uint16_t vclezh_f16(float16_t __a) {
return __builtin_aarch64_cmlehf_uss(__a, 0.0f);
}
__funline uint16_t vcltzh_f16(float16_t __a) {
return __builtin_aarch64_cmlthf_uss(__a, 0.0f);
}
__funline float16_t vcvth_f16_s16(int16_t __a) {
return __builtin_aarch64_floathihf(__a);
}
__funline float16_t vcvth_f16_s32(int32_t __a) {
return __builtin_aarch64_floatsihf(__a);
}
__funline float16_t vcvth_f16_s64(int64_t __a) {
return __builtin_aarch64_floatdihf(__a);
}
__funline float16_t vcvth_f16_u16(uint16_t __a) {
return __builtin_aarch64_floatunshihf_us(__a);
}
__funline float16_t vcvth_f16_u32(uint32_t __a) {
return __builtin_aarch64_floatunssihf_us(__a);
}
__funline float16_t vcvth_f16_u64(uint64_t __a) {
return __builtin_aarch64_floatunsdihf_us(__a);
}
__funline int16_t vcvth_s16_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfhi(__a);
}
__funline int32_t vcvth_s32_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfsi(__a);
}
__funline int64_t vcvth_s64_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfdi(__a);
}
__funline uint16_t vcvth_u16_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfhi_us(__a);
}
__funline uint32_t vcvth_u32_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfsi_us(__a);
}
__funline uint64_t vcvth_u64_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfdi_us(__a);
}
__funline int16_t vcvtah_s16_f16(float16_t __a) {
return __builtin_aarch64_lroundhfhi(__a);
}
__funline int32_t vcvtah_s32_f16(float16_t __a) {
return __builtin_aarch64_lroundhfsi(__a);
}
__funline int64_t vcvtah_s64_f16(float16_t __a) {
return __builtin_aarch64_lroundhfdi(__a);
}
__funline uint16_t vcvtah_u16_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfhi_us(__a);
}
__funline uint32_t vcvtah_u32_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfsi_us(__a);
}
__funline uint64_t vcvtah_u64_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfdi_us(__a);
}
__funline int16_t vcvtmh_s16_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfhi(__a);
}
__funline int32_t vcvtmh_s32_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfsi(__a);
}
__funline int64_t vcvtmh_s64_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfdi(__a);
}
__funline uint16_t vcvtmh_u16_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfhi_us(__a);
}
__funline uint32_t vcvtmh_u32_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfsi_us(__a);
}
__funline uint64_t vcvtmh_u64_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfdi_us(__a);
}
__funline int16_t vcvtnh_s16_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfhi(__a);
}
__funline int32_t vcvtnh_s32_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfsi(__a);
}
__funline int64_t vcvtnh_s64_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfdi(__a);
}
__funline uint16_t vcvtnh_u16_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfhi_us(__a);
}
__funline uint32_t vcvtnh_u32_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfsi_us(__a);
}
__funline uint64_t vcvtnh_u64_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfdi_us(__a);
}
__funline int16_t vcvtph_s16_f16(float16_t __a) {
return __builtin_aarch64_lceilhfhi(__a);
}
__funline int32_t vcvtph_s32_f16(float16_t __a) {
return __builtin_aarch64_lceilhfsi(__a);
}
__funline int64_t vcvtph_s64_f16(float16_t __a) {
return __builtin_aarch64_lceilhfdi(__a);
}
__funline uint16_t vcvtph_u16_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfhi_us(__a);
}
__funline uint32_t vcvtph_u32_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfsi_us(__a);
}
__funline uint64_t vcvtph_u64_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfdi_us(__a);
}
__funline float16_t vnegh_f16(float16_t __a) {
return __builtin_aarch64_neghf(__a);
}
__funline float16_t vrecpeh_f16(float16_t __a) {
return __builtin_aarch64_frecpehf(__a);
}
__funline float16_t vrecpxh_f16(float16_t __a) {
return __builtin_aarch64_frecpxhf(__a);
}
__funline float16_t vrndh_f16(float16_t __a) {
return __builtin_aarch64_btrunchf(__a);
}
__funline float16_t vrndah_f16(float16_t __a) {
return __builtin_aarch64_roundhf(__a);
}
__funline float16_t vrndih_f16(float16_t __a) {
return __builtin_aarch64_nearbyinthf(__a);
}
__funline float16_t vrndmh_f16(float16_t __a) {
return __builtin_aarch64_floorhf(__a);
}
__funline float16_t vrndnh_f16(float16_t __a) {
return __builtin_aarch64_frintnhf(__a);
}
__funline float16_t vrndph_f16(float16_t __a) {
return __builtin_aarch64_ceilhf(__a);
}
__funline float16_t vrndxh_f16(float16_t __a) {
return __builtin_aarch64_rinthf(__a);
}
__funline float16_t vrsqrteh_f16(float16_t __a) {
return __builtin_aarch64_rsqrtehf(__a);
}
__funline float16_t vsqrth_f16(float16_t __a) {
return __builtin_aarch64_sqrthf(__a);
}
__funline float16_t vaddh_f16(float16_t __a, float16_t __b) {
return __a + __b;
}
__funline float16_t vabdh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fabdhf(__a, __b);
}
__funline uint16_t vcageh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_facgehf_uss(__a, __b);
}
__funline uint16_t vcagth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_facgthf_uss(__a, __b);
}
__funline uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_faclehf_uss(__a, __b);
}
__funline uint16_t vcalth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_faclthf_uss(__a, __b);
}
__funline uint16_t vceqh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmeqhf_uss(__a, __b);
}
__funline uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmgehf_uss(__a, __b);
}
__funline uint16_t vcgth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmgthf_uss(__a, __b);
}
__funline uint16_t vcleh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmlehf_uss(__a, __b);
}
__funline uint16_t vclth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmlthf_uss(__a, __b);
}
__funline float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
return __builtin_aarch64_scvtfhi(__a, __b);
}
__funline float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
return __builtin_aarch64_scvtfsihf(__a, __b);
}
__funline float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
return __builtin_aarch64_scvtfdihf(__a, __b);
}
__funline float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
return __builtin_aarch64_ucvtfhi_sus(__a, __b);
}
__funline float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
return __builtin_aarch64_ucvtfsihf_sus(__a, __b);
}
__funline float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
return __builtin_aarch64_ucvtfdihf_sus(__a, __b);
}
__funline int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshf(__a, __b);
}
__funline int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshfsi(__a, __b);
}
__funline int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshfdi(__a, __b);
}
__funline uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhf_uss(__a, __b);
}
__funline uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhfsi_uss(__a, __b);
}
__funline uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhfdi_uss(__a, __b);
}
__funline float16_t vdivh_f16(float16_t __a, float16_t __b) {
return __a / __b;
}
__funline float16_t vmaxh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmaxhf(__a, __b);
}
__funline float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmaxhf(__a, __b);
}
__funline float16_t vminh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fminhf(__a, __b);
}
__funline float16_t vminnmh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fminhf(__a, __b);
}
__funline float16_t vmulh_f16(float16_t __a, float16_t __b) {
return __a * __b;
}
__funline float16_t vmulxh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmulxhf(__a, __b);
}
__funline float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_frecpshf(__a, __b);
}
__funline float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_rsqrtshf(__a, __b);
}
__funline float16_t vsubh_f16(float16_t __a, float16_t __b) {
return __a - __b;
}
__funline float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
return __builtin_aarch64_fmahf(__b, __c, __a);
}
__funline float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
return __builtin_aarch64_fnmahf(__b, __c, __a);
}
#pragma GCC pop_options
#undef FUNC
#endif /* __aarch64__ */
#endif /* _AARCH64_FP16_H_ */

455
third_party/aarch64/arm_fp16.internal.h vendored Normal file
View file

@ -0,0 +1,455 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _AARCH64_FP16_H_
#define _AARCH64_FP16_H_
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+fp16")
typedef __fp16 float16_t;
__funline float16_t
vabsh_f16 (float16_t __a)
{
return __builtin_aarch64_abshf (__a);
}
__funline uint16_t
vceqzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
}
__funline uint16_t
vcgezh_f16 (float16_t __a)
{
return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
}
__funline uint16_t
vcgtzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
}
__funline uint16_t
vclezh_f16 (float16_t __a)
{
return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
}
__funline uint16_t
vcltzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
}
__funline float16_t
vcvth_f16_s16 (int16_t __a)
{
return __builtin_aarch64_floathihf (__a);
}
__funline float16_t
vcvth_f16_s32 (int32_t __a)
{
return __builtin_aarch64_floatsihf (__a);
}
__funline float16_t
vcvth_f16_s64 (int64_t __a)
{
return __builtin_aarch64_floatdihf (__a);
}
__funline float16_t
vcvth_f16_u16 (uint16_t __a)
{
return __builtin_aarch64_floatunshihf_us (__a);
}
__funline float16_t
vcvth_f16_u32 (uint32_t __a)
{
return __builtin_aarch64_floatunssihf_us (__a);
}
__funline float16_t
vcvth_f16_u64 (uint64_t __a)
{
return __builtin_aarch64_floatunsdihf_us (__a);
}
__funline int16_t
vcvth_s16_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfhi (__a);
}
__funline int32_t
vcvth_s32_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfsi (__a);
}
__funline int64_t
vcvth_s64_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfdi (__a);
}
__funline uint16_t
vcvth_u16_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfhi_us (__a);
}
__funline uint32_t
vcvth_u32_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfsi_us (__a);
}
__funline uint64_t
vcvth_u64_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfdi_us (__a);
}
__funline int16_t
vcvtah_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfhi (__a);
}
__funline int32_t
vcvtah_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfsi (__a);
}
__funline int64_t
vcvtah_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfdi (__a);
}
__funline uint16_t
vcvtah_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfhi_us (__a);
}
__funline uint32_t
vcvtah_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfsi_us (__a);
}
__funline uint64_t
vcvtah_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfdi_us (__a);
}
__funline int16_t
vcvtmh_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfhi (__a);
}
__funline int32_t
vcvtmh_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfsi (__a);
}
__funline int64_t
vcvtmh_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfdi (__a);
}
__funline uint16_t
vcvtmh_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfhi_us (__a);
}
__funline uint32_t
vcvtmh_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfsi_us (__a);
}
__funline uint64_t
vcvtmh_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfdi_us (__a);
}
__funline int16_t
vcvtnh_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfhi (__a);
}
__funline int32_t
vcvtnh_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfsi (__a);
}
__funline int64_t
vcvtnh_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfdi (__a);
}
__funline uint16_t
vcvtnh_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfhi_us (__a);
}
__funline uint32_t
vcvtnh_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfsi_us (__a);
}
__funline uint64_t
vcvtnh_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfdi_us (__a);
}
__funline int16_t
vcvtph_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfhi (__a);
}
__funline int32_t
vcvtph_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfsi (__a);
}
__funline int64_t
vcvtph_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfdi (__a);
}
__funline uint16_t
vcvtph_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfhi_us (__a);
}
__funline uint32_t
vcvtph_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfsi_us (__a);
}
__funline uint64_t
vcvtph_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfdi_us (__a);
}
__funline float16_t
vnegh_f16 (float16_t __a)
{
return __builtin_aarch64_neghf (__a);
}
__funline float16_t
vrecpeh_f16 (float16_t __a)
{
return __builtin_aarch64_frecpehf (__a);
}
__funline float16_t
vrecpxh_f16 (float16_t __a)
{
return __builtin_aarch64_frecpxhf (__a);
}
__funline float16_t
vrndh_f16 (float16_t __a)
{
return __builtin_aarch64_btrunchf (__a);
}
__funline float16_t
vrndah_f16 (float16_t __a)
{
return __builtin_aarch64_roundhf (__a);
}
__funline float16_t
vrndih_f16 (float16_t __a)
{
return __builtin_aarch64_nearbyinthf (__a);
}
__funline float16_t
vrndmh_f16 (float16_t __a)
{
return __builtin_aarch64_floorhf (__a);
}
__funline float16_t
vrndnh_f16 (float16_t __a)
{
return __builtin_aarch64_frintnhf (__a);
}
__funline float16_t
vrndph_f16 (float16_t __a)
{
return __builtin_aarch64_ceilhf (__a);
}
__funline float16_t
vrndxh_f16 (float16_t __a)
{
return __builtin_aarch64_rinthf (__a);
}
__funline float16_t
vrsqrteh_f16 (float16_t __a)
{
return __builtin_aarch64_rsqrtehf (__a);
}
__funline float16_t
vsqrth_f16 (float16_t __a)
{
return __builtin_aarch64_sqrthf (__a);
}
__funline float16_t
vaddh_f16 (float16_t __a, float16_t __b)
{
return __a + __b;
}
__funline float16_t
vabdh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fabdhf (__a, __b);
}
__funline uint16_t
vcageh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgehf_uss (__a, __b);
}
__funline uint16_t
vcagth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgthf_uss (__a, __b);
}
__funline uint16_t
vcaleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclehf_uss (__a, __b);
}
__funline uint16_t
vcalth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclthf_uss (__a, __b);
}
__funline uint16_t
vceqh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmeqhf_uss (__a, __b);
}
__funline uint16_t
vcgeh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgehf_uss (__a, __b);
}
__funline uint16_t
vcgth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgthf_uss (__a, __b);
}
__funline uint16_t
vcleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlehf_uss (__a, __b);
}
__funline uint16_t
vclth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlthf_uss (__a, __b);
}
__funline float16_t
vcvth_n_f16_s16 (int16_t __a, const int __b)
{
return __builtin_aarch64_scvtfhi (__a, __b);
}
__funline float16_t
vcvth_n_f16_s32 (int32_t __a, const int __b)
{
return __builtin_aarch64_scvtfsihf (__a, __b);
}
__funline float16_t
vcvth_n_f16_s64 (int64_t __a, const int __b)
{
return __builtin_aarch64_scvtfdihf (__a, __b);
}
__funline float16_t
vcvth_n_f16_u16 (uint16_t __a, const int __b)
{
return __builtin_aarch64_ucvtfhi_sus (__a, __b);
}
__funline float16_t
vcvth_n_f16_u32 (uint32_t __a, const int __b)
{
return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
}
__funline float16_t
vcvth_n_f16_u64 (uint64_t __a, const int __b)
{
return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
}
__funline int16_t
vcvth_n_s16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshf (__a, __b);
}
__funline int32_t
vcvth_n_s32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfsi (__a, __b);
}
__funline int64_t
vcvth_n_s64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfdi (__a, __b);
}
__funline uint16_t
vcvth_n_u16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
}
__funline uint32_t
vcvth_n_u32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
}
__funline uint64_t
vcvth_n_u64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
}
__funline float16_t
vdivh_f16 (float16_t __a, float16_t __b)
{
return __a / __b;
}
__funline float16_t
vmaxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__funline float16_t
vmaxnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__funline float16_t
vminh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__funline float16_t
vminnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__funline float16_t
vmulh_f16 (float16_t __a, float16_t __b)
{
return __a * __b;
}
__funline float16_t
vmulxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmulxhf (__a, __b);
}
__funline float16_t
vrecpsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_frecpshf (__a, __b);
}
__funline float16_t
vrsqrtsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_rsqrtshf (__a, __b);
}
__funline float16_t
vsubh_f16 (float16_t __a, float16_t __b)
{
return __a - __b;
}
__funline float16_t
vfmah_f16 (float16_t __a, float16_t __b, float16_t __c)
{
return __builtin_aarch64_fmahf (__b, __c, __a);
}
__funline float16_t
vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c)
{
return __builtin_aarch64_fnmahf (__b, __c, __a);
}
#pragma GCC pop_options
#endif
#endif

File diff suppressed because it is too large Load diff

29402
third_party/aarch64/arm_neon.internal.h vendored Normal file

File diff suppressed because it is too large Load diff

11
third_party/aarch64/arm_sve.internal.h vendored Normal file
View file

@ -0,0 +1,11 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _ARM_SVE_H_
#define _ARM_SVE_H_
#include "third_party/aarch64/arm_bf16.internal.h"
typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;
#pragma GCC aarch64 "third_party/aarch64/arm_sve.internal.h"
#endif
#endif

112
third_party/aarch64/openacc.internal.h vendored Normal file
View file

@ -0,0 +1,112 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _OPENACC_H
#define _OPENACC_H 1
#ifdef __cplusplus
extern "C" {
#endif
#if __cplusplus >= 201103
# define __GOACC_NOTHROW noexcept
#elif __cplusplus
# define __GOACC_NOTHROW throw ()
#else
# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
#endif
typedef enum acc_device_t {
acc_device_current = -1,
acc_device_none = 0,
acc_device_default = 1,
acc_device_host = 2,
acc_device_not_host = 4,
acc_device_nvidia = 5,
acc_device_radeon = 8,
_ACC_device_hwm,
_ACC_highest = __INT_MAX__,
_ACC_neg = -1
} acc_device_t;
typedef enum acc_device_property_t {
acc_property_memory = 1,
acc_property_free_memory = 2,
acc_property_name = 0x10001,
acc_property_vendor = 0x10002,
acc_property_driver = 0x10003
} acc_device_property_t;
typedef enum acc_async_t {
acc_async_noval = -1,
acc_async_sync = -2
} acc_async_t;
int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW;
void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
size_t acc_get_property
(int, acc_device_t, acc_device_property_t) __GOACC_NOTHROW;
const char *acc_get_property_string
(int, acc_device_t, acc_device_property_t) __GOACC_NOTHROW;
int acc_async_test (int) __GOACC_NOTHROW;
int acc_async_test_all (void) __GOACC_NOTHROW;
void acc_wait (int) __GOACC_NOTHROW;
void acc_async_wait (int) __GOACC_NOTHROW;
void acc_wait_async (int, int) __GOACC_NOTHROW;
void acc_wait_all (void) __GOACC_NOTHROW;
void acc_async_wait_all (void) __GOACC_NOTHROW;
void acc_wait_all_async (int) __GOACC_NOTHROW;
void acc_init (acc_device_t) __GOACC_NOTHROW;
void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
#ifdef __cplusplus
int acc_on_device (int __arg) __GOACC_NOTHROW;
#else
int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW;
#endif
void *acc_malloc (size_t) __GOACC_NOTHROW;
void acc_free (void *) __GOACC_NOTHROW;
void *acc_copyin (void *, size_t) __GOACC_NOTHROW;
void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW;
void *acc_pcopyin (void *, size_t) __GOACC_NOTHROW;
void *acc_create (void *, size_t) __GOACC_NOTHROW;
void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW;
void *acc_pcreate (void *, size_t) __GOACC_NOTHROW;
void acc_copyout (void *, size_t) __GOACC_NOTHROW;
void acc_delete (void *, size_t) __GOACC_NOTHROW;
void acc_update_device (void *, size_t) __GOACC_NOTHROW;
void acc_update_self (void *, size_t) __GOACC_NOTHROW;
void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW;
void acc_unmap_data (void *) __GOACC_NOTHROW;
void *acc_deviceptr (void *) __GOACC_NOTHROW;
void *acc_hostptr (void *) __GOACC_NOTHROW;
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_attach (void **) __GOACC_NOTHROW;
void acc_attach_async (void **, int) __GOACC_NOTHROW;
void acc_detach (void **) __GOACC_NOTHROW;
void acc_detach_async (void **, int) __GOACC_NOTHROW;
void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_detach_finalize (void **) __GOACC_NOTHROW;
void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
void *acc_get_cuda_stream (int) __GOACC_NOTHROW;
int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
#ifdef __cplusplus
}
#pragma acc routine seq
inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
{
return acc_on_device ((int) __arg);
}
#endif
#endif
#endif

72
third_party/aarch64/upgrade.sh vendored Executable file
View file

@ -0,0 +1,72 @@
#!/bin/sh
# /opt/aarch64o2/lib/gcc/aarch64-linux-musl/9.2.0/include
# /opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
# IMPORTANT NOTES
#
# 1. You also need:
# #pragma GCC diagnostic ignored "-Wmissing-braces"
# In third_party/aarch64/arm_neon.internal.h
#
# 2. You have to rewrite arm_fp16 to use `__funline`.
#
# 3. You should fix up the `#pragma GCC aarch64` things.
#
s=/opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
d=third_party/aarch64
FILES='
arm_acle
arm_fp16
arm_neon
acc_prof
arm_bf16
arm_sve
acc_prof
openacc
'
strip_c_comments() {
# https://stackoverflow.com/a/13062682/1653720
[ $# -eq 2 ] && arg="$1" || arg=""
eval file="\$$#"
sed 's/a/aA/g; s/__/aB/g; s/#/aC/g' "$file" |
gcc -P -E $arg - |
sed 's/aC/#/g; s/aB/__/g; s/aA/a/g'
}
rm -f third_party/aarch64/*.h
for f in $FILES; do
echo cp $s/$f.h $d/$f.internal.h
cp $s/$f.h $d/$f.internal.h || exit
done
sed -i \
-e 's/# *include/#include/' \
-e '/#include .std/d' \
-e 's!#include [<"]!#include "third_party/aarch64/!' \
-e 's!\.h[>"]$!.internal.h"!' \
third_party/aarch64/*.h
# solve the pedantic gcc linter warning `'vmulxh_f16' is static but used
# in inline function 'vmulxh_laneq_f16' which is not static [-Werror]`
sed -i \
-e 's/static/extern/g' \
third_party/aarch64/arm_fp16.internal.h
for f in third_party/aarch64/*.h; do
strip_c_comments $f >$f.tmp || exit
mv $f.tmp $f
done
for f in third_party/aarch64/*.h; do
(
printf %s\\n '/* clang-format off */'
printf %s\\n '#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)'
cat $f
printf %s\\n '#endif'
) >$f.tmp
mv $f.tmp $f
done