Upgrade to Cosmopolitan GCC 11.2.0 for aarch64

This commit is contained in:
Justine Tunney 2023-06-05 02:07:28 -07:00
parent 39f20dbb13
commit 9cc3e37263
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
63 changed files with 30429 additions and 22750 deletions

View file

@ -203,7 +203,8 @@ ifeq ($(ARCH), aarch64)
#
DEFAULT_COPTS += \
-ffixed-x18 \
-ffixed-x28
-ffixed-x28 \
-mno-outline-atomics
endif
MATHEMATICAL = \

View file

@ -19,7 +19,7 @@
#include "dsp/core/core.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/emmintrin.internal.h"
/**

View file

@ -190,6 +190,7 @@ o/$(MODE)/libc/calls/unveil.o: private \
ifeq ($(ARCH), aarch64)
o/$(MODE)/libc/calls/sigaction.o: private OVERRIDE_CFLAGS += -mcmodel=large
o/$(MODE)/libc/calls/getloadavg-nt.o: private OVERRIDE_CFLAGS += -ffreestanding
endif
# we want -Os because:

165
third_party/aarch64/acc_prof.internal.h vendored Normal file
View file

@ -0,0 +1,165 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _ACC_PROF_H
#define _ACC_PROF_H 1
#include "third_party/aarch64/openacc.internal.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum acc_event_t
{
acc_ev_none = 0,
acc_ev_device_init_start,
acc_ev_device_init_end,
acc_ev_device_shutdown_start,
acc_ev_device_shutdown_end,
acc_ev_runtime_shutdown,
acc_ev_create,
acc_ev_delete,
acc_ev_alloc,
acc_ev_free,
acc_ev_enter_data_start,
acc_ev_enter_data_end,
acc_ev_exit_data_start,
acc_ev_exit_data_end,
acc_ev_update_start,
acc_ev_update_end,
acc_ev_compute_construct_start,
acc_ev_compute_construct_end,
acc_ev_enqueue_launch_start,
acc_ev_enqueue_launch_end,
acc_ev_enqueue_upload_start,
acc_ev_enqueue_upload_end,
acc_ev_enqueue_download_start,
acc_ev_enqueue_download_end,
acc_ev_wait_start,
acc_ev_wait_end,
acc_ev_last
} acc_event_t;
typedef signed long int _acc_prof_ssize_t;
typedef unsigned long int _acc_prof_size_t;
typedef int _acc_prof_int_t;
#define _ACC_PROF_VALID_BYTES_STRUCT(_struct, _lastfield, _valid_bytes_lastfield) offsetof (_struct, _lastfield) + (_valid_bytes_lastfield)
#if 0
#define _ACC_PROF_VALID_BYTES_TYPE_N(_type, _n, _valid_bytes_type) ((_n - 1) * sizeof (_type) + (_valid_bytes_type))
#endif
#define _ACC_PROF_VALID_BYTES_BASICTYPE(_basictype) (sizeof (_basictype))
typedef struct acc_prof_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
_acc_prof_int_t version;
acc_device_t device_type;
_acc_prof_int_t device_number;
_acc_prof_int_t thread_id;
_acc_prof_ssize_t async;
_acc_prof_ssize_t async_queue;
const char *src_file;
const char *func_name;
_acc_prof_int_t line_no, end_line_no;
_acc_prof_int_t func_line_no, func_end_line_no;
#define _ACC_PROF_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_prof_info, func_end_line_no, _ACC_PROF_VALID_BYTES_BASICTYPE (_acc_prof_int_t))
} acc_prof_info;
#define _ACC_PROF_INFO_VERSION 201711
typedef enum acc_construct_t
{
acc_construct_parallel = 0,
acc_construct_kernels,
acc_construct_loop,
acc_construct_data,
acc_construct_enter_data,
acc_construct_exit_data,
acc_construct_host_data,
acc_construct_atomic,
acc_construct_declare,
acc_construct_init,
acc_construct_shutdown,
acc_construct_set,
acc_construct_update,
acc_construct_routine,
acc_construct_wait,
acc_construct_runtime_api,
acc_construct_serial
} acc_construct_t;
typedef struct acc_data_event_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
acc_construct_t parent_construct;
_acc_prof_int_t implicit;
void *tool_info;
const char *var_name;
_acc_prof_size_t bytes;
const void *host_ptr;
const void *device_ptr;
#define _ACC_DATA_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_data_event_info, device_ptr, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
} acc_data_event_info;
typedef struct acc_launch_event_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
acc_construct_t parent_construct;
_acc_prof_int_t implicit;
void *tool_info;
const char *kernel_name;
_acc_prof_size_t num_gangs, num_workers, vector_length;
#define _ACC_LAUNCH_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_launch_event_info, vector_length, _ACC_PROF_VALID_BYTES_BASICTYPE (_acc_prof_size_t))
} acc_launch_event_info;
typedef struct acc_other_event_info
{
acc_event_t event_type;
_acc_prof_int_t valid_bytes;
acc_construct_t parent_construct;
_acc_prof_int_t implicit;
void *tool_info;
#define _ACC_OTHER_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_other_event_info, tool_info, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
} acc_other_event_info;
typedef union acc_event_info
{
acc_event_t event_type;
acc_data_event_info data_event;
acc_launch_event_info launch_event;
acc_other_event_info other_event;
} acc_event_info;
typedef enum acc_device_api
{
acc_device_api_none = 0,
acc_device_api_cuda,
acc_device_api_opencl,
acc_device_api_coi,
acc_device_api_other
} acc_device_api;
typedef struct acc_api_info
{
acc_device_api device_api;
_acc_prof_int_t valid_bytes;
acc_device_t device_type;
_acc_prof_int_t vendor;
const void *device_handle;
const void *context_handle;
const void *async_handle;
#define _ACC_API_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_api_info, async_handle, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
} acc_api_info;
typedef void (*acc_prof_callback) (acc_prof_info *, acc_event_info *,
acc_api_info *);
typedef enum acc_register_t
{
acc_reg = 0,
acc_toggle = 1,
acc_toggle_per_thread = 2
} acc_register_t;
typedef void (*acc_prof_reg) (acc_event_t, acc_prof_callback, acc_register_t);
extern void acc_prof_register (acc_event_t, acc_prof_callback,
acc_register_t) __GOACC_NOTHROW;
extern void acc_prof_unregister (acc_event_t, acc_prof_callback,
acc_register_t) __GOACC_NOTHROW;
typedef void (*acc_query_fn) ();
typedef acc_query_fn (*acc_prof_lookup_func) (const char *);
extern acc_query_fn acc_prof_lookup (const char *) __GOACC_NOTHROW;
extern void acc_register_library (acc_prof_reg, acc_prof_reg,
acc_prof_lookup_func);
#ifdef __cplusplus
}
#endif
#endif
#endif

View file

@ -1,63 +0,0 @@
#ifndef _GCC_ARM_ACLE_H
#define _GCC_ARM_ACLE_H
#ifdef __aarch64__
#include "libc/inttypes.h"
#include "libc/limits.h"
#include "libc/literal.h"
#pragma GCC push_options
#pragma GCC target("+nothing+crc")
#ifdef __cplusplus
extern "C" {
#endif
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32b(uint32_t __a, uint8_t __b) {
return __builtin_aarch64_crc32b(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32cb(uint32_t __a, uint8_t __b) {
return __builtin_aarch64_crc32cb(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32ch(uint32_t __a, uint16_t __b) {
return __builtin_aarch64_crc32ch(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32cw(uint32_t __a, uint32_t __b) {
return __builtin_aarch64_crc32cw(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32cd(uint32_t __a, uint64_t __b) {
return __builtin_aarch64_crc32cx(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32h(uint32_t __a, uint16_t __b) {
return __builtin_aarch64_crc32h(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32w(uint32_t __a, uint32_t __b) {
return __builtin_aarch64_crc32w(__a, __b);
}
__extension__ static __inline uint32_t __attribute__((__always_inline__))
__crc32d(uint32_t __a, uint64_t __b) {
return __builtin_aarch64_crc32x(__a, __b);
}
#ifdef __cplusplus
}
#endif
#pragma GCC pop_options
#endif /* __aarch64__ */
#endif

164
third_party/aarch64/arm_acle.internal.h vendored Normal file
View file

@ -0,0 +1,164 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _GCC_ARM_ACLE_H
#define _GCC_ARM_ACLE_H
#ifdef __cplusplus
extern "C" {
#endif
#pragma GCC push_options
#pragma GCC target ("arch=armv8.3-a")
__funline int32_t
__jcvt (double __a)
{
return __builtin_aarch64_jcvtzs (__a);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a")
__funline float
__rint32zf (float __a)
{
return __builtin_aarch64_frint32zsf (__a);
}
__funline double
__rint32z (double __a)
{
return __builtin_aarch64_frint32zdf (__a);
}
__funline float
__rint64zf (float __a)
{
return __builtin_aarch64_frint64zsf (__a);
}
__funline double
__rint64z (double __a)
{
return __builtin_aarch64_frint64zdf (__a);
}
__funline float
__rint32xf (float __a)
{
return __builtin_aarch64_frint32xsf (__a);
}
__funline double
__rint32x (double __a)
{
return __builtin_aarch64_frint32xdf (__a);
}
__funline float
__rint64xf (float __a)
{
return __builtin_aarch64_frint64xsf (__a);
}
__funline double
__rint64x (double __a)
{
return __builtin_aarch64_frint64xdf (__a);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("+nothing+crc")
__funline uint32_t
__crc32b (uint32_t __a, uint8_t __b)
{
return __builtin_aarch64_crc32b (__a, __b);
}
__funline uint32_t
__crc32cb (uint32_t __a, uint8_t __b)
{
return __builtin_aarch64_crc32cb (__a, __b);
}
__funline uint32_t
__crc32ch (uint32_t __a, uint16_t __b)
{
return __builtin_aarch64_crc32ch (__a, __b);
}
__funline uint32_t
__crc32cw (uint32_t __a, uint32_t __b)
{
return __builtin_aarch64_crc32cw (__a, __b);
}
__funline uint32_t
__crc32cd (uint32_t __a, uint64_t __b)
{
return __builtin_aarch64_crc32cx (__a, __b);
}
__funline uint32_t
__crc32h (uint32_t __a, uint16_t __b)
{
return __builtin_aarch64_crc32h (__a, __b);
}
__funline uint32_t
__crc32w (uint32_t __a, uint32_t __b)
{
return __builtin_aarch64_crc32w (__a, __b);
}
__funline uint32_t
__crc32d (uint32_t __a, uint64_t __b)
{
return __builtin_aarch64_crc32x (__a, __b);
}
#pragma GCC pop_options
#ifdef __ARM_FEATURE_TME
#pragma GCC push_options
#pragma GCC target ("+nothing+tme")
#define _TMFAILURE_REASON 0x00007fffu
#define _TMFAILURE_RTRY 0x00008000u
#define _TMFAILURE_CNCL 0x00010000u
#define _TMFAILURE_MEM 0x00020000u
#define _TMFAILURE_IMP 0x00040000u
#define _TMFAILURE_ERR 0x00080000u
#define _TMFAILURE_SIZE 0x00100000u
#define _TMFAILURE_NEST 0x00200000u
#define _TMFAILURE_DBG 0x00400000u
#define _TMFAILURE_INT 0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u
__funline uint64_t
__tstart (void)
{
return __builtin_aarch64_tstart ();
}
__funline void
__tcommit (void)
{
__builtin_aarch64_tcommit ();
}
__funline void
__tcancel (const uint64_t __reason)
{
__builtin_aarch64_tcancel (__reason);
}
__funline uint64_t
__ttest (void)
{
return __builtin_aarch64_ttest ();
}
#pragma GCC pop_options
#endif
#pragma GCC push_options
#pragma GCC target ("+nothing+rng")
__funline int
__rndr (uint64_t *__res)
{
return __builtin_aarch64_rndr (__res);
}
__funline int
__rndrrs (uint64_t *__res)
{
return __builtin_aarch64_rndrrs (__res);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("arch=armv8.5-a+memtag")
#define __arm_mte_create_random_tag(__ptr, __u64_mask) __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
#define __arm_mte_exclude_tag(__ptr, __u64_excluded) __builtin_aarch64_memtag_gmi(__ptr, __u64_excluded)
#define __arm_mte_ptrdiff(__ptr_a, __ptr_b) __builtin_aarch64_memtag_subp(__ptr_a, __ptr_b)
#define __arm_mte_increment_tag(__ptr, __u_offset) __builtin_aarch64_memtag_inc_tag(__ptr, __u_offset)
#define __arm_mte_set_tag(__tagged_address) __builtin_aarch64_memtag_set_tag(__tagged_address)
#define __arm_mte_get_tag(__address) __builtin_aarch64_memtag_get_tag(__address)
#pragma GCC pop_options
#ifdef __cplusplus
}
#endif
#endif
#endif

23
third_party/aarch64/arm_bf16.internal.h vendored Normal file
View file

@ -0,0 +1,23 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _AARCH64_BF16_H_
#define _AARCH64_BF16_H_
typedef __bf16 bfloat16_t;
typedef float float32_t;
#pragma GCC push_options
#pragma GCC target ("+nothing+bf16+nosimd")
__extension__ extern __inline bfloat16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvth_bf16_f32 (float32_t __a)
{
return __builtin_aarch64_bfcvtbf (__a);
}
__extension__ extern __inline float32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvtah_f32_bf16 (bfloat16_t __a)
{
return __builtin_aarch64_bfcvtsf (__a);
}
#pragma GCC pop_options
#endif
#endif

View file

@ -1,373 +0,0 @@
#ifndef _AARCH64_FP16_H_
#define _AARCH64_FP16_H_
#ifdef __aarch64__
#include "libc/inttypes.h"
#include "libc/limits.h"
#include "libc/literal.h"
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+fp16")
typedef __fp16 float16_t;
__funline float16_t vabsh_f16(float16_t __a) {
return __builtin_aarch64_abshf(__a);
}
__funline uint16_t vceqzh_f16(float16_t __a) {
return __builtin_aarch64_cmeqhf_uss(__a, 0.0f);
}
__funline uint16_t vcgezh_f16(float16_t __a) {
return __builtin_aarch64_cmgehf_uss(__a, 0.0f);
}
__funline uint16_t vcgtzh_f16(float16_t __a) {
return __builtin_aarch64_cmgthf_uss(__a, 0.0f);
}
__funline uint16_t vclezh_f16(float16_t __a) {
return __builtin_aarch64_cmlehf_uss(__a, 0.0f);
}
__funline uint16_t vcltzh_f16(float16_t __a) {
return __builtin_aarch64_cmlthf_uss(__a, 0.0f);
}
__funline float16_t vcvth_f16_s16(int16_t __a) {
return __builtin_aarch64_floathihf(__a);
}
__funline float16_t vcvth_f16_s32(int32_t __a) {
return __builtin_aarch64_floatsihf(__a);
}
__funline float16_t vcvth_f16_s64(int64_t __a) {
return __builtin_aarch64_floatdihf(__a);
}
__funline float16_t vcvth_f16_u16(uint16_t __a) {
return __builtin_aarch64_floatunshihf_us(__a);
}
__funline float16_t vcvth_f16_u32(uint32_t __a) {
return __builtin_aarch64_floatunssihf_us(__a);
}
__funline float16_t vcvth_f16_u64(uint64_t __a) {
return __builtin_aarch64_floatunsdihf_us(__a);
}
__funline int16_t vcvth_s16_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfhi(__a);
}
__funline int32_t vcvth_s32_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfsi(__a);
}
__funline int64_t vcvth_s64_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfdi(__a);
}
__funline uint16_t vcvth_u16_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfhi_us(__a);
}
__funline uint32_t vcvth_u32_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfsi_us(__a);
}
__funline uint64_t vcvth_u64_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfdi_us(__a);
}
__funline int16_t vcvtah_s16_f16(float16_t __a) {
return __builtin_aarch64_lroundhfhi(__a);
}
__funline int32_t vcvtah_s32_f16(float16_t __a) {
return __builtin_aarch64_lroundhfsi(__a);
}
__funline int64_t vcvtah_s64_f16(float16_t __a) {
return __builtin_aarch64_lroundhfdi(__a);
}
__funline uint16_t vcvtah_u16_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfhi_us(__a);
}
__funline uint32_t vcvtah_u32_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfsi_us(__a);
}
__funline uint64_t vcvtah_u64_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfdi_us(__a);
}
__funline int16_t vcvtmh_s16_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfhi(__a);
}
__funline int32_t vcvtmh_s32_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfsi(__a);
}
__funline int64_t vcvtmh_s64_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfdi(__a);
}
__funline uint16_t vcvtmh_u16_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfhi_us(__a);
}
__funline uint32_t vcvtmh_u32_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfsi_us(__a);
}
__funline uint64_t vcvtmh_u64_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfdi_us(__a);
}
__funline int16_t vcvtnh_s16_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfhi(__a);
}
__funline int32_t vcvtnh_s32_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfsi(__a);
}
__funline int64_t vcvtnh_s64_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfdi(__a);
}
__funline uint16_t vcvtnh_u16_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfhi_us(__a);
}
__funline uint32_t vcvtnh_u32_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfsi_us(__a);
}
__funline uint64_t vcvtnh_u64_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfdi_us(__a);
}
__funline int16_t vcvtph_s16_f16(float16_t __a) {
return __builtin_aarch64_lceilhfhi(__a);
}
__funline int32_t vcvtph_s32_f16(float16_t __a) {
return __builtin_aarch64_lceilhfsi(__a);
}
__funline int64_t vcvtph_s64_f16(float16_t __a) {
return __builtin_aarch64_lceilhfdi(__a);
}
__funline uint16_t vcvtph_u16_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfhi_us(__a);
}
__funline uint32_t vcvtph_u32_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfsi_us(__a);
}
__funline uint64_t vcvtph_u64_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfdi_us(__a);
}
__funline float16_t vnegh_f16(float16_t __a) {
return __builtin_aarch64_neghf(__a);
}
__funline float16_t vrecpeh_f16(float16_t __a) {
return __builtin_aarch64_frecpehf(__a);
}
__funline float16_t vrecpxh_f16(float16_t __a) {
return __builtin_aarch64_frecpxhf(__a);
}
__funline float16_t vrndh_f16(float16_t __a) {
return __builtin_aarch64_btrunchf(__a);
}
__funline float16_t vrndah_f16(float16_t __a) {
return __builtin_aarch64_roundhf(__a);
}
__funline float16_t vrndih_f16(float16_t __a) {
return __builtin_aarch64_nearbyinthf(__a);
}
__funline float16_t vrndmh_f16(float16_t __a) {
return __builtin_aarch64_floorhf(__a);
}
__funline float16_t vrndnh_f16(float16_t __a) {
return __builtin_aarch64_frintnhf(__a);
}
__funline float16_t vrndph_f16(float16_t __a) {
return __builtin_aarch64_ceilhf(__a);
}
__funline float16_t vrndxh_f16(float16_t __a) {
return __builtin_aarch64_rinthf(__a);
}
__funline float16_t vrsqrteh_f16(float16_t __a) {
return __builtin_aarch64_rsqrtehf(__a);
}
__funline float16_t vsqrth_f16(float16_t __a) {
return __builtin_aarch64_sqrthf(__a);
}
__funline float16_t vaddh_f16(float16_t __a, float16_t __b) {
return __a + __b;
}
__funline float16_t vabdh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fabdhf(__a, __b);
}
__funline uint16_t vcageh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_facgehf_uss(__a, __b);
}
__funline uint16_t vcagth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_facgthf_uss(__a, __b);
}
__funline uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_faclehf_uss(__a, __b);
}
__funline uint16_t vcalth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_faclthf_uss(__a, __b);
}
__funline uint16_t vceqh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmeqhf_uss(__a, __b);
}
__funline uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmgehf_uss(__a, __b);
}
__funline uint16_t vcgth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmgthf_uss(__a, __b);
}
__funline uint16_t vcleh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmlehf_uss(__a, __b);
}
__funline uint16_t vclth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmlthf_uss(__a, __b);
}
__funline float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
return __builtin_aarch64_scvtfhi(__a, __b);
}
__funline float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
return __builtin_aarch64_scvtfsihf(__a, __b);
}
__funline float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
return __builtin_aarch64_scvtfdihf(__a, __b);
}
__funline float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
return __builtin_aarch64_ucvtfhi_sus(__a, __b);
}
__funline float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
return __builtin_aarch64_ucvtfsihf_sus(__a, __b);
}
__funline float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
return __builtin_aarch64_ucvtfdihf_sus(__a, __b);
}
__funline int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshf(__a, __b);
}
__funline int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshfsi(__a, __b);
}
__funline int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshfdi(__a, __b);
}
__funline uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhf_uss(__a, __b);
}
__funline uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhfsi_uss(__a, __b);
}
__funline uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhfdi_uss(__a, __b);
}
__funline float16_t vdivh_f16(float16_t __a, float16_t __b) {
return __a / __b;
}
__funline float16_t vmaxh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmaxhf(__a, __b);
}
__funline float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmaxhf(__a, __b);
}
__funline float16_t vminh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fminhf(__a, __b);
}
__funline float16_t vminnmh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fminhf(__a, __b);
}
__funline float16_t vmulh_f16(float16_t __a, float16_t __b) {
return __a * __b;
}
__funline float16_t vmulxh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmulxhf(__a, __b);
}
__funline float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_frecpshf(__a, __b);
}
__funline float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_rsqrtshf(__a, __b);
}
__funline float16_t vsubh_f16(float16_t __a, float16_t __b) {
return __a - __b;
}
__funline float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
return __builtin_aarch64_fmahf(__b, __c, __a);
}
__funline float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
return __builtin_aarch64_fnmahf(__b, __c, __a);
}
#pragma GCC pop_options
#undef FUNC
#endif /* __aarch64__ */
#endif /* _AARCH64_FP16_H_ */

455
third_party/aarch64/arm_fp16.internal.h vendored Normal file
View file

@ -0,0 +1,455 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _AARCH64_FP16_H_
#define _AARCH64_FP16_H_
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+fp16")
typedef __fp16 float16_t;
__funline float16_t
vabsh_f16 (float16_t __a)
{
return __builtin_aarch64_abshf (__a);
}
__funline uint16_t
vceqzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
}
__funline uint16_t
vcgezh_f16 (float16_t __a)
{
return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
}
__funline uint16_t
vcgtzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
}
__funline uint16_t
vclezh_f16 (float16_t __a)
{
return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
}
__funline uint16_t
vcltzh_f16 (float16_t __a)
{
return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
}
__funline float16_t
vcvth_f16_s16 (int16_t __a)
{
return __builtin_aarch64_floathihf (__a);
}
__funline float16_t
vcvth_f16_s32 (int32_t __a)
{
return __builtin_aarch64_floatsihf (__a);
}
__funline float16_t
vcvth_f16_s64 (int64_t __a)
{
return __builtin_aarch64_floatdihf (__a);
}
__funline float16_t
vcvth_f16_u16 (uint16_t __a)
{
return __builtin_aarch64_floatunshihf_us (__a);
}
__funline float16_t
vcvth_f16_u32 (uint32_t __a)
{
return __builtin_aarch64_floatunssihf_us (__a);
}
__funline float16_t
vcvth_f16_u64 (uint64_t __a)
{
return __builtin_aarch64_floatunsdihf_us (__a);
}
__funline int16_t
vcvth_s16_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfhi (__a);
}
__funline int32_t
vcvth_s32_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfsi (__a);
}
__funline int64_t
vcvth_s64_f16 (float16_t __a)
{
return __builtin_aarch64_fix_trunchfdi (__a);
}
__funline uint16_t
vcvth_u16_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfhi_us (__a);
}
__funline uint32_t
vcvth_u32_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfsi_us (__a);
}
__funline uint64_t
vcvth_u64_f16 (float16_t __a)
{
return __builtin_aarch64_fixuns_trunchfdi_us (__a);
}
__funline int16_t
vcvtah_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfhi (__a);
}
__funline int32_t
vcvtah_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfsi (__a);
}
__funline int64_t
vcvtah_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lroundhfdi (__a);
}
__funline uint16_t
vcvtah_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfhi_us (__a);
}
__funline uint32_t
vcvtah_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfsi_us (__a);
}
__funline uint64_t
vcvtah_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lrounduhfdi_us (__a);
}
__funline int16_t
vcvtmh_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfhi (__a);
}
__funline int32_t
vcvtmh_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfsi (__a);
}
__funline int64_t
vcvtmh_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lfloorhfdi (__a);
}
__funline uint16_t
vcvtmh_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfhi_us (__a);
}
__funline uint32_t
vcvtmh_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfsi_us (__a);
}
__funline uint64_t
vcvtmh_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lflooruhfdi_us (__a);
}
__funline int16_t
vcvtnh_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfhi (__a);
}
__funline int32_t
vcvtnh_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfsi (__a);
}
__funline int64_t
vcvtnh_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnhfdi (__a);
}
__funline uint16_t
vcvtnh_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfhi_us (__a);
}
__funline uint32_t
vcvtnh_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfsi_us (__a);
}
__funline uint64_t
vcvtnh_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lfrintnuhfdi_us (__a);
}
__funline int16_t
vcvtph_s16_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfhi (__a);
}
__funline int32_t
vcvtph_s32_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfsi (__a);
}
__funline int64_t
vcvtph_s64_f16 (float16_t __a)
{
return __builtin_aarch64_lceilhfdi (__a);
}
__funline uint16_t
vcvtph_u16_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfhi_us (__a);
}
__funline uint32_t
vcvtph_u32_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfsi_us (__a);
}
__funline uint64_t
vcvtph_u64_f16 (float16_t __a)
{
return __builtin_aarch64_lceiluhfdi_us (__a);
}
__funline float16_t
vnegh_f16 (float16_t __a)
{
return __builtin_aarch64_neghf (__a);
}
__funline float16_t
vrecpeh_f16 (float16_t __a)
{
return __builtin_aarch64_frecpehf (__a);
}
__funline float16_t
vrecpxh_f16 (float16_t __a)
{
return __builtin_aarch64_frecpxhf (__a);
}
__funline float16_t
vrndh_f16 (float16_t __a)
{
return __builtin_aarch64_btrunchf (__a);
}
__funline float16_t
vrndah_f16 (float16_t __a)
{
return __builtin_aarch64_roundhf (__a);
}
__funline float16_t
vrndih_f16 (float16_t __a)
{
return __builtin_aarch64_nearbyinthf (__a);
}
__funline float16_t
vrndmh_f16 (float16_t __a)
{
return __builtin_aarch64_floorhf (__a);
}
__funline float16_t
vrndnh_f16 (float16_t __a)
{
return __builtin_aarch64_frintnhf (__a);
}
__funline float16_t
vrndph_f16 (float16_t __a)
{
return __builtin_aarch64_ceilhf (__a);
}
__funline float16_t
vrndxh_f16 (float16_t __a)
{
return __builtin_aarch64_rinthf (__a);
}
__funline float16_t
vrsqrteh_f16 (float16_t __a)
{
return __builtin_aarch64_rsqrtehf (__a);
}
__funline float16_t
vsqrth_f16 (float16_t __a)
{
return __builtin_aarch64_sqrthf (__a);
}
__funline float16_t
vaddh_f16 (float16_t __a, float16_t __b)
{
return __a + __b;
}
__funline float16_t
vabdh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fabdhf (__a, __b);
}
__funline uint16_t
vcageh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgehf_uss (__a, __b);
}
__funline uint16_t
vcagth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_facgthf_uss (__a, __b);
}
__funline uint16_t
vcaleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclehf_uss (__a, __b);
}
__funline uint16_t
vcalth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_faclthf_uss (__a, __b);
}
__funline uint16_t
vceqh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmeqhf_uss (__a, __b);
}
__funline uint16_t
vcgeh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgehf_uss (__a, __b);
}
__funline uint16_t
vcgth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmgthf_uss (__a, __b);
}
__funline uint16_t
vcleh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlehf_uss (__a, __b);
}
__funline uint16_t
vclth_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_cmlthf_uss (__a, __b);
}
__funline float16_t
vcvth_n_f16_s16 (int16_t __a, const int __b)
{
return __builtin_aarch64_scvtfhi (__a, __b);
}
__funline float16_t
vcvth_n_f16_s32 (int32_t __a, const int __b)
{
return __builtin_aarch64_scvtfsihf (__a, __b);
}
__funline float16_t
vcvth_n_f16_s64 (int64_t __a, const int __b)
{
return __builtin_aarch64_scvtfdihf (__a, __b);
}
__funline float16_t
vcvth_n_f16_u16 (uint16_t __a, const int __b)
{
return __builtin_aarch64_ucvtfhi_sus (__a, __b);
}
__funline float16_t
vcvth_n_f16_u32 (uint32_t __a, const int __b)
{
return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
}
__funline float16_t
vcvth_n_f16_u64 (uint64_t __a, const int __b)
{
return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
}
__funline int16_t
vcvth_n_s16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshf (__a, __b);
}
__funline int32_t
vcvth_n_s32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfsi (__a, __b);
}
__funline int64_t
vcvth_n_s64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzshfdi (__a, __b);
}
__funline uint16_t
vcvth_n_u16_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
}
__funline uint32_t
vcvth_n_u32_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
}
__funline uint64_t
vcvth_n_u64_f16 (float16_t __a, const int __b)
{
return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
}
__funline float16_t
vdivh_f16 (float16_t __a, float16_t __b)
{
return __a / __b;
}
__funline float16_t
vmaxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__funline float16_t
vmaxnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmaxhf (__a, __b);
}
__funline float16_t
vminh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__funline float16_t
vminnmh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fminhf (__a, __b);
}
__funline float16_t
vmulh_f16 (float16_t __a, float16_t __b)
{
return __a * __b;
}
__funline float16_t
vmulxh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_fmulxhf (__a, __b);
}
__funline float16_t
vrecpsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_frecpshf (__a, __b);
}
__funline float16_t
vrsqrtsh_f16 (float16_t __a, float16_t __b)
{
return __builtin_aarch64_rsqrtshf (__a, __b);
}
__funline float16_t
vsubh_f16 (float16_t __a, float16_t __b)
{
return __a - __b;
}
__funline float16_t
vfmah_f16 (float16_t __a, float16_t __b, float16_t __c)
{
return __builtin_aarch64_fmahf (__b, __c, __a);
}
__funline float16_t
vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c)
{
return __builtin_aarch64_fnmahf (__b, __c, __a);
}
#pragma GCC pop_options
#endif
#endif

File diff suppressed because it is too large Load diff

29402
third_party/aarch64/arm_neon.internal.h vendored Normal file

File diff suppressed because it is too large Load diff

11
third_party/aarch64/arm_sve.internal.h vendored Normal file
View file

@ -0,0 +1,11 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _ARM_SVE_H_
#define _ARM_SVE_H_
#include "third_party/aarch64/arm_bf16.internal.h"
typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;
#pragma GCC aarch64 "third_party/aarch64/arm_sve.internal.h"
#endif
#endif

112
third_party/aarch64/openacc.internal.h vendored Normal file
View file

@ -0,0 +1,112 @@
/* clang-format off */
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _OPENACC_H
#define _OPENACC_H 1
#ifdef __cplusplus
extern "C" {
#endif
#if __cplusplus >= 201103
# define __GOACC_NOTHROW noexcept
#elif __cplusplus
# define __GOACC_NOTHROW throw ()
#else
# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
#endif
typedef enum acc_device_t {
acc_device_current = -1,
acc_device_none = 0,
acc_device_default = 1,
acc_device_host = 2,
acc_device_not_host = 4,
acc_device_nvidia = 5,
acc_device_radeon = 8,
_ACC_device_hwm,
_ACC_highest = __INT_MAX__,
_ACC_neg = -1
} acc_device_t;
typedef enum acc_device_property_t {
acc_property_memory = 1,
acc_property_free_memory = 2,
acc_property_name = 0x10001,
acc_property_vendor = 0x10002,
acc_property_driver = 0x10003
} acc_device_property_t;
typedef enum acc_async_t {
acc_async_noval = -1,
acc_async_sync = -2
} acc_async_t;
int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW;
void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
size_t acc_get_property
(int, acc_device_t, acc_device_property_t) __GOACC_NOTHROW;
const char *acc_get_property_string
(int, acc_device_t, acc_device_property_t) __GOACC_NOTHROW;
int acc_async_test (int) __GOACC_NOTHROW;
int acc_async_test_all (void) __GOACC_NOTHROW;
void acc_wait (int) __GOACC_NOTHROW;
void acc_async_wait (int) __GOACC_NOTHROW;
void acc_wait_async (int, int) __GOACC_NOTHROW;
void acc_wait_all (void) __GOACC_NOTHROW;
void acc_async_wait_all (void) __GOACC_NOTHROW;
void acc_wait_all_async (int) __GOACC_NOTHROW;
void acc_init (acc_device_t) __GOACC_NOTHROW;
void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
#ifdef __cplusplus
int acc_on_device (int __arg) __GOACC_NOTHROW;
#else
int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW;
#endif
void *acc_malloc (size_t) __GOACC_NOTHROW;
void acc_free (void *) __GOACC_NOTHROW;
void *acc_copyin (void *, size_t) __GOACC_NOTHROW;
void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW;
void *acc_pcopyin (void *, size_t) __GOACC_NOTHROW;
void *acc_create (void *, size_t) __GOACC_NOTHROW;
void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW;
void *acc_pcreate (void *, size_t) __GOACC_NOTHROW;
void acc_copyout (void *, size_t) __GOACC_NOTHROW;
void acc_delete (void *, size_t) __GOACC_NOTHROW;
void acc_update_device (void *, size_t) __GOACC_NOTHROW;
void acc_update_self (void *, size_t) __GOACC_NOTHROW;
void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW;
void acc_unmap_data (void *) __GOACC_NOTHROW;
void *acc_deviceptr (void *) __GOACC_NOTHROW;
void *acc_hostptr (void *) __GOACC_NOTHROW;
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_attach (void **) __GOACC_NOTHROW;
void acc_attach_async (void **, int) __GOACC_NOTHROW;
void acc_detach (void **) __GOACC_NOTHROW;
void acc_detach_async (void **, int) __GOACC_NOTHROW;
void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_detach_finalize (void **) __GOACC_NOTHROW;
void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
void *acc_get_cuda_stream (int) __GOACC_NOTHROW;
int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
#ifdef __cplusplus
}
#pragma acc routine seq
inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
{
return acc_on_device ((int) __arg);
}
#endif
#endif
#endif

72
third_party/aarch64/upgrade.sh vendored Executable file
View file

@ -0,0 +1,72 @@
#!/bin/sh
# /opt/aarch64o2/lib/gcc/aarch64-linux-musl/9.2.0/include
# /opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
# IMPORTANT NOTES
#
# 1. You also need:
# #pragma GCC diagnostic ignored "-Wmissing-braces"
# In third_party/aarch64/arm_neon.internal.h
#
# 2. You have to rewrite arm_fp16 to use `__funline`.
#
# 3. You should fix up the `#pragma GCC aarch64` things.
#
s=/opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
d=third_party/aarch64
FILES='
arm_acle
arm_fp16
arm_neon
acc_prof
arm_bf16
arm_sve
acc_prof
openacc
'
strip_c_comments() {
# https://stackoverflow.com/a/13062682/1653720
[ $# -eq 2 ] && arg="$1" || arg=""
eval file="\$$#"
sed 's/a/aA/g; s/__/aB/g; s/#/aC/g' "$file" |
gcc -P -E $arg - |
sed 's/aC/#/g; s/aB/__/g; s/aA/a/g'
}
rm -f third_party/aarch64/*.h
for f in $FILES; do
echo cp $s/$f.h $d/$f.internal.h
cp $s/$f.h $d/$f.internal.h || exit
done
sed -i \
-e 's/# *include/#include/' \
-e '/#include .std/d' \
-e 's!#include [<"]!#include "third_party/aarch64/!' \
-e 's!\.h[>"]$!.internal.h"!' \
third_party/aarch64/*.h
# solve the pedantic gcc linter warning `'vmulxh_f16' is static but used
# in inline function 'vmulxh_laneq_f16' which is not static [-Werror]`
sed -i \
-e 's/static/extern/g' \
third_party/aarch64/arm_fp16.internal.h
for f in third_party/aarch64/*.h; do
strip_c_comments $f >$f.tmp || exit
mv $f.tmp $f
done
for f in third_party/aarch64/*.h; do
(
printf %s\\n '/* clang-format off */'
printf %s\\n '#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)'
cat $f
printf %s\\n '#endif'
) >$f.tmp
mv $f.tmp $f
done

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,6 @@
#!/bin/sh
ARCH=${1:-x86_64}
ARCH=${1:-aarch64}
IMPORT=${2:-/opt/cross11portcosmo}
PREFIX=third_party/gcc/
OLDVERSION=9.2.0

View file

@ -29,7 +29,7 @@
#include "libc/assert.h"
#include "libc/macros.internal.h"
#include "libc/str/str.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/ggjt.v1.internal.h"
#include "third_party/ggml/ggjt.v1.q8_0.h"
#include "third_party/intel/immintrin.internal.h"

View file

@ -29,7 +29,7 @@
#include "libc/assert.h"
#include "libc/macros.internal.h"
#include "libc/str/str.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/ggjt.v1.internal.h"
#include "third_party/ggml/ggjt.v1.q4_1.h"
#include "third_party/ggml/ggjt.v1.q8_1.h"

View file

@ -29,7 +29,7 @@
#include "libc/assert.h"
#include "libc/macros.internal.h"
#include "libc/str/str.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/fp16.internal.h"
#include "third_party/ggml/ggjt.v1.internal.h"
#include "third_party/ggml/ggjt.v1.q8_0.h"

View file

@ -28,7 +28,7 @@
#include "third_party/ggml/ggjt.v1.q5_0.h"
#include "libc/assert.h"
#include "libc/macros.internal.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/fp16.internal.h"
#include "third_party/ggml/ggjt.v1.internal.h"
#include "third_party/ggml/ggjt.v1.q8_0.h"

View file

@ -20,7 +20,7 @@
#include "libc/assert.h"
#include "libc/math.h"
#include "libc/str/str.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/fp16.internal.h"
#include "third_party/ggml/ggjt.v1.internal.h"
#include "third_party/ggml/ggjt.v1.q8_1.h"

View file

@ -19,7 +19,7 @@
#include "third_party/ggml/ggjt.v1.q8_0.h"
#include "libc/assert.h"
#include "libc/macros.internal.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/ggjt.v1.internal.h"
#include "third_party/ggml/ggjt.v1.q8_0.h"
#include "third_party/intel/immintrin.internal.h"

View file

@ -19,7 +19,7 @@
#include "third_party/ggml/ggjt.v1.q8_1.h"
#include "libc/assert.h"
#include "libc/macros.internal.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/ggml/ggml.h"
#include "third_party/intel/immintrin.internal.h"
#include "third_party/libcxx/math.h"

View file

@ -1,7 +1,7 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_GGML_GGJT_V2_INTERNAL_H_
#define COSMOPOLITAN_THIRD_PARTY_GGML_GGJT_V2_INTERNAL_H_
#include "libc/str/str.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/immintrin.internal.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

View file

@ -39,7 +39,7 @@
#include "libc/sysv/consts/clock.h"
#include "libc/thread/thread.h"
#include "libc/time/time.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/immintrin.internal.h"
#include "libc/macros.internal.h"
#include "libc/tinymath/magicu.h"

View file

@ -192,6 +192,8 @@ o/$(MODE)/third_party/ggml/companionai.txt.zip.o: private \
ZIPOBJ_FLAGS += \
-B
o/$(MODE)/third_party/ggml/ggml.o: private QUOTA = -C64
################################################################################
THIRD_PARTY_GGML_COMS = \

View file

@ -32,7 +32,7 @@
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/x/x.h"
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/ammintrin.internal.h"
asm(".ident\t\"\\n\\n\

View file

@ -203,7 +203,7 @@ uint32_t ZLIB_INTERNAL adler32_simd_( /* SSSE3 */
#elif defined(ADLER32_SIMD_NEON)
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
uint32_t ZLIB_INTERNAL adler32_simd_( /* NEON */
uint32_t adler,

View file

@ -28,7 +28,7 @@
#endif
#if defined(INFLATE_CHUNK_SIMD_NEON)
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_neon.internal.h"
typedef uint8x16_t z_vec128i_t;
#elif defined(INFLATE_CHUNK_SIMD_SSE2)
#include "third_party/intel/emmintrin.internal.h"

View file

@ -10,6 +10,7 @@ Chromium (BSD-3 License)\\n\
Copyright 2017 The Chromium Authors\"");
// clang-format off
#include "third_party/intel/x86gprintrin.internal.h"
#include "third_party/zlib/crc32_simd.internal.h"
#if defined(CRC32_SIMD_AVX512_PCLMUL)
@ -358,8 +359,8 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
/* We need some extra types for using PMULL.
*/
#if defined(__aarch64__)
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_acle.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/aarch64/arm_acle.internal.h"
#endif
/* CRC32 intrinsics are #ifdef'ed out of arm_acle.h unless we build with an
@ -400,8 +401,8 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
/* For GCC, we are setting CRC extensions at module level, so ThinLTO is not
* allowed. We can just include arm_acle.h.
*/
#include "third_party/aarch64/arm_neon.h"
#include "third_party/aarch64/arm_acle.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/aarch64/arm_acle.internal.h"
#define TARGET_ARMV8_WITH_CRC
#else // !defined(__GNUC__) && !defined(_aarch64__)
#error ARM CRC32 SIMD extensions only supported for Clang and GCC

View file

@ -41,7 +41,7 @@ typedef __m128i z_vec128i_u16x8_t;
#elif defined(DEFLATE_SLIDE_HASH_NEON)
#include "third_party/aarch64/arm_neon.h" /* NEON */
#include "third_party/aarch64/arm_neon.internal.h" /* NEON */
#define Z_SLIDE_INIT_SIMD(wsize) vdupq_n_u16((ush)(wsize))