mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 06:53:33 +00:00
Upgrade to Cosmopolitan GCC 11.2.0 for aarch64
This commit is contained in:
parent
39f20dbb13
commit
9cc3e37263
63 changed files with 30429 additions and 22750 deletions
|
@ -203,7 +203,8 @@ ifeq ($(ARCH), aarch64)
|
|||
#
|
||||
DEFAULT_COPTS += \
|
||||
-ffixed-x18 \
|
||||
-ffixed-x28
|
||||
-ffixed-x28 \
|
||||
-mno-outline-atomics
|
||||
endif
|
||||
|
||||
MATHEMATICAL = \
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "dsp/core/core.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
/**
|
||||
|
|
|
@ -190,6 +190,7 @@ o/$(MODE)/libc/calls/unveil.o: private \
|
|||
|
||||
ifeq ($(ARCH), aarch64)
|
||||
o/$(MODE)/libc/calls/sigaction.o: private OVERRIDE_CFLAGS += -mcmodel=large
|
||||
o/$(MODE)/libc/calls/getloadavg-nt.o: private OVERRIDE_CFLAGS += -ffreestanding
|
||||
endif
|
||||
|
||||
# we want -Os because:
|
||||
|
|
165
third_party/aarch64/acc_prof.internal.h
vendored
Normal file
165
third_party/aarch64/acc_prof.internal.h
vendored
Normal file
|
@ -0,0 +1,165 @@
|
|||
/* clang-format off */
|
||||
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _ACC_PROF_H
|
||||
#define _ACC_PROF_H 1
|
||||
#include "third_party/aarch64/openacc.internal.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef enum acc_event_t
|
||||
{
|
||||
acc_ev_none = 0,
|
||||
acc_ev_device_init_start,
|
||||
acc_ev_device_init_end,
|
||||
acc_ev_device_shutdown_start,
|
||||
acc_ev_device_shutdown_end,
|
||||
acc_ev_runtime_shutdown,
|
||||
acc_ev_create,
|
||||
acc_ev_delete,
|
||||
acc_ev_alloc,
|
||||
acc_ev_free,
|
||||
acc_ev_enter_data_start,
|
||||
acc_ev_enter_data_end,
|
||||
acc_ev_exit_data_start,
|
||||
acc_ev_exit_data_end,
|
||||
acc_ev_update_start,
|
||||
acc_ev_update_end,
|
||||
acc_ev_compute_construct_start,
|
||||
acc_ev_compute_construct_end,
|
||||
acc_ev_enqueue_launch_start,
|
||||
acc_ev_enqueue_launch_end,
|
||||
acc_ev_enqueue_upload_start,
|
||||
acc_ev_enqueue_upload_end,
|
||||
acc_ev_enqueue_download_start,
|
||||
acc_ev_enqueue_download_end,
|
||||
acc_ev_wait_start,
|
||||
acc_ev_wait_end,
|
||||
acc_ev_last
|
||||
} acc_event_t;
|
||||
typedef signed long int _acc_prof_ssize_t;
|
||||
typedef unsigned long int _acc_prof_size_t;
|
||||
typedef int _acc_prof_int_t;
|
||||
#define _ACC_PROF_VALID_BYTES_STRUCT(_struct, _lastfield, _valid_bytes_lastfield) offsetof (_struct, _lastfield) + (_valid_bytes_lastfield)
|
||||
#if 0
|
||||
#define _ACC_PROF_VALID_BYTES_TYPE_N(_type, _n, _valid_bytes_type) ((_n - 1) * sizeof (_type) + (_valid_bytes_type))
|
||||
#endif
|
||||
#define _ACC_PROF_VALID_BYTES_BASICTYPE(_basictype) (sizeof (_basictype))
|
||||
typedef struct acc_prof_info
|
||||
{
|
||||
acc_event_t event_type;
|
||||
_acc_prof_int_t valid_bytes;
|
||||
_acc_prof_int_t version;
|
||||
acc_device_t device_type;
|
||||
_acc_prof_int_t device_number;
|
||||
_acc_prof_int_t thread_id;
|
||||
_acc_prof_ssize_t async;
|
||||
_acc_prof_ssize_t async_queue;
|
||||
const char *src_file;
|
||||
const char *func_name;
|
||||
_acc_prof_int_t line_no, end_line_no;
|
||||
_acc_prof_int_t func_line_no, func_end_line_no;
|
||||
#define _ACC_PROF_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_prof_info, func_end_line_no, _ACC_PROF_VALID_BYTES_BASICTYPE (_acc_prof_int_t))
|
||||
} acc_prof_info;
|
||||
#define _ACC_PROF_INFO_VERSION 201711
|
||||
typedef enum acc_construct_t
|
||||
{
|
||||
acc_construct_parallel = 0,
|
||||
acc_construct_kernels,
|
||||
acc_construct_loop,
|
||||
acc_construct_data,
|
||||
acc_construct_enter_data,
|
||||
acc_construct_exit_data,
|
||||
acc_construct_host_data,
|
||||
acc_construct_atomic,
|
||||
acc_construct_declare,
|
||||
acc_construct_init,
|
||||
acc_construct_shutdown,
|
||||
acc_construct_set,
|
||||
acc_construct_update,
|
||||
acc_construct_routine,
|
||||
acc_construct_wait,
|
||||
acc_construct_runtime_api,
|
||||
acc_construct_serial
|
||||
} acc_construct_t;
|
||||
typedef struct acc_data_event_info
|
||||
{
|
||||
acc_event_t event_type;
|
||||
_acc_prof_int_t valid_bytes;
|
||||
acc_construct_t parent_construct;
|
||||
_acc_prof_int_t implicit;
|
||||
void *tool_info;
|
||||
const char *var_name;
|
||||
_acc_prof_size_t bytes;
|
||||
const void *host_ptr;
|
||||
const void *device_ptr;
|
||||
#define _ACC_DATA_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_data_event_info, device_ptr, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
|
||||
} acc_data_event_info;
|
||||
typedef struct acc_launch_event_info
|
||||
{
|
||||
acc_event_t event_type;
|
||||
_acc_prof_int_t valid_bytes;
|
||||
acc_construct_t parent_construct;
|
||||
_acc_prof_int_t implicit;
|
||||
void *tool_info;
|
||||
const char *kernel_name;
|
||||
_acc_prof_size_t num_gangs, num_workers, vector_length;
|
||||
#define _ACC_LAUNCH_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_launch_event_info, vector_length, _ACC_PROF_VALID_BYTES_BASICTYPE (_acc_prof_size_t))
|
||||
} acc_launch_event_info;
|
||||
typedef struct acc_other_event_info
|
||||
{
|
||||
acc_event_t event_type;
|
||||
_acc_prof_int_t valid_bytes;
|
||||
acc_construct_t parent_construct;
|
||||
_acc_prof_int_t implicit;
|
||||
void *tool_info;
|
||||
#define _ACC_OTHER_EVENT_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_other_event_info, tool_info, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
|
||||
} acc_other_event_info;
|
||||
typedef union acc_event_info
|
||||
{
|
||||
acc_event_t event_type;
|
||||
acc_data_event_info data_event;
|
||||
acc_launch_event_info launch_event;
|
||||
acc_other_event_info other_event;
|
||||
} acc_event_info;
|
||||
typedef enum acc_device_api
|
||||
{
|
||||
acc_device_api_none = 0,
|
||||
acc_device_api_cuda,
|
||||
acc_device_api_opencl,
|
||||
acc_device_api_coi,
|
||||
acc_device_api_other
|
||||
} acc_device_api;
|
||||
typedef struct acc_api_info
|
||||
{
|
||||
acc_device_api device_api;
|
||||
_acc_prof_int_t valid_bytes;
|
||||
acc_device_t device_type;
|
||||
_acc_prof_int_t vendor;
|
||||
const void *device_handle;
|
||||
const void *context_handle;
|
||||
const void *async_handle;
|
||||
#define _ACC_API_INFO_VALID_BYTES _ACC_PROF_VALID_BYTES_STRUCT (acc_api_info, async_handle, _ACC_PROF_VALID_BYTES_BASICTYPE (void *))
|
||||
} acc_api_info;
|
||||
typedef void (*acc_prof_callback) (acc_prof_info *, acc_event_info *,
|
||||
acc_api_info *);
|
||||
typedef enum acc_register_t
|
||||
{
|
||||
acc_reg = 0,
|
||||
acc_toggle = 1,
|
||||
acc_toggle_per_thread = 2
|
||||
} acc_register_t;
|
||||
typedef void (*acc_prof_reg) (acc_event_t, acc_prof_callback, acc_register_t);
|
||||
extern void acc_prof_register (acc_event_t, acc_prof_callback,
|
||||
acc_register_t) __GOACC_NOTHROW;
|
||||
extern void acc_prof_unregister (acc_event_t, acc_prof_callback,
|
||||
acc_register_t) __GOACC_NOTHROW;
|
||||
typedef void (*acc_query_fn) ();
|
||||
typedef acc_query_fn (*acc_prof_lookup_func) (const char *);
|
||||
extern acc_query_fn acc_prof_lookup (const char *) __GOACC_NOTHROW;
|
||||
extern void acc_register_library (acc_prof_reg, acc_prof_reg,
|
||||
acc_prof_lookup_func);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
63
third_party/aarch64/arm_acle.h
vendored
63
third_party/aarch64/arm_acle.h
vendored
|
@ -1,63 +0,0 @@
|
|||
#ifndef _GCC_ARM_ACLE_H
|
||||
#define _GCC_ARM_ACLE_H
|
||||
#ifdef __aarch64__
|
||||
#include "libc/inttypes.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/literal.h"
|
||||
|
||||
#pragma GCC push_options
|
||||
|
||||
#pragma GCC target("+nothing+crc")
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32b(uint32_t __a, uint8_t __b) {
|
||||
return __builtin_aarch64_crc32b(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32cb(uint32_t __a, uint8_t __b) {
|
||||
return __builtin_aarch64_crc32cb(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32ch(uint32_t __a, uint16_t __b) {
|
||||
return __builtin_aarch64_crc32ch(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32cw(uint32_t __a, uint32_t __b) {
|
||||
return __builtin_aarch64_crc32cw(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32cd(uint32_t __a, uint64_t __b) {
|
||||
return __builtin_aarch64_crc32cx(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32h(uint32_t __a, uint16_t __b) {
|
||||
return __builtin_aarch64_crc32h(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32w(uint32_t __a, uint32_t __b) {
|
||||
return __builtin_aarch64_crc32w(__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__((__always_inline__))
|
||||
__crc32d(uint32_t __a, uint64_t __b) {
|
||||
return __builtin_aarch64_crc32x(__a, __b);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#endif /* __aarch64__ */
|
||||
#endif
|
164
third_party/aarch64/arm_acle.internal.h
vendored
Normal file
164
third_party/aarch64/arm_acle.internal.h
vendored
Normal file
|
@ -0,0 +1,164 @@
|
|||
/* clang-format off */
|
||||
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _GCC_ARM_ACLE_H
|
||||
#define _GCC_ARM_ACLE_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("arch=armv8.3-a")
|
||||
__funline int32_t
|
||||
__jcvt (double __a)
|
||||
{
|
||||
return __builtin_aarch64_jcvtzs (__a);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("arch=armv8.5-a")
|
||||
__funline float
|
||||
__rint32zf (float __a)
|
||||
{
|
||||
return __builtin_aarch64_frint32zsf (__a);
|
||||
}
|
||||
__funline double
|
||||
__rint32z (double __a)
|
||||
{
|
||||
return __builtin_aarch64_frint32zdf (__a);
|
||||
}
|
||||
__funline float
|
||||
__rint64zf (float __a)
|
||||
{
|
||||
return __builtin_aarch64_frint64zsf (__a);
|
||||
}
|
||||
__funline double
|
||||
__rint64z (double __a)
|
||||
{
|
||||
return __builtin_aarch64_frint64zdf (__a);
|
||||
}
|
||||
__funline float
|
||||
__rint32xf (float __a)
|
||||
{
|
||||
return __builtin_aarch64_frint32xsf (__a);
|
||||
}
|
||||
__funline double
|
||||
__rint32x (double __a)
|
||||
{
|
||||
return __builtin_aarch64_frint32xdf (__a);
|
||||
}
|
||||
__funline float
|
||||
__rint64xf (float __a)
|
||||
{
|
||||
return __builtin_aarch64_frint64xsf (__a);
|
||||
}
|
||||
__funline double
|
||||
__rint64x (double __a)
|
||||
{
|
||||
return __builtin_aarch64_frint64xdf (__a);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("+nothing+crc")
|
||||
__funline uint32_t
|
||||
__crc32b (uint32_t __a, uint8_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32b (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32cb (uint32_t __a, uint8_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32cb (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32ch (uint32_t __a, uint16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32ch (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32cw (uint32_t __a, uint32_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32cw (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32cd (uint32_t __a, uint64_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32cx (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32h (uint32_t __a, uint16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32h (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32w (uint32_t __a, uint32_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32w (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
__crc32d (uint32_t __a, uint64_t __b)
|
||||
{
|
||||
return __builtin_aarch64_crc32x (__a, __b);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#ifdef __ARM_FEATURE_TME
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("+nothing+tme")
|
||||
#define _TMFAILURE_REASON 0x00007fffu
|
||||
#define _TMFAILURE_RTRY 0x00008000u
|
||||
#define _TMFAILURE_CNCL 0x00010000u
|
||||
#define _TMFAILURE_MEM 0x00020000u
|
||||
#define _TMFAILURE_IMP 0x00040000u
|
||||
#define _TMFAILURE_ERR 0x00080000u
|
||||
#define _TMFAILURE_SIZE 0x00100000u
|
||||
#define _TMFAILURE_NEST 0x00200000u
|
||||
#define _TMFAILURE_DBG 0x00400000u
|
||||
#define _TMFAILURE_INT 0x00800000u
|
||||
#define _TMFAILURE_TRIVIAL 0x01000000u
|
||||
__funline uint64_t
|
||||
__tstart (void)
|
||||
{
|
||||
return __builtin_aarch64_tstart ();
|
||||
}
|
||||
__funline void
|
||||
__tcommit (void)
|
||||
{
|
||||
__builtin_aarch64_tcommit ();
|
||||
}
|
||||
__funline void
|
||||
__tcancel (const uint64_t __reason)
|
||||
{
|
||||
__builtin_aarch64_tcancel (__reason);
|
||||
}
|
||||
__funline uint64_t
|
||||
__ttest (void)
|
||||
{
|
||||
return __builtin_aarch64_ttest ();
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("+nothing+rng")
|
||||
__funline int
|
||||
__rndr (uint64_t *__res)
|
||||
{
|
||||
return __builtin_aarch64_rndr (__res);
|
||||
}
|
||||
__funline int
|
||||
__rndrrs (uint64_t *__res)
|
||||
{
|
||||
return __builtin_aarch64_rndrrs (__res);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("arch=armv8.5-a+memtag")
|
||||
#define __arm_mte_create_random_tag(__ptr, __u64_mask) __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
|
||||
#define __arm_mte_exclude_tag(__ptr, __u64_excluded) __builtin_aarch64_memtag_gmi(__ptr, __u64_excluded)
|
||||
#define __arm_mte_ptrdiff(__ptr_a, __ptr_b) __builtin_aarch64_memtag_subp(__ptr_a, __ptr_b)
|
||||
#define __arm_mte_increment_tag(__ptr, __u_offset) __builtin_aarch64_memtag_inc_tag(__ptr, __u_offset)
|
||||
#define __arm_mte_set_tag(__tagged_address) __builtin_aarch64_memtag_set_tag(__tagged_address)
|
||||
#define __arm_mte_get_tag(__address) __builtin_aarch64_memtag_get_tag(__address)
|
||||
#pragma GCC pop_options
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
23
third_party/aarch64/arm_bf16.internal.h
vendored
Normal file
23
third_party/aarch64/arm_bf16.internal.h
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* clang-format off */
|
||||
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _AARCH64_BF16_H_
|
||||
#define _AARCH64_BF16_H_
|
||||
typedef __bf16 bfloat16_t;
|
||||
typedef float float32_t;
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("+nothing+bf16+nosimd")
|
||||
__extension__ extern __inline bfloat16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvth_bf16_f32 (float32_t __a)
|
||||
{
|
||||
return __builtin_aarch64_bfcvtbf (__a);
|
||||
}
|
||||
__extension__ extern __inline float32_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcvtah_f32_bf16 (bfloat16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_bfcvtsf (__a);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
373
third_party/aarch64/arm_fp16.h
vendored
373
third_party/aarch64/arm_fp16.h
vendored
|
@ -1,373 +0,0 @@
|
|||
#ifndef _AARCH64_FP16_H_
|
||||
#define _AARCH64_FP16_H_
|
||||
#ifdef __aarch64__
|
||||
#include "libc/inttypes.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/literal.h"
|
||||
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("arch=armv8.2-a+fp16")
|
||||
|
||||
typedef __fp16 float16_t;
|
||||
|
||||
__funline float16_t vabsh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_abshf(__a);
|
||||
}
|
||||
|
||||
__funline uint16_t vceqzh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_cmeqhf_uss(__a, 0.0f);
|
||||
}
|
||||
|
||||
__funline uint16_t vcgezh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_cmgehf_uss(__a, 0.0f);
|
||||
}
|
||||
|
||||
__funline uint16_t vcgtzh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_cmgthf_uss(__a, 0.0f);
|
||||
}
|
||||
|
||||
__funline uint16_t vclezh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_cmlehf_uss(__a, 0.0f);
|
||||
}
|
||||
|
||||
__funline uint16_t vcltzh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_cmlthf_uss(__a, 0.0f);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_f16_s16(int16_t __a) {
|
||||
return __builtin_aarch64_floathihf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_f16_s32(int32_t __a) {
|
||||
return __builtin_aarch64_floatsihf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_f16_s64(int64_t __a) {
|
||||
return __builtin_aarch64_floatdihf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_f16_u16(uint16_t __a) {
|
||||
return __builtin_aarch64_floatunshihf_us(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_f16_u32(uint32_t __a) {
|
||||
return __builtin_aarch64_floatunssihf_us(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_f16_u64(uint64_t __a) {
|
||||
return __builtin_aarch64_floatunsdihf_us(__a);
|
||||
}
|
||||
|
||||
__funline int16_t vcvth_s16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_fix_trunchfhi(__a);
|
||||
}
|
||||
|
||||
__funline int32_t vcvth_s32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_fix_trunchfsi(__a);
|
||||
}
|
||||
|
||||
__funline int64_t vcvth_s64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_fix_trunchfdi(__a);
|
||||
}
|
||||
|
||||
__funline uint16_t vcvth_u16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_fixuns_trunchfhi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint32_t vcvth_u32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_fixuns_trunchfsi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint64_t vcvth_u64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_fixuns_trunchfdi_us(__a);
|
||||
}
|
||||
|
||||
__funline int16_t vcvtah_s16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lroundhfhi(__a);
|
||||
}
|
||||
|
||||
__funline int32_t vcvtah_s32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lroundhfsi(__a);
|
||||
}
|
||||
|
||||
__funline int64_t vcvtah_s64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lroundhfdi(__a);
|
||||
}
|
||||
|
||||
__funline uint16_t vcvtah_u16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lrounduhfhi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint32_t vcvtah_u32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lrounduhfsi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint64_t vcvtah_u64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lrounduhfdi_us(__a);
|
||||
}
|
||||
|
||||
__funline int16_t vcvtmh_s16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfloorhfhi(__a);
|
||||
}
|
||||
|
||||
__funline int32_t vcvtmh_s32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfloorhfsi(__a);
|
||||
}
|
||||
|
||||
__funline int64_t vcvtmh_s64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfloorhfdi(__a);
|
||||
}
|
||||
|
||||
__funline uint16_t vcvtmh_u16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lflooruhfhi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint32_t vcvtmh_u32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lflooruhfsi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint64_t vcvtmh_u64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lflooruhfdi_us(__a);
|
||||
}
|
||||
|
||||
__funline int16_t vcvtnh_s16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfrintnhfhi(__a);
|
||||
}
|
||||
|
||||
__funline int32_t vcvtnh_s32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfrintnhfsi(__a);
|
||||
}
|
||||
|
||||
__funline int64_t vcvtnh_s64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfrintnhfdi(__a);
|
||||
}
|
||||
|
||||
__funline uint16_t vcvtnh_u16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfrintnuhfhi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint32_t vcvtnh_u32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfrintnuhfsi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint64_t vcvtnh_u64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lfrintnuhfdi_us(__a);
|
||||
}
|
||||
|
||||
__funline int16_t vcvtph_s16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lceilhfhi(__a);
|
||||
}
|
||||
|
||||
__funline int32_t vcvtph_s32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lceilhfsi(__a);
|
||||
}
|
||||
|
||||
__funline int64_t vcvtph_s64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lceilhfdi(__a);
|
||||
}
|
||||
|
||||
__funline uint16_t vcvtph_u16_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lceiluhfhi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint32_t vcvtph_u32_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lceiluhfsi_us(__a);
|
||||
}
|
||||
|
||||
__funline uint64_t vcvtph_u64_f16(float16_t __a) {
|
||||
return __builtin_aarch64_lceiluhfdi_us(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vnegh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_neghf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrecpeh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_frecpehf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrecpxh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_frecpxhf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_btrunchf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndah_f16(float16_t __a) {
|
||||
return __builtin_aarch64_roundhf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndih_f16(float16_t __a) {
|
||||
return __builtin_aarch64_nearbyinthf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndmh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_floorhf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndnh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_frintnhf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndph_f16(float16_t __a) {
|
||||
return __builtin_aarch64_ceilhf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrndxh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_rinthf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vrsqrteh_f16(float16_t __a) {
|
||||
return __builtin_aarch64_rsqrtehf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vsqrth_f16(float16_t __a) {
|
||||
return __builtin_aarch64_sqrthf(__a);
|
||||
}
|
||||
|
||||
__funline float16_t vaddh_f16(float16_t __a, float16_t __b) {
|
||||
return __a + __b;
|
||||
}
|
||||
|
||||
__funline float16_t vabdh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_fabdhf(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcageh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_facgehf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcagth_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_facgthf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_faclehf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcalth_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_faclthf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vceqh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_cmeqhf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_cmgehf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcgth_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_cmgthf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcleh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_cmlehf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vclth_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_cmlthf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
|
||||
return __builtin_aarch64_scvtfhi(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
|
||||
return __builtin_aarch64_scvtfsihf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
|
||||
return __builtin_aarch64_scvtfdihf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
|
||||
return __builtin_aarch64_ucvtfhi_sus(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
|
||||
return __builtin_aarch64_ucvtfsihf_sus(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
|
||||
return __builtin_aarch64_ucvtfdihf_sus(__a, __b);
|
||||
}
|
||||
|
||||
__funline int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
|
||||
return __builtin_aarch64_fcvtzshf(__a, __b);
|
||||
}
|
||||
|
||||
__funline int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
|
||||
return __builtin_aarch64_fcvtzshfsi(__a, __b);
|
||||
}
|
||||
|
||||
__funline int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
|
||||
return __builtin_aarch64_fcvtzshfdi(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
|
||||
return __builtin_aarch64_fcvtzuhf_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
|
||||
return __builtin_aarch64_fcvtzuhfsi_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
|
||||
return __builtin_aarch64_fcvtzuhfdi_uss(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vdivh_f16(float16_t __a, float16_t __b) {
|
||||
return __a / __b;
|
||||
}
|
||||
|
||||
__funline float16_t vmaxh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_fmaxhf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_fmaxhf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vminh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_fminhf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vminnmh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_fminhf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vmulh_f16(float16_t __a, float16_t __b) {
|
||||
return __a * __b;
|
||||
}
|
||||
|
||||
__funline float16_t vmulxh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_fmulxhf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_frecpshf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
|
||||
return __builtin_aarch64_rsqrtshf(__a, __b);
|
||||
}
|
||||
|
||||
__funline float16_t vsubh_f16(float16_t __a, float16_t __b) {
|
||||
return __a - __b;
|
||||
}
|
||||
|
||||
__funline float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
|
||||
return __builtin_aarch64_fmahf(__b, __c, __a);
|
||||
}
|
||||
|
||||
__funline float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
|
||||
return __builtin_aarch64_fnmahf(__b, __c, __a);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#undef FUNC
|
||||
#endif /* __aarch64__ */
|
||||
#endif /* _AARCH64_FP16_H_ */
|
455
third_party/aarch64/arm_fp16.internal.h
vendored
Normal file
455
third_party/aarch64/arm_fp16.internal.h
vendored
Normal file
|
@ -0,0 +1,455 @@
|
|||
/* clang-format off */
|
||||
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _AARCH64_FP16_H_
|
||||
#define _AARCH64_FP16_H_
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("arch=armv8.2-a+fp16")
|
||||
typedef __fp16 float16_t;
|
||||
__funline float16_t
|
||||
vabsh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_abshf (__a);
|
||||
}
|
||||
__funline uint16_t
|
||||
vceqzh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_cmeqhf_uss (__a, 0.0f);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcgezh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_cmgehf_uss (__a, 0.0f);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcgtzh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_cmgthf_uss (__a, 0.0f);
|
||||
}
|
||||
__funline uint16_t
|
||||
vclezh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_cmlehf_uss (__a, 0.0f);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcltzh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_cmlthf_uss (__a, 0.0f);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_f16_s16 (int16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floathihf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_f16_s32 (int32_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floatsihf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_f16_s64 (int64_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floatdihf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_f16_u16 (uint16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floatunshihf_us (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_f16_u32 (uint32_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floatunssihf_us (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_f16_u64 (uint64_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floatunsdihf_us (__a);
|
||||
}
|
||||
__funline int16_t
|
||||
vcvth_s16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_fix_trunchfhi (__a);
|
||||
}
|
||||
__funline int32_t
|
||||
vcvth_s32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_fix_trunchfsi (__a);
|
||||
}
|
||||
__funline int64_t
|
||||
vcvth_s64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_fix_trunchfdi (__a);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcvth_u16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_fixuns_trunchfhi_us (__a);
|
||||
}
|
||||
__funline uint32_t
|
||||
vcvth_u32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_fixuns_trunchfsi_us (__a);
|
||||
}
|
||||
__funline uint64_t
|
||||
vcvth_u64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_fixuns_trunchfdi_us (__a);
|
||||
}
|
||||
__funline int16_t
|
||||
vcvtah_s16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lroundhfhi (__a);
|
||||
}
|
||||
__funline int32_t
|
||||
vcvtah_s32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lroundhfsi (__a);
|
||||
}
|
||||
__funline int64_t
|
||||
vcvtah_s64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lroundhfdi (__a);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcvtah_u16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lrounduhfhi_us (__a);
|
||||
}
|
||||
__funline uint32_t
|
||||
vcvtah_u32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lrounduhfsi_us (__a);
|
||||
}
|
||||
__funline uint64_t
|
||||
vcvtah_u64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lrounduhfdi_us (__a);
|
||||
}
|
||||
__funline int16_t
|
||||
vcvtmh_s16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfloorhfhi (__a);
|
||||
}
|
||||
__funline int32_t
|
||||
vcvtmh_s32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfloorhfsi (__a);
|
||||
}
|
||||
__funline int64_t
|
||||
vcvtmh_s64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfloorhfdi (__a);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcvtmh_u16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lflooruhfhi_us (__a);
|
||||
}
|
||||
__funline uint32_t
|
||||
vcvtmh_u32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lflooruhfsi_us (__a);
|
||||
}
|
||||
__funline uint64_t
|
||||
vcvtmh_u64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lflooruhfdi_us (__a);
|
||||
}
|
||||
__funline int16_t
|
||||
vcvtnh_s16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfrintnhfhi (__a);
|
||||
}
|
||||
__funline int32_t
|
||||
vcvtnh_s32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfrintnhfsi (__a);
|
||||
}
|
||||
__funline int64_t
|
||||
vcvtnh_s64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfrintnhfdi (__a);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcvtnh_u16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfrintnuhfhi_us (__a);
|
||||
}
|
||||
__funline uint32_t
|
||||
vcvtnh_u32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfrintnuhfsi_us (__a);
|
||||
}
|
||||
__funline uint64_t
|
||||
vcvtnh_u64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lfrintnuhfdi_us (__a);
|
||||
}
|
||||
__funline int16_t
|
||||
vcvtph_s16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lceilhfhi (__a);
|
||||
}
|
||||
__funline int32_t
|
||||
vcvtph_s32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lceilhfsi (__a);
|
||||
}
|
||||
__funline int64_t
|
||||
vcvtph_s64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lceilhfdi (__a);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcvtph_u16_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lceiluhfhi_us (__a);
|
||||
}
|
||||
__funline uint32_t
|
||||
vcvtph_u32_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lceiluhfsi_us (__a);
|
||||
}
|
||||
__funline uint64_t
|
||||
vcvtph_u64_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_lceiluhfdi_us (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vnegh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_neghf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrecpeh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_frecpehf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrecpxh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_frecpxhf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_btrunchf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndah_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_roundhf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndih_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_nearbyinthf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndmh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_floorhf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndnh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_frintnhf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndph_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_ceilhf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrndxh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_rinthf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vrsqrteh_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_rsqrtehf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vsqrth_f16 (float16_t __a)
|
||||
{
|
||||
return __builtin_aarch64_sqrthf (__a);
|
||||
}
|
||||
__funline float16_t
|
||||
vaddh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a + __b;
|
||||
}
|
||||
__funline float16_t
|
||||
vabdh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fabdhf (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcageh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_facgehf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcagth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_facgthf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcaleh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_faclehf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcalth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_faclthf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vceqh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmeqhf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcgeh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmgehf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcgth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmgthf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcleh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmlehf_uss (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vclth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmlthf_uss (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_n_f16_s16 (int16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_scvtfhi (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_n_f16_s32 (int32_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_scvtfsihf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_n_f16_s64 (int64_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_scvtfdihf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_n_f16_u16 (uint16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_ucvtfhi_sus (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_n_f16_u32 (uint32_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vcvth_n_f16_u64 (uint64_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
|
||||
}
|
||||
__funline int16_t
|
||||
vcvth_n_s16_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzshf (__a, __b);
|
||||
}
|
||||
__funline int32_t
|
||||
vcvth_n_s32_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzshfsi (__a, __b);
|
||||
}
|
||||
__funline int64_t
|
||||
vcvth_n_s64_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzshfdi (__a, __b);
|
||||
}
|
||||
__funline uint16_t
|
||||
vcvth_n_u16_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
|
||||
}
|
||||
__funline uint32_t
|
||||
vcvth_n_u32_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
|
||||
}
|
||||
__funline uint64_t
|
||||
vcvth_n_u64_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vdivh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a / __b;
|
||||
}
|
||||
__funline float16_t
|
||||
vmaxh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fmaxhf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vmaxnmh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fmaxhf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vminh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fminhf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vminnmh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fminhf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vmulh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a * __b;
|
||||
}
|
||||
__funline float16_t
|
||||
vmulxh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fmulxhf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vrecpsh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_frecpshf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vrsqrtsh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_rsqrtshf (__a, __b);
|
||||
}
|
||||
__funline float16_t
|
||||
vsubh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a - __b;
|
||||
}
|
||||
__funline float16_t
|
||||
vfmah_f16 (float16_t __a, float16_t __b, float16_t __c)
|
||||
{
|
||||
return __builtin_aarch64_fmahf (__b, __c, __a);
|
||||
}
|
||||
__funline float16_t
|
||||
vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c)
|
||||
{
|
||||
return __builtin_aarch64_fnmahf (__b, __c, __a);
|
||||
}
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
22294
third_party/aarch64/arm_neon.h
vendored
22294
third_party/aarch64/arm_neon.h
vendored
File diff suppressed because it is too large
Load diff
29402
third_party/aarch64/arm_neon.internal.h
vendored
Normal file
29402
third_party/aarch64/arm_neon.internal.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
11
third_party/aarch64/arm_sve.internal.h
vendored
Normal file
11
third_party/aarch64/arm_sve.internal.h
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* clang-format off */
|
||||
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _ARM_SVE_H_
|
||||
#define _ARM_SVE_H_
|
||||
#include "third_party/aarch64/arm_bf16.internal.h"
|
||||
typedef __fp16 float16_t;
|
||||
typedef float float32_t;
|
||||
typedef double float64_t;
|
||||
#pragma GCC aarch64 "third_party/aarch64/arm_sve.internal.h"
|
||||
#endif
|
||||
#endif
|
112
third_party/aarch64/openacc.internal.h
vendored
Normal file
112
third_party/aarch64/openacc.internal.h
vendored
Normal file
|
@ -0,0 +1,112 @@
|
|||
/* clang-format off */
|
||||
#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _OPENACC_H
|
||||
#define _OPENACC_H 1
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#if __cplusplus >= 201103
|
||||
# define __GOACC_NOTHROW noexcept
|
||||
#elif __cplusplus
|
||||
# define __GOACC_NOTHROW throw ()
|
||||
#else
|
||||
# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
|
||||
#endif
|
||||
typedef enum acc_device_t {
|
||||
acc_device_current = -1,
|
||||
acc_device_none = 0,
|
||||
acc_device_default = 1,
|
||||
acc_device_host = 2,
|
||||
acc_device_not_host = 4,
|
||||
acc_device_nvidia = 5,
|
||||
acc_device_radeon = 8,
|
||||
_ACC_device_hwm,
|
||||
_ACC_highest = __INT_MAX__,
|
||||
_ACC_neg = -1
|
||||
} acc_device_t;
|
||||
typedef enum acc_device_property_t {
|
||||
acc_property_memory = 1,
|
||||
acc_property_free_memory = 2,
|
||||
acc_property_name = 0x10001,
|
||||
acc_property_vendor = 0x10002,
|
||||
acc_property_driver = 0x10003
|
||||
} acc_device_property_t;
|
||||
typedef enum acc_async_t {
|
||||
acc_async_noval = -1,
|
||||
acc_async_sync = -2
|
||||
} acc_async_t;
|
||||
int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW;
|
||||
void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
|
||||
acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
|
||||
void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
|
||||
int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
|
||||
size_t acc_get_property
|
||||
(int, acc_device_t, acc_device_property_t) __GOACC_NOTHROW;
|
||||
const char *acc_get_property_string
|
||||
(int, acc_device_t, acc_device_property_t) __GOACC_NOTHROW;
|
||||
int acc_async_test (int) __GOACC_NOTHROW;
|
||||
int acc_async_test_all (void) __GOACC_NOTHROW;
|
||||
void acc_wait (int) __GOACC_NOTHROW;
|
||||
void acc_async_wait (int) __GOACC_NOTHROW;
|
||||
void acc_wait_async (int, int) __GOACC_NOTHROW;
|
||||
void acc_wait_all (void) __GOACC_NOTHROW;
|
||||
void acc_async_wait_all (void) __GOACC_NOTHROW;
|
||||
void acc_wait_all_async (int) __GOACC_NOTHROW;
|
||||
void acc_init (acc_device_t) __GOACC_NOTHROW;
|
||||
void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
|
||||
#ifdef __cplusplus
|
||||
int acc_on_device (int __arg) __GOACC_NOTHROW;
|
||||
#else
|
||||
int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW;
|
||||
#endif
|
||||
void *acc_malloc (size_t) __GOACC_NOTHROW;
|
||||
void acc_free (void *) __GOACC_NOTHROW;
|
||||
void *acc_copyin (void *, size_t) __GOACC_NOTHROW;
|
||||
void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW;
|
||||
void *acc_pcopyin (void *, size_t) __GOACC_NOTHROW;
|
||||
void *acc_create (void *, size_t) __GOACC_NOTHROW;
|
||||
void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW;
|
||||
void *acc_pcreate (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_copyout (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_delete (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_update_device (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_update_self (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_unmap_data (void *) __GOACC_NOTHROW;
|
||||
void *acc_deviceptr (void *) __GOACC_NOTHROW;
|
||||
void *acc_hostptr (void *) __GOACC_NOTHROW;
|
||||
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_attach (void **) __GOACC_NOTHROW;
|
||||
void acc_attach_async (void **, int) __GOACC_NOTHROW;
|
||||
void acc_detach (void **) __GOACC_NOTHROW;
|
||||
void acc_detach_async (void **, int) __GOACC_NOTHROW;
|
||||
void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
|
||||
void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_detach_finalize (void **) __GOACC_NOTHROW;
|
||||
void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
|
||||
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_delete_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
|
||||
void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
|
||||
void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
|
||||
void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
|
||||
void *acc_get_cuda_stream (int) __GOACC_NOTHROW;
|
||||
int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#pragma acc routine seq
|
||||
inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
|
||||
{
|
||||
return acc_on_device ((int) __arg);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
72
third_party/aarch64/upgrade.sh
vendored
Executable file
72
third_party/aarch64/upgrade.sh
vendored
Executable file
|
@ -0,0 +1,72 @@
|
|||
#!/bin/sh
|
||||
# /opt/aarch64o2/lib/gcc/aarch64-linux-musl/9.2.0/include
|
||||
# /opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
|
||||
|
||||
# IMPORTANT NOTES
|
||||
#
|
||||
# 1. You also need:
|
||||
# #pragma GCC diagnostic ignored "-Wmissing-braces"
|
||||
# In third_party/aarch64/arm_neon.internal.h
|
||||
#
|
||||
# 2. You have to rewrite arm_fp16 to use `__funline`.
|
||||
#
|
||||
# 3. You should fix up the `#pragma GCC aarch64` things.
|
||||
#
|
||||
|
||||
s=/opt/cross11portcosmo/lib/gcc/aarch64-linux-musl/11.2.0/include
|
||||
d=third_party/aarch64
|
||||
|
||||
FILES='
|
||||
arm_acle
|
||||
arm_fp16
|
||||
arm_neon
|
||||
acc_prof
|
||||
arm_bf16
|
||||
arm_sve
|
||||
acc_prof
|
||||
openacc
|
||||
'
|
||||
|
||||
strip_c_comments() {
|
||||
# https://stackoverflow.com/a/13062682/1653720
|
||||
[ $# -eq 2 ] && arg="$1" || arg=""
|
||||
eval file="\$$#"
|
||||
sed 's/a/aA/g; s/__/aB/g; s/#/aC/g' "$file" |
|
||||
gcc -P -E $arg - |
|
||||
sed 's/aC/#/g; s/aB/__/g; s/aA/a/g'
|
||||
}
|
||||
|
||||
rm -f third_party/aarch64/*.h
|
||||
|
||||
for f in $FILES; do
|
||||
echo cp $s/$f.h $d/$f.internal.h
|
||||
cp $s/$f.h $d/$f.internal.h || exit
|
||||
done
|
||||
|
||||
sed -i \
|
||||
-e 's/# *include/#include/' \
|
||||
-e '/#include .std/d' \
|
||||
-e 's!#include [<"]!#include "third_party/aarch64/!' \
|
||||
-e 's!\.h[>"]$!.internal.h"!' \
|
||||
third_party/aarch64/*.h
|
||||
|
||||
# solve the pedantic gcc linter warning `'vmulxh_f16' is static but used
|
||||
# in inline function 'vmulxh_laneq_f16' which is not static [-Werror]`
|
||||
sed -i \
|
||||
-e 's/static/extern/g' \
|
||||
third_party/aarch64/arm_fp16.internal.h
|
||||
|
||||
for f in third_party/aarch64/*.h; do
|
||||
strip_c_comments $f >$f.tmp || exit
|
||||
mv $f.tmp $f
|
||||
done
|
||||
|
||||
for f in third_party/aarch64/*.h; do
|
||||
(
|
||||
printf %s\\n '/* clang-format off */'
|
||||
printf %s\\n '#if defined(__aarch64__) && !(__ASSEMBLER__ + __LINKER__ + 0)'
|
||||
cat $f
|
||||
printf %s\\n '#endif'
|
||||
) >$f.tmp
|
||||
mv $f.tmp $f
|
||||
done
|
BIN
third_party/gcc/aarch64-linux-musl/bin/ld.bfd.gz
vendored
BIN
third_party/gcc/aarch64-linux-musl/bin/ld.bfd.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-addr2line.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-addr2line.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-ar.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-ar.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-as.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-as.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-c++filt.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-c++filt.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-cpp.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-cpp.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-elfedit.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-elfedit.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-g++.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-g++.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc-ar.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc-ar.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc-nm.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc-nm.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc-ranlib.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc-ranlib.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcc.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcov-dump.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcov-dump.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcov-tool.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcov-tool.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gcov.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gcov.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-gprof.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-gprof.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-nm.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-nm.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-objcopy.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-objcopy.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-objdump.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-objdump.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-ranlib.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-ranlib.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-readelf.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-readelf.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-size.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-size.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-strings.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-strings.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/aarch64-linux-musl-strip.gz
vendored
BIN
third_party/gcc/bin/aarch64-linux-musl-strip.gz
vendored
Binary file not shown.
BIN
third_party/gcc/libexec/gcc/aarch64-linux-musl/11.2.0/cc1.gz
vendored
Executable file
BIN
third_party/gcc/libexec/gcc/aarch64-linux-musl/11.2.0/cc1.gz
vendored
Executable file
Binary file not shown.
BIN
third_party/gcc/libexec/gcc/aarch64-linux-musl/11.2.0/cc1plus.gz
vendored
Executable file
BIN
third_party/gcc/libexec/gcc/aarch64-linux-musl/11.2.0/cc1plus.gz
vendored
Executable file
Binary file not shown.
BIN
third_party/gcc/libexec/gcc/aarch64-linux-musl/11.2.0/collect2.gz
vendored
Executable file
BIN
third_party/gcc/libexec/gcc/aarch64-linux-musl/11.2.0/collect2.gz
vendored
Executable file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2
third_party/gcc/upgrade-cosmo-gcc.sh
vendored
2
third_party/gcc/upgrade-cosmo-gcc.sh
vendored
|
@ -1,6 +1,6 @@
|
|||
#!/bin/sh
|
||||
|
||||
ARCH=${1:-x86_64}
|
||||
ARCH=${1:-aarch64}
|
||||
IMPORT=${2:-/opt/cross11portcosmo}
|
||||
PREFIX=third_party/gcc/
|
||||
OLDVERSION=9.2.0
|
||||
|
|
2
third_party/ggml/ggjt.v1.q4_0.c
vendored
2
third_party/ggml/ggjt.v1.q4_0.c
vendored
|
@ -29,7 +29,7 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.q8_0.h"
|
||||
#include "third_party/intel/immintrin.internal.h"
|
||||
|
|
2
third_party/ggml/ggjt.v1.q4_1.c
vendored
2
third_party/ggml/ggjt.v1.q4_1.c
vendored
|
@ -29,7 +29,7 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.q4_1.h"
|
||||
#include "third_party/ggml/ggjt.v1.q8_1.h"
|
||||
|
|
2
third_party/ggml/ggjt.v1.q4_2.c
vendored
2
third_party/ggml/ggjt.v1.q4_2.c
vendored
|
@ -29,7 +29,7 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/fp16.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.q8_0.h"
|
||||
|
|
2
third_party/ggml/ggjt.v1.q5_0.c
vendored
2
third_party/ggml/ggjt.v1.q5_0.c
vendored
|
@ -28,7 +28,7 @@
|
|||
#include "third_party/ggml/ggjt.v1.q5_0.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/fp16.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.q8_0.h"
|
||||
|
|
2
third_party/ggml/ggjt.v1.q5_1.c
vendored
2
third_party/ggml/ggjt.v1.q5_1.c
vendored
|
@ -20,7 +20,7 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/math.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/fp16.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.q8_1.h"
|
||||
|
|
2
third_party/ggml/ggjt.v1.q8_0.c
vendored
2
third_party/ggml/ggjt.v1.q8_0.c
vendored
|
@ -19,7 +19,7 @@
|
|||
#include "third_party/ggml/ggjt.v1.q8_0.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.internal.h"
|
||||
#include "third_party/ggml/ggjt.v1.q8_0.h"
|
||||
#include "third_party/intel/immintrin.internal.h"
|
||||
|
|
2
third_party/ggml/ggjt.v1.q8_1.c
vendored
2
third_party/ggml/ggjt.v1.q8_1.c
vendored
|
@ -19,7 +19,7 @@
|
|||
#include "third_party/ggml/ggjt.v1.q8_1.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/ggml/ggml.h"
|
||||
#include "third_party/intel/immintrin.internal.h"
|
||||
#include "third_party/libcxx/math.h"
|
||||
|
|
2
third_party/ggml/ggjt.v2.internal.h
vendored
2
third_party/ggml/ggjt.v2.internal.h
vendored
|
@ -1,7 +1,7 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_GGML_GGJT_V2_INTERNAL_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_GGML_GGJT_V2_INTERNAL_H_
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/immintrin.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
|
2
third_party/ggml/ggml.c
vendored
2
third_party/ggml/ggml.c
vendored
|
@ -39,7 +39,7 @@
|
|||
#include "libc/sysv/consts/clock.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/time/time.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/immintrin.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/tinymath/magicu.h"
|
||||
|
|
2
third_party/ggml/ggml.mk
vendored
2
third_party/ggml/ggml.mk
vendored
|
@ -192,6 +192,8 @@ o/$(MODE)/third_party/ggml/companionai.txt.zip.o: private \
|
|||
ZIPOBJ_FLAGS += \
|
||||
-B
|
||||
|
||||
o/$(MODE)/third_party/ggml/ggml.o: private QUOTA = -C64
|
||||
|
||||
################################################################################
|
||||
|
||||
THIRD_PARTY_GGML_COMS = \
|
||||
|
|
2
third_party/stb/stb_image.c
vendored
2
third_party/stb/stb_image.c
vendored
|
@ -32,7 +32,7 @@
|
|||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/x/x.h"
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/ammintrin.internal.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
|
|
2
third_party/zlib/adler32_simd.c
vendored
2
third_party/zlib/adler32_simd.c
vendored
|
@ -203,7 +203,7 @@ uint32_t ZLIB_INTERNAL adler32_simd_( /* SSSE3 */
|
|||
|
||||
#elif defined(ADLER32_SIMD_NEON)
|
||||
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
|
||||
uint32_t ZLIB_INTERNAL adler32_simd_( /* NEON */
|
||||
uint32_t adler,
|
||||
|
|
2
third_party/zlib/chunkcopy.inc
vendored
2
third_party/zlib/chunkcopy.inc
vendored
|
@ -28,7 +28,7 @@
|
|||
#endif
|
||||
|
||||
#if defined(INFLATE_CHUNK_SIMD_NEON)
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
typedef uint8x16_t z_vec128i_t;
|
||||
#elif defined(INFLATE_CHUNK_SIMD_SSE2)
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
|
9
third_party/zlib/crc32_simd.c
vendored
9
third_party/zlib/crc32_simd.c
vendored
|
@ -10,6 +10,7 @@ Chromium (BSD-3 License)\\n\
|
|||
Copyright 2017 The Chromium Authors\"");
|
||||
// clang-format off
|
||||
|
||||
#include "third_party/intel/x86gprintrin.internal.h"
|
||||
#include "third_party/zlib/crc32_simd.internal.h"
|
||||
#if defined(CRC32_SIMD_AVX512_PCLMUL)
|
||||
|
||||
|
@ -358,8 +359,8 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
|
|||
/* We need some extra types for using PMULL.
|
||||
*/
|
||||
#if defined(__aarch64__)
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_acle.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/aarch64/arm_acle.internal.h"
|
||||
#endif
|
||||
|
||||
/* CRC32 intrinsics are #ifdef'ed out of arm_acle.h unless we build with an
|
||||
|
@ -400,8 +401,8 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
|
|||
/* For GCC, we are setting CRC extensions at module level, so ThinLTO is not
|
||||
* allowed. We can just include arm_acle.h.
|
||||
*/
|
||||
#include "third_party/aarch64/arm_neon.h"
|
||||
#include "third_party/aarch64/arm_acle.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/aarch64/arm_acle.internal.h"
|
||||
#define TARGET_ARMV8_WITH_CRC
|
||||
#else // !defined(__GNUC__) && !defined(_aarch64__)
|
||||
#error ARM CRC32 SIMD extensions only supported for Clang and GCC
|
||||
|
|
2
third_party/zlib/slide_hash_simd.inc
vendored
2
third_party/zlib/slide_hash_simd.inc
vendored
|
@ -41,7 +41,7 @@ typedef __m128i z_vec128i_u16x8_t;
|
|||
|
||||
#elif defined(DEFLATE_SLIDE_HASH_NEON)
|
||||
|
||||
#include "third_party/aarch64/arm_neon.h" /* NEON */
|
||||
#include "third_party/aarch64/arm_neon.internal.h" /* NEON */
|
||||
|
||||
#define Z_SLIDE_INIT_SIMD(wsize) vdupq_n_u16((ush)(wsize))
|
||||
|
||||
|
|
Loading…
Reference in a new issue