mirror of https://github.com/jart/cosmopolitan.git
Make the intrinsics more readable
commit 80db9de173 (parent 210187cf77)
75 changed files with 12444 additions and 21493 deletions
@@ -854,5 +854,9 @@ typedef struct {
   asm(".weak\t" #alias "\n\t" \
       ".equ\t" #alias ", " #sym)
 
+#define __funline \
+  extern __inline \
+  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
 #define MACHINE_CODE_ANALYSIS_BEGIN_
 #define MACHINE_CODE_ANALYSIS_END_
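The pattern across all 75 files is mechanical: every intrinsic that previously spelled out the three-line `extern __inline` attribute boilerplate (or a file-local `FUNC` macro) now uses the shared `__funline` macro defined above. A minimal before/after sketch of the transformation, assuming a GNU C compiler (the function below is illustrative, not taken from the diff):

/* before: attribute boilerplate repeated at every definition */
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    example_abs(int __x) {
  return __x < 0 ? -__x : __x;
}

/* after: the attribute soup lives in one shared macro */
__funline int example_abs(int __x) {
  return __x < 0 ? -__x : __x;
}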
182  third_party/aarch64/arm_fp16.h  (vendored)
@@ -8,365 +8,361 @@
 #pragma GCC push_options
 #pragma GCC target("arch=armv8.2-a+fp16")
 
-#define FUNC \
-  __extension__ extern __inline \
-  __attribute__((__always_inline__, __gnu_inline__, __artificial__))
-
 typedef __fp16 float16_t;
 
-FUNC float16_t vabsh_f16(float16_t __a) {
+__funline float16_t vabsh_f16(float16_t __a) {
   return __builtin_aarch64_abshf(__a);
 }
 
-FUNC uint16_t vceqzh_f16(float16_t __a) {
+__funline uint16_t vceqzh_f16(float16_t __a) {
   return __builtin_aarch64_cmeqhf_uss(__a, 0.0f);
 }
 
-FUNC uint16_t vcgezh_f16(float16_t __a) {
+__funline uint16_t vcgezh_f16(float16_t __a) {
   return __builtin_aarch64_cmgehf_uss(__a, 0.0f);
 }
 
-FUNC uint16_t vcgtzh_f16(float16_t __a) {
+__funline uint16_t vcgtzh_f16(float16_t __a) {
   return __builtin_aarch64_cmgthf_uss(__a, 0.0f);
 }
 
-FUNC uint16_t vclezh_f16(float16_t __a) {
+__funline uint16_t vclezh_f16(float16_t __a) {
   return __builtin_aarch64_cmlehf_uss(__a, 0.0f);
 }
 
-FUNC uint16_t vcltzh_f16(float16_t __a) {
+__funline uint16_t vcltzh_f16(float16_t __a) {
   return __builtin_aarch64_cmlthf_uss(__a, 0.0f);
 }
 
-FUNC float16_t vcvth_f16_s16(int16_t __a) {
+__funline float16_t vcvth_f16_s16(int16_t __a) {
   return __builtin_aarch64_floathihf(__a);
 }
 
-FUNC float16_t vcvth_f16_s32(int32_t __a) {
+__funline float16_t vcvth_f16_s32(int32_t __a) {
   return __builtin_aarch64_floatsihf(__a);
 }
 
-FUNC float16_t vcvth_f16_s64(int64_t __a) {
+__funline float16_t vcvth_f16_s64(int64_t __a) {
   return __builtin_aarch64_floatdihf(__a);
 }
 
-FUNC float16_t vcvth_f16_u16(uint16_t __a) {
+__funline float16_t vcvth_f16_u16(uint16_t __a) {
   return __builtin_aarch64_floatunshihf_us(__a);
 }
 
-FUNC float16_t vcvth_f16_u32(uint32_t __a) {
+__funline float16_t vcvth_f16_u32(uint32_t __a) {
   return __builtin_aarch64_floatunssihf_us(__a);
 }
 
-FUNC float16_t vcvth_f16_u64(uint64_t __a) {
+__funline float16_t vcvth_f16_u64(uint64_t __a) {
   return __builtin_aarch64_floatunsdihf_us(__a);
 }
 
-FUNC int16_t vcvth_s16_f16(float16_t __a) {
+__funline int16_t vcvth_s16_f16(float16_t __a) {
   return __builtin_aarch64_fix_trunchfhi(__a);
 }
 
-FUNC int32_t vcvth_s32_f16(float16_t __a) {
+__funline int32_t vcvth_s32_f16(float16_t __a) {
   return __builtin_aarch64_fix_trunchfsi(__a);
 }
 
-FUNC int64_t vcvth_s64_f16(float16_t __a) {
+__funline int64_t vcvth_s64_f16(float16_t __a) {
   return __builtin_aarch64_fix_trunchfdi(__a);
 }
 
-FUNC uint16_t vcvth_u16_f16(float16_t __a) {
+__funline uint16_t vcvth_u16_f16(float16_t __a) {
   return __builtin_aarch64_fixuns_trunchfhi_us(__a);
 }
 
-FUNC uint32_t vcvth_u32_f16(float16_t __a) {
+__funline uint32_t vcvth_u32_f16(float16_t __a) {
   return __builtin_aarch64_fixuns_trunchfsi_us(__a);
 }
 
-FUNC uint64_t vcvth_u64_f16(float16_t __a) {
+__funline uint64_t vcvth_u64_f16(float16_t __a) {
   return __builtin_aarch64_fixuns_trunchfdi_us(__a);
 }
 
-FUNC int16_t vcvtah_s16_f16(float16_t __a) {
+__funline int16_t vcvtah_s16_f16(float16_t __a) {
   return __builtin_aarch64_lroundhfhi(__a);
 }
 
-FUNC int32_t vcvtah_s32_f16(float16_t __a) {
+__funline int32_t vcvtah_s32_f16(float16_t __a) {
   return __builtin_aarch64_lroundhfsi(__a);
 }
 
-FUNC int64_t vcvtah_s64_f16(float16_t __a) {
+__funline int64_t vcvtah_s64_f16(float16_t __a) {
   return __builtin_aarch64_lroundhfdi(__a);
 }
 
-FUNC uint16_t vcvtah_u16_f16(float16_t __a) {
+__funline uint16_t vcvtah_u16_f16(float16_t __a) {
   return __builtin_aarch64_lrounduhfhi_us(__a);
 }
 
-FUNC uint32_t vcvtah_u32_f16(float16_t __a) {
+__funline uint32_t vcvtah_u32_f16(float16_t __a) {
   return __builtin_aarch64_lrounduhfsi_us(__a);
 }
 
-FUNC uint64_t vcvtah_u64_f16(float16_t __a) {
+__funline uint64_t vcvtah_u64_f16(float16_t __a) {
   return __builtin_aarch64_lrounduhfdi_us(__a);
 }
 
-FUNC int16_t vcvtmh_s16_f16(float16_t __a) {
+__funline int16_t vcvtmh_s16_f16(float16_t __a) {
   return __builtin_aarch64_lfloorhfhi(__a);
 }
 
-FUNC int32_t vcvtmh_s32_f16(float16_t __a) {
+__funline int32_t vcvtmh_s32_f16(float16_t __a) {
   return __builtin_aarch64_lfloorhfsi(__a);
 }
 
-FUNC int64_t vcvtmh_s64_f16(float16_t __a) {
+__funline int64_t vcvtmh_s64_f16(float16_t __a) {
   return __builtin_aarch64_lfloorhfdi(__a);
 }
 
-FUNC uint16_t vcvtmh_u16_f16(float16_t __a) {
+__funline uint16_t vcvtmh_u16_f16(float16_t __a) {
   return __builtin_aarch64_lflooruhfhi_us(__a);
 }
 
-FUNC uint32_t vcvtmh_u32_f16(float16_t __a) {
+__funline uint32_t vcvtmh_u32_f16(float16_t __a) {
   return __builtin_aarch64_lflooruhfsi_us(__a);
 }
 
-FUNC uint64_t vcvtmh_u64_f16(float16_t __a) {
+__funline uint64_t vcvtmh_u64_f16(float16_t __a) {
   return __builtin_aarch64_lflooruhfdi_us(__a);
 }
 
-FUNC int16_t vcvtnh_s16_f16(float16_t __a) {
+__funline int16_t vcvtnh_s16_f16(float16_t __a) {
   return __builtin_aarch64_lfrintnhfhi(__a);
 }
 
-FUNC int32_t vcvtnh_s32_f16(float16_t __a) {
+__funline int32_t vcvtnh_s32_f16(float16_t __a) {
   return __builtin_aarch64_lfrintnhfsi(__a);
 }
 
-FUNC int64_t vcvtnh_s64_f16(float16_t __a) {
+__funline int64_t vcvtnh_s64_f16(float16_t __a) {
   return __builtin_aarch64_lfrintnhfdi(__a);
 }
 
-FUNC uint16_t vcvtnh_u16_f16(float16_t __a) {
+__funline uint16_t vcvtnh_u16_f16(float16_t __a) {
   return __builtin_aarch64_lfrintnuhfhi_us(__a);
 }
 
-FUNC uint32_t vcvtnh_u32_f16(float16_t __a) {
+__funline uint32_t vcvtnh_u32_f16(float16_t __a) {
   return __builtin_aarch64_lfrintnuhfsi_us(__a);
 }
 
-FUNC uint64_t vcvtnh_u64_f16(float16_t __a) {
+__funline uint64_t vcvtnh_u64_f16(float16_t __a) {
   return __builtin_aarch64_lfrintnuhfdi_us(__a);
 }
 
-FUNC int16_t vcvtph_s16_f16(float16_t __a) {
+__funline int16_t vcvtph_s16_f16(float16_t __a) {
   return __builtin_aarch64_lceilhfhi(__a);
 }
 
-FUNC int32_t vcvtph_s32_f16(float16_t __a) {
+__funline int32_t vcvtph_s32_f16(float16_t __a) {
   return __builtin_aarch64_lceilhfsi(__a);
 }
 
-FUNC int64_t vcvtph_s64_f16(float16_t __a) {
+__funline int64_t vcvtph_s64_f16(float16_t __a) {
   return __builtin_aarch64_lceilhfdi(__a);
 }
 
-FUNC uint16_t vcvtph_u16_f16(float16_t __a) {
+__funline uint16_t vcvtph_u16_f16(float16_t __a) {
   return __builtin_aarch64_lceiluhfhi_us(__a);
 }
 
-FUNC uint32_t vcvtph_u32_f16(float16_t __a) {
+__funline uint32_t vcvtph_u32_f16(float16_t __a) {
   return __builtin_aarch64_lceiluhfsi_us(__a);
 }
 
-FUNC uint64_t vcvtph_u64_f16(float16_t __a) {
+__funline uint64_t vcvtph_u64_f16(float16_t __a) {
   return __builtin_aarch64_lceiluhfdi_us(__a);
 }
 
-FUNC float16_t vnegh_f16(float16_t __a) {
+__funline float16_t vnegh_f16(float16_t __a) {
   return __builtin_aarch64_neghf(__a);
 }
 
-FUNC float16_t vrecpeh_f16(float16_t __a) {
+__funline float16_t vrecpeh_f16(float16_t __a) {
   return __builtin_aarch64_frecpehf(__a);
 }
 
-FUNC float16_t vrecpxh_f16(float16_t __a) {
+__funline float16_t vrecpxh_f16(float16_t __a) {
   return __builtin_aarch64_frecpxhf(__a);
 }
 
-FUNC float16_t vrndh_f16(float16_t __a) {
+__funline float16_t vrndh_f16(float16_t __a) {
   return __builtin_aarch64_btrunchf(__a);
 }
 
-FUNC float16_t vrndah_f16(float16_t __a) {
+__funline float16_t vrndah_f16(float16_t __a) {
   return __builtin_aarch64_roundhf(__a);
 }
 
-FUNC float16_t vrndih_f16(float16_t __a) {
+__funline float16_t vrndih_f16(float16_t __a) {
   return __builtin_aarch64_nearbyinthf(__a);
 }
 
-FUNC float16_t vrndmh_f16(float16_t __a) {
+__funline float16_t vrndmh_f16(float16_t __a) {
   return __builtin_aarch64_floorhf(__a);
 }
 
-FUNC float16_t vrndnh_f16(float16_t __a) {
+__funline float16_t vrndnh_f16(float16_t __a) {
   return __builtin_aarch64_frintnhf(__a);
 }
 
-FUNC float16_t vrndph_f16(float16_t __a) {
+__funline float16_t vrndph_f16(float16_t __a) {
   return __builtin_aarch64_ceilhf(__a);
 }
 
-FUNC float16_t vrndxh_f16(float16_t __a) {
+__funline float16_t vrndxh_f16(float16_t __a) {
   return __builtin_aarch64_rinthf(__a);
 }
 
-FUNC float16_t vrsqrteh_f16(float16_t __a) {
+__funline float16_t vrsqrteh_f16(float16_t __a) {
   return __builtin_aarch64_rsqrtehf(__a);
 }
 
-FUNC float16_t vsqrth_f16(float16_t __a) {
+__funline float16_t vsqrth_f16(float16_t __a) {
   return __builtin_aarch64_sqrthf(__a);
 }
 
-FUNC float16_t vaddh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vaddh_f16(float16_t __a, float16_t __b) {
   return __a + __b;
 }
 
-FUNC float16_t vabdh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vabdh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_fabdhf(__a, __b);
 }
 
-FUNC uint16_t vcageh_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcageh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_facgehf_uss(__a, __b);
 }
 
-FUNC uint16_t vcagth_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcagth_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_facgthf_uss(__a, __b);
 }
 
-FUNC uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_faclehf_uss(__a, __b);
 }
 
-FUNC uint16_t vcalth_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcalth_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_faclthf_uss(__a, __b);
 }
 
-FUNC uint16_t vceqh_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vceqh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_cmeqhf_uss(__a, __b);
 }
 
-FUNC uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_cmgehf_uss(__a, __b);
 }
 
-FUNC uint16_t vcgth_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcgth_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_cmgthf_uss(__a, __b);
 }
 
-FUNC uint16_t vcleh_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vcleh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_cmlehf_uss(__a, __b);
 }
 
-FUNC uint16_t vclth_f16(float16_t __a, float16_t __b) {
+__funline uint16_t vclth_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_cmlthf_uss(__a, __b);
 }
 
-FUNC float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
+__funline float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
   return __builtin_aarch64_scvtfhi(__a, __b);
 }
 
-FUNC float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
+__funline float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
   return __builtin_aarch64_scvtfsihf(__a, __b);
 }
 
-FUNC float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
+__funline float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
   return __builtin_aarch64_scvtfdihf(__a, __b);
 }
 
-FUNC float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
+__funline float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
   return __builtin_aarch64_ucvtfhi_sus(__a, __b);
 }
 
-FUNC float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
+__funline float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
   return __builtin_aarch64_ucvtfsihf_sus(__a, __b);
 }
 
-FUNC float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
+__funline float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
   return __builtin_aarch64_ucvtfdihf_sus(__a, __b);
 }
 
-FUNC int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
+__funline int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
   return __builtin_aarch64_fcvtzshf(__a, __b);
 }
 
-FUNC int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
+__funline int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
   return __builtin_aarch64_fcvtzshfsi(__a, __b);
 }
 
-FUNC int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
+__funline int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
   return __builtin_aarch64_fcvtzshfdi(__a, __b);
 }
 
-FUNC uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
+__funline uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
   return __builtin_aarch64_fcvtzuhf_uss(__a, __b);
 }
 
-FUNC uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
+__funline uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
   return __builtin_aarch64_fcvtzuhfsi_uss(__a, __b);
 }
 
-FUNC uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
+__funline uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
   return __builtin_aarch64_fcvtzuhfdi_uss(__a, __b);
 }
 
-FUNC float16_t vdivh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vdivh_f16(float16_t __a, float16_t __b) {
   return __a / __b;
 }
 
-FUNC float16_t vmaxh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vmaxh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_fmaxhf(__a, __b);
 }
 
-FUNC float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_fmaxhf(__a, __b);
 }
 
-FUNC float16_t vminh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vminh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_fminhf(__a, __b);
 }
 
-FUNC float16_t vminnmh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vminnmh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_fminhf(__a, __b);
 }
 
-FUNC float16_t vmulh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vmulh_f16(float16_t __a, float16_t __b) {
   return __a * __b;
 }
 
-FUNC float16_t vmulxh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vmulxh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_fmulxhf(__a, __b);
 }
 
-FUNC float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_frecpshf(__a, __b);
 }
 
-FUNC float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
   return __builtin_aarch64_rsqrtshf(__a, __b);
 }
 
-FUNC float16_t vsubh_f16(float16_t __a, float16_t __b) {
+__funline float16_t vsubh_f16(float16_t __a, float16_t __b) {
   return __a - __b;
 }
 
-FUNC float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
+__funline float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
   return __builtin_aarch64_fmahf(__b, __c, __a);
 }
 
-FUNC float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
+__funline float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
   return __builtin_aarch64_fnmahf(__b, __c, __a);
 }
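These are the scalar (`h` suffix) half-precision ACLE intrinsics; each lowers to a single AArch64 instruction through a GCC builtin. A hedged usage sketch, assuming a toolchain targeting armv8.2-a+fp16 (the include path mirrors the repo layout and is illustrative):

#include "third_party/aarch64/arm_fp16.h"

/* Fused multiply-add entirely in half precision: returns a + b * c.
   Note vfmah_f16 passes (b, c, a) to the builtin because GCC's fma
   builtin convention puts the accumulator last. */
float16_t fma16(float16_t a, float16_t b, float16_t c) {
  return vfmah_f16(a, b, c);
}

/* Convert with round-to-nearest, ties away from zero (FCVTAS). */
int16_t to_i16(float16_t x) {
  return vcvtah_s16_f16(x);
}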
8052  third_party/aarch64/arm_neon.h  (vendored; diff suppressed because it is too large)
48  third_party/ggml/ggml.c  (vendored)
@@ -1784,24 +1784,40 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     // Initialize accumulator with zeros
     __m256 acc = _mm256_setzero_ps();
 
-    //
-    // Main loop
-    for (int i = 0; i < nb; ++i) {
-        /* Compute combined scale for the block */
-        const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
-
-        __m256i bx = bytes_from_nibbles_32(x[i].qs);
-
-        // Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval.
-        const __m256i off = _mm256_set1_epi8( 8 );
-        bx = _mm256_sub_epi8( bx, off );
-
-        __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
-
-        const __m256 q = mul_sum_i8_pairs_float(bx, by);
-
-        /* Multiply q with scale and accumulate */
-        acc = _mm256_fmadd_ps( d, q, acc );
-    }
+    //
+#define WORK(I) \
+    /* Compute combined scale for the block */ \
+    const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[I].d ), _mm256_broadcast_ss( &y[I].d ) ); \
+    __m256i bx = bytes_from_nibbles_32(x[I].qs); \
+    /* Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval. */ \
+    const __m256i off = _mm256_set1_epi8( 8 ); \
+    bx = _mm256_sub_epi8( bx, off ); \
+    __m256i by = _mm256_loadu_si256((const __m256i *)y[I].qs); \
+    const __m256 q = mul_sum_i8_pairs_float(bx, by); \
+    /* Multiply q with scale and accumulate */ \
+    acc = _mm256_fmadd_ps( d, q, acc )
+    int i = 0;
+    for (; i + 12 < nb; i += 12) {
+        _mm_prefetch(x+i+12, 3);
+        _mm_prefetch(x+i+15, 3);
+        _mm_prefetch(x+i+18, 3);
+        _mm_prefetch(x+i+21, 3);
+        _mm_prefetch(y+i+12, 3);
+        _mm_prefetch(y+i+14, 3);
+        _mm_prefetch(y+i+16, 3);
+        _mm_prefetch(y+i+18, 3);
+        _mm_prefetch(y+i+20, 3);
+        _mm_prefetch(y+i+22, 3);
+        for (int j = 0; j < 12; ++j) {
+            WORK(i+j);
+        }
+    }
+    for (; i < nb; ++i) {
+        WORK(i);
+    }
+#undef WORK
 
     *s = hsum_float_8(acc);
 #elif defined(__AVX__)
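The rewrite hoists the loop body into a `WORK` macro so the hot loop can be unrolled 12 blocks at a time while software prefetches pull upcoming `x` and `y` blocks toward the cache (hint value 3 corresponds to `_MM_HINT_T0`); a scalar tail loop reuses the same body. A minimal sketch of the same unroll-plus-prefetch pattern, with hypothetical names and a trivial body:

#include <immintrin.h>

/* Illustrative only: 12-way unrolled loop that prefetches ahead of the
   stream, then finishes the remainder with the identical macro body. */
#define WORK(I) sum += a[I] * b[I]
static float dot(const float *a, const float *b, int nb) {
  float sum = 0;
  int i = 0;
  for (; i + 12 < nb; i += 12) {
    _mm_prefetch((const char *)(a + i + 12), _MM_HINT_T0);
    _mm_prefetch((const char *)(b + i + 12), _MM_HINT_T0);
    for (int j = 0; j < 12; ++j) {
      WORK(i + j);
    }
  }
  for (; i < nb; ++i) {
    WORK(i);
  }
  return sum;
}
#undef WORK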
39  third_party/intel/adxintrin.internal.h  (vendored)
@@ -5,46 +5,37 @@
 #ifndef _ADXINTRIN_H_INCLUDED
 #define _ADXINTRIN_H_INCLUDED
 
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _subborrow_u32(unsigned char __CF, unsigned int __X, unsigned int __Y,
-                   unsigned int *__P) {
+__funline unsigned char _subborrow_u32(unsigned char __CF, unsigned int __X,
+                                       unsigned int __Y, unsigned int *__P) {
   return __builtin_ia32_sbb_u32(__CF, __X, __Y, __P);
 }
 
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _addcarry_u32(unsigned char __CF, unsigned int __X, unsigned int __Y,
-                  unsigned int *__P) {
+__funline unsigned char _addcarry_u32(unsigned char __CF, unsigned int __X,
+                                      unsigned int __Y, unsigned int *__P) {
   return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
 }
 
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _addcarryx_u32(unsigned char __CF, unsigned int __X, unsigned int __Y,
-                   unsigned int *__P) {
+__funline unsigned char _addcarryx_u32(unsigned char __CF, unsigned int __X,
+                                       unsigned int __Y, unsigned int *__P) {
   return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
 }
 
 #ifdef __x86_64__
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _subborrow_u64(unsigned char __CF, unsigned long long __X,
-                   unsigned long long __Y, unsigned long long *__P) {
+__funline unsigned char _subborrow_u64(unsigned char __CF, unsigned long long __X,
+                                       unsigned long long __Y,
+                                       unsigned long long *__P) {
   return __builtin_ia32_sbb_u64(__CF, __X, __Y, __P);
 }
 
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _addcarry_u64(unsigned char __CF, unsigned long long __X,
-                  unsigned long long __Y, unsigned long long *__P) {
+__funline unsigned char _addcarry_u64(unsigned char __CF, unsigned long long __X,
+                                      unsigned long long __Y,
+                                      unsigned long long *__P) {
   return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
 }
 
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _addcarryx_u64(unsigned char __CF, unsigned long long __X,
-                   unsigned long long __Y, unsigned long long *__P) {
+__funline unsigned char _addcarryx_u64(unsigned char __CF, unsigned long long __X,
+                                       unsigned long long __Y,
+                                       unsigned long long *__P) {
   return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
 }
 #endif
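These ADX intrinsics thread a carry flag through multi-word arithmetic: each call consumes the previous carry and produces the next one. A hedged usage sketch of the standard pattern (function name is hypothetical):

#include <immintrin.h>

/* Add two little-endian 4-limb (128-bit) integers: out = a + b.
   Returns the final carry-out. Each _addcarry_u32 adds two limbs plus
   the incoming carry and reports the outgoing carry. */
unsigned char add128(const unsigned int a[4], const unsigned int b[4],
                     unsigned int out[4]) {
  unsigned char c = 0;
  for (int i = 0; i < 4; ++i) {
    c = _addcarry_u32(c, a[i], b[i], &out[i]);
  }
  return c;
}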
26  third_party/intel/ammintrin.internal.h  (vendored)
@@ -9,28 +9,21 @@
 #define __DISABLE_SSE4A__
 #endif /* __SSE4A__ */
 
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_stream_sd(double* __P, __m128d __Y) {
+__funline void _mm_stream_sd(double* __P, __m128d __Y) {
   __builtin_ia32_movntsd(__P, (__v2df)__Y);
 }
 
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_stream_ss(float* __P, __m128 __Y) {
+__funline void _mm_stream_ss(float* __P, __m128 __Y) {
   __builtin_ia32_movntss(__P, (__v4sf)__Y);
 }
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_extract_si64(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_extract_si64(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_extrq((__v2di)__X, (__v16qi)__Y);
 }
 
 #ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
-                                       __artificial__))
-_mm_extracti_si64(__m128i __X, unsigned const int __I, unsigned const int __L) {
+__funline __m128i _mm_extracti_si64(__m128i __X, unsigned const int __I,
+                                    unsigned const int __L) {
   return (__m128i)__builtin_ia32_extrqi((__v2di)__X, __I, __L);
 }
 #else
 
@@ -39,16 +32,13 @@ _mm_extracti_si64(__m128i __X, unsigned const int __I, unsigned const int __L) {
                                        (unsigned int)(L)))
 #endif
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_insert_si64(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_insert_si64(__m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_insertq((__v2di)__X, (__v2di)__Y);
 }
 
 #ifdef __OPTIMIZE__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I,
+__funline __m128i _mm_inserti_si64(__m128i __X, __m128i __Y,
+                                   unsigned const int __I,
                                    unsigned const int __L) {
   return (__m128i)__builtin_ia32_insertqi((__v2di)__X, (__v2di)__Y, __I, __L);
 }
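For reference, `_mm_stream_sd` and `_mm_stream_ss` are SSE4A non-temporal scalar stores: they write one double or float straight to memory while bypassing the cache hierarchy, which helps when filling buffers that will not be read back soon. A hedged sketch, assuming an SSE4A-capable target (the function name is illustrative):

#include <immintrin.h>

/* Fill a large output buffer without polluting the cache.
   The stores are weakly ordered, so _mm_sfence() makes them
   globally visible before we return. */
void fill_nocache(double *dst, double v, long n) {
  __m128d x = _mm_set_sd(v);
  for (long i = 0; i < n; ++i) {
    _mm_stream_sd(dst + i, x);
  }
  _mm_sfence();
}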
806  third_party/intel/avx2intrin.internal.h  (vendored; diff suppressed because it is too large)
78  third_party/intel/avx5124fmapsintrin.internal.h  (vendored)
@@ -12,109 +12,93 @@
 #define __DISABLE_AVX5124FMAPS__
 #endif /* __AVX5124FMAPS__ */
 
-extern __inline __m512
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D, __m512 __E,
-                     __m128 *__F) {
+__funline __m512 _mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
+                                  __m512 __E, __m128 *__F) {
   return (__m512)__builtin_ia32_4fmaddps((__v16sf)__B, (__v16sf)__C,
                                          (__v16sf)__D, (__v16sf)__E,
                                          (__v16sf)__A, (const __v4sf *)__F);
 }
 
-extern __inline __m512
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C,
-                          __m512 __D, __m512 __E, __m128 *__F) {
+__funline __m512 _mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
+                                       __m512 __C, __m512 __D, __m512 __E,
+                                       __m128 *__F) {
   return (__m512)__builtin_ia32_4fmaddps_mask(
       (__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
       (const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
 }
 
-extern __inline __m512
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C,
-                           __m512 __D, __m512 __E, __m128 *__F) {
+__funline __m512 _mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
+                                        __m512 __C, __m512 __D, __m512 __E,
+                                        __m128 *__F) {
   return (__m512)__builtin_ia32_4fmaddps_mask(
       (__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
       (const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
 }
 
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D, __m128 __E,
-                  __m128 *__F) {
+__funline __m128 _mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
+                               __m128 __E, __m128 *__F) {
   return (__m128)__builtin_ia32_4fmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
                                          (__v4sf)__E, (__v4sf)__A,
                                          (const __v4sf *)__F);
 }
 
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
-                       __m128 __D, __m128 __E, __m128 *__F) {
+__funline __m128 _mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
+                                    __m128 __C, __m128 __D, __m128 __E,
+                                    __m128 *__F) {
   return (__m128)__builtin_ia32_4fmaddss_mask(
       (__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
       (const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
 }
 
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
-                        __m128 __D, __m128 __E, __m128 *__F) {
+__funline __m128 _mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
+                                     __m128 __C, __m128 __D, __m128 __E,
+                                     __m128 *__F) {
   return (__m128)__builtin_ia32_4fmaddss_mask(
       (__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
       (const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
 }
 
-extern __inline __m512
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
+__funline __m512 _mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
                                    __m512 __E, __m128 *__F) {
   return (__m512)__builtin_ia32_4fnmaddps((__v16sf)__B, (__v16sf)__C,
                                           (__v16sf)__D, (__v16sf)__E,
                                           (__v16sf)__A, (const __v4sf *)__F);
 }
 
-extern __inline __m512
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C,
-                           __m512 __D, __m512 __E, __m128 *__F) {
+__funline __m512 _mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
+                                        __m512 __C, __m512 __D, __m512 __E,
+                                        __m128 *__F) {
   return (__m512)__builtin_ia32_4fnmaddps_mask(
       (__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
       (const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
 }
 
-extern __inline __m512
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C,
-                            __m512 __D, __m512 __E, __m128 *__F) {
+__funline __m512 _mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
+                                         __m512 __C, __m512 __D, __m512 __E,
+                                         __m128 *__F) {
   return (__m512)__builtin_ia32_4fnmaddps_mask(
       (__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
       (const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
 }
 
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D, __m128 __E,
-                   __m128 *__F) {
+__funline __m128 _mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
+                                __m128 __E, __m128 *__F) {
   return (__m128)__builtin_ia32_4fnmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
                                           (__v4sf)__E, (__v4sf)__A,
                                           (const __v4sf *)__F);
 }
 
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
-                        __m128 __D, __m128 __E, __m128 *__F) {
+__funline __m128 _mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
+                                     __m128 __C, __m128 __D, __m128 __E,
+                                     __m128 *__F) {
   return (__m128)__builtin_ia32_4fnmaddss_mask(
       (__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
       (const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
 }
 
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
-                         __m128 __D, __m128 __E, __m128 *__F) {
+__funline __m128 _mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
+                                      __m128 __C, __m128 __D, __m128 __E,
+                                      __m128 *__F) {
   return (__m128)__builtin_ia32_4fnmaddss_mask(
       (__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
       (const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
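As I read Intel's description of V4FMADDPS, these intrinsics pack four chained FMAs into one instruction: accumulator `__A` is updated with each of `__B`..`__E` multiplied by one broadcast scalar from the four floats at `*__F`. A scalar reference model of that reading, for a single 32-bit lane (names are hypothetical):

/* Reference model for _mm512_4fmadd_ps, per lane: b[k] holds the lane
   value of __B..__E and f points at the __m128 of four scalars, each
   broadcast across all lanes before the multiply. */
float fmadd4_lane(float a, const float b[4], const float f[4]) {
  for (int k = 0; k < 4; ++k) {
    a += b[k] * f[k];
  }
  return a;
}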
41  third_party/intel/avx5124vnniwintrin.internal.h  (vendored)
@@ -12,58 +12,49 @@
 #define __DISABLE_AVX5124VNNIW__
 #endif /* __AVX5124VNNIW__ */
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
-                         __m512i __E, __m128i *__F) {
+__funline __m512i _mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C,
+                                       __m512i __D, __m512i __E, __m128i *__F) {
   return (__m512i)__builtin_ia32_vp4dpwssd((__v16si)__B, (__v16si)__C,
                                            (__v16si)__D, (__v16si)__E,
                                            (__v16si)__A, (const __v4si *)__F);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U, __m512i __B,
-                              __m512i __C, __m512i __D, __m512i __E,
-                              __m128i *__F) {
+__funline __m512i _mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U,
+                                            __m512i __B, __m512i __C, __m512i __D,
+                                            __m512i __E, __m128i *__F) {
   return (__m512i)__builtin_ia32_vp4dpwssd_mask(
       (__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
       (const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A, __m512i __B,
-                               __m512i __C, __m512i __D, __m512i __E,
+__funline __m512i _mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A,
+                                             __m512i __B, __m512i __C,
+                                             __m512i __D, __m512i __E,
                                              __m128i *__F) {
   return (__m512i)__builtin_ia32_vp4dpwssd_mask(
       (__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
       (const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
-                          __m512i __E, __m128i *__F) {
+__funline __m512i _mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C,
+                                        __m512i __D, __m512i __E, __m128i *__F) {
   return (__m512i)__builtin_ia32_vp4dpwssds((__v16si)__B, (__v16si)__C,
                                             (__v16si)__D, (__v16si)__E,
                                             (__v16si)__A, (const __v4si *)__F);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U, __m512i __B,
-                               __m512i __C, __m512i __D, __m512i __E,
+__funline __m512i _mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U,
+                                             __m512i __B, __m512i __C,
+                                             __m512i __D, __m512i __E,
                                              __m128i *__F) {
   return (__m512i)__builtin_ia32_vp4dpwssds_mask(
       (__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
       (const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A, __m512i __B,
-                                __m512i __C, __m512i __D, __m512i __E,
+__funline __m512i _mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A,
+                                              __m512i __B, __m512i __C,
+                                              __m512i __D, __m512i __E,
                                               __m128i *__F) {
   return (__m512i)__builtin_ia32_vp4dpwssds_mask(
       (__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
103  third_party/intel/avx512bitalgintrin.internal.h  (vendored)
@@ -12,15 +12,11 @@
 #define __DISABLE_AVX512BITALG__
 #endif /* __AVX512BITALG__ */
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_popcnt_epi8(__m512i __A) {
+__funline __m512i _mm512_popcnt_epi8(__m512i __A) {
   return (__m512i)__builtin_ia32_vpopcountb_v64qi((__v64qi)__A);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_popcnt_epi16(__m512i __A) {
+__funline __m512i _mm512_popcnt_epi16(__m512i __A) {
   return (__m512i)__builtin_ia32_vpopcountw_v32hi((__v32hi)__A);
 }
 
@@ -35,43 +31,34 @@ extern __inline __m512i
 #define __DISABLE_AVX512BITALGBW__
 #endif /* __AVX512VLBW__ */
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
+__funline __m512i _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U,
+                                          __m512i __B) {
   return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
       (__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) {
+__funline __m512i _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
       (__v64qi)__A, (__v64qi)_mm512_setzero_si512(), (__mmask64)__U);
 }
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
+__funline __m512i _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U,
+                                           __m512i __B) {
   return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
       (__v32hi)__A, (__v32hi)__B, (__mmask32)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) {
+__funline __m512i _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
       (__v32hi)__A, (__v32hi)_mm512_setzero_si512(), (__mmask32)__U);
 }
 
-extern __inline __mmask64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
+__funline __mmask64 _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
   return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
       (__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
 }
 
-extern __inline __mmask64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A, __m512i __B) {
+__funline __mmask64 _mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A,
                                                       __m512i __B) {
   return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
       (__v64qi)__A, (__v64qi)__B, (__mmask64)__M);
 }
 
@@ -88,30 +75,24 @@ extern __inline __mmask64
 #define __DISABLE_AVX512BITALGVLBW__
 #endif /* __AVX512VLBW__ */
 
-extern __inline __m256i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
+__funline __m256i _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U,
+                                          __m256i __B) {
   return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
       (__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
 }
 
-extern __inline __m256i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) {
+__funline __m256i _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
      (__v32qi)__A, (__v32qi)_mm256_setzero_si256(), (__mmask32)__U);
 }
 
-extern __inline __mmask32
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
+__funline __mmask32 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
   return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
       (__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
 }
 
-extern __inline __mmask32
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A, __m256i __B) {
+__funline __mmask32 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A,
                                                       __m256i __B) {
   return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
       (__v32qi)__A, (__v32qi)__B, (__mmask32)__M);
 }
 
@@ -127,81 +108,59 @@ extern __inline __mmask32
 #define __DISABLE_AVX512BITALGVL__
 #endif /* __AVX512VLBW__ */
 
-extern __inline __mmask16
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
+__funline __mmask16 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
   return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
       (__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
 }
 
-extern __inline __mmask16
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A, __m128i __B) {
+__funline __mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A,
                                                    __m128i __B) {
   return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
       (__v16qi)__A, (__v16qi)__B, (__mmask16)__M);
 }
 
-extern __inline __m256i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_popcnt_epi8(__m256i __A) {
+__funline __m256i _mm256_popcnt_epi8(__m256i __A) {
   return (__m256i)__builtin_ia32_vpopcountb_v32qi((__v32qi)__A);
 }
 
-extern __inline __m256i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_popcnt_epi16(__m256i __A) {
+__funline __m256i _mm256_popcnt_epi16(__m256i __A) {
   return (__m256i)__builtin_ia32_vpopcountw_v16hi((__v16hi)__A);
 }
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_popcnt_epi8(__m128i __A) {
+__funline __m128i _mm_popcnt_epi8(__m128i __A) {
   return (__m128i)__builtin_ia32_vpopcountb_v16qi((__v16qi)__A);
 }
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_popcnt_epi16(__m128i __A) {
+__funline __m128i _mm_popcnt_epi16(__m128i __A) {
   return (__m128i)__builtin_ia32_vpopcountw_v8hi((__v8hi)__A);
 }
 
-extern __inline __m256i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
+__funline __m256i _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U,
                                            __m256i __B) {
   return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
       (__v16hi)__A, (__v16hi)__B, (__mmask16)__U);
 }
 
-extern __inline __m256i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
+__funline __m256i _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
       (__v16hi)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
 }
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
+__funline __m128i _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
   return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
       (__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
 }
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
+__funline __m128i _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
       (__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__U);
 }
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
+__funline __m128i _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
   return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask((__v8hi)__A, (__v8hi)__B,
                                                       (__mmask8)__U);
 }
 
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
+__funline __m128i _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask(
       (__v8hi)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
 }
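`_mm512_popcnt_epi8` counts set bits independently in each of the 64 bytes; the `mask`/`maskz` forms merge into the passthrough operand or zero the inactive lanes under the `__mmask` argument. A scalar reference model of the masked form (illustrative, not library code):

#include <stdint.h>

/* Reference model for _mm512_mask_popcnt_epi8: lane i of the result is
   popcount(src2[i]) when mask bit i is set, else the passthrough src1[i]. */
void mask_popcnt_epi8_ref(uint8_t dst[64], const uint8_t src1[64],
                          uint64_t mask, const uint8_t src2[64]) {
  for (int i = 0; i < 64; ++i) {
    dst[i] = (mask >> i & 1) ? (uint8_t)__builtin_popcount(src2[i]) : src1[i];
  }
}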
1430  third_party/intel/avx512bwintrin.internal.h  (vendored; diff suppressed because it is too large)
60  third_party/intel/avx512cdintrin.internal.h  (vendored)
@@ -20,99 +20,75 @@ typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
 typedef unsigned char __mmask8;
 typedef unsigned short __mmask16;
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_conflict_epi32(__m512i __A) {
+__funline __m512i _mm512_conflict_epi32(__m512i __A) {
   return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
       (__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+__funline __m512i _mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U,
                                              __m512i __A) {
   return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
       (__v16si)__A, (__v16si)__W, (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
+__funline __m512i _mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
       (__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_conflict_epi64(__m512i __A) {
+__funline __m512i _mm512_conflict_epi64(__m512i __A) {
   return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
       (__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+__funline __m512i _mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U,
                                              __m512i __A) {
   return (__m512i)__builtin_ia32_vpconflictdi_512_mask((__v8di)__A, (__v8di)__W,
                                                        (__mmask8)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
+__funline __m512i _mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
       (__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_lzcnt_epi64(__m512i __A) {
+__funline __m512i _mm512_lzcnt_epi64(__m512i __A) {
   return (__m512i)__builtin_ia32_vplzcntq_512_mask(
       (__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+__funline __m512i _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U,
                                           __m512i __A) {
   return (__m512i)__builtin_ia32_vplzcntq_512_mask((__v8di)__A, (__v8di)__W,
                                                    (__mmask8)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
+__funline __m512i _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_vplzcntq_512_mask(
       (__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_lzcnt_epi32(__m512i __A) {
+__funline __m512i _mm512_lzcnt_epi32(__m512i __A) {
   return (__m512i)__builtin_ia32_vplzcntd_512_mask(
       (__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+__funline __m512i _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U,
                                           __m512i __A) {
   return (__m512i)__builtin_ia32_vplzcntd_512_mask((__v16si)__A, (__v16si)__W,
                                                    (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
+__funline __m512i _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_vplzcntd_512_mask(
       (__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_broadcastmb_epi64(__mmask8 __A) {
+__funline __m512i _mm512_broadcastmb_epi64(__mmask8 __A) {
   return (__m512i)__builtin_ia32_broadcastmb512(__A);
 }
 
-extern __inline __m512i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm512_broadcastmw_epi32(__mmask16 __A) {
+__funline __m512i _mm512_broadcastmw_epi32(__mmask16 __A) {
   return (__m512i)__builtin_ia32_broadcastmw512(__A);
 }
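`vpconflictd` underpins vectorized scatter and histogram code with duplicate indices: for each lane it reports, as a bitmask, which lower-numbered lanes hold the same value. A scalar reference model of the semantics (illustrative):

#include <stdint.h>

/* Reference model for _mm512_conflict_epi32: bit j of out[i] is set
   iff j < i and in[j] == in[i]. Lane 0 is therefore always zero. */
void conflict_epi32_ref(uint32_t out[16], const uint32_t in[16]) {
  for (int i = 0; i < 16; ++i) {
    uint32_t m = 0;
    for (int j = 0; j < i; ++j) {
      if (in[j] == in[i]) m |= 1u << j;
    }
    out[i] = m;
  }
}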
959  third_party/intel/avx512dqintrin.internal.h  (vendored; diff suppressed because it is too large)
99  third_party/intel/avx512erintrin.internal.h  (vendored)
@ -21,159 +21,126 @@ typedef unsigned char __mmask8;
|
|||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_pd(__m512d __A, int __R) {
|
||||
__funline __m512d _mm512_exp2a23_round_pd(__m512d __A, int __R) {
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d __attribute__((__gnu_inline__, __always_inline__,
|
||||
__artificial__))
|
||||
_mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U, __m512d __A, int __R) {
|
||||
__funline __m512d _mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A, int __R) {
|
||||
__funline __m512d _mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_ps(__m512 __A, int __R) {
|
||||
__funline __m512 _mm512_exp2a23_round_ps(__m512 __A, int __R) {
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512 __attribute__((__gnu_inline__, __always_inline__,
|
||||
__artificial__))
|
||||
_mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U, __m512 __A, int __R) {
|
__funline __m512 _mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U,
                                              __m512 __A, int __R) {
  return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
                                            (__mmask16)__U, __R);
}

__funline __m512 _mm512_maskz_exp2a23_round_ps(__mmask16 __U, __m512 __A,
                                               int __R) {
  return (__m512)__builtin_ia32_exp2ps_mask(
      (__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
}

__funline __m512d _mm512_rcp28_round_pd(__m512d __A, int __R) {
  __m512d __W;
  return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
                                              (__mmask8)-1, __R);
}

__funline __m512d _mm512_mask_rcp28_round_pd(__m512d __W, __mmask8 __U,
                                             __m512d __A, int __R) {
  return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
                                              (__mmask8)__U, __R);
}

__funline __m512d _mm512_maskz_rcp28_round_pd(__mmask8 __U, __m512d __A,
                                              int __R) {
  return (__m512d)__builtin_ia32_rcp28pd_mask(
      (__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
}

__funline __m512 _mm512_rcp28_round_ps(__m512 __A, int __R) {
  __m512 __W;
  return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
                                             (__mmask16)-1, __R);
}

__funline __m512 _mm512_mask_rcp28_round_ps(__m512 __W, __mmask16 __U, __m512 __A,
                                            int __R) {
  return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
                                             (__mmask16)__U, __R);
}

__funline __m512 _mm512_maskz_rcp28_round_ps(__mmask16 __U, __m512 __A, int __R) {
  return (__m512)__builtin_ia32_rcp28ps_mask(
      (__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
}

__funline __m128d _mm_rcp28_round_sd(__m128d __A, __m128d __B, int __R) {
  return (__m128d)__builtin_ia32_rcp28sd_round((__v2df)__B, (__v2df)__A, __R);
}

__funline __m128 _mm_rcp28_round_ss(__m128 __A, __m128 __B, int __R) {
  return (__m128)__builtin_ia32_rcp28ss_round((__v4sf)__B, (__v4sf)__A, __R);
}

__funline __m512d _mm512_rsqrt28_round_pd(__m512d __A, int __R) {
  __m512d __W;
  return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
                                                (__mmask8)-1, __R);
}

__funline __m512d _mm512_mask_rsqrt28_round_pd(__m512d __W, __mmask8 __U,
                                               __m512d __A, int __R) {
  return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
                                                (__mmask8)__U, __R);
}

__funline __m512d _mm512_maskz_rsqrt28_round_pd(__mmask8 __U, __m512d __A,
                                                int __R) {
  return (__m512d)__builtin_ia32_rsqrt28pd_mask(
      (__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
}

__funline __m512 _mm512_rsqrt28_round_ps(__m512 __A, int __R) {
  __m512 __W;
  return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
                                               (__mmask16)-1, __R);
}

__funline __m512 _mm512_mask_rsqrt28_round_ps(__m512 __W, __mmask16 __U,
                                              __m512 __A, int __R) {
  return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
                                               (__mmask16)__U, __R);
}

__funline __m512 _mm512_maskz_rsqrt28_round_ps(__mmask16 __U, __m512 __A,
                                               int __R) {
  return (__m512)__builtin_ia32_rsqrt28ps_mask(
      (__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
}

__funline __m128d _mm_rsqrt28_round_sd(__m128d __A, __m128d __B, int __R) {
  return (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)__B, (__v2df)__A, __R);
}

__funline __m128 _mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) {
  return (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)__B, (__v4sf)__A, __R);
}
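Note: the exp2a23/rcp28/rsqrt28 intrinsics above are the AVX-512ER
approximation instructions (2^x, 1/x, 1/sqrt(x) to roughly 28 bits of
precision); the final argument selects rounding/exception behavior. A minimal
usage sketch, assuming AVX512ER hardware and that this header is reached
through immintrin.h as usual:

/* Approximate reciprocal square root of 16 floats at once.
   _MM_FROUND_NO_EXC suppresses floating-point exceptions. */
#include <immintrin.h>

void recip_sqrt16(const float in[16], float out[16]) {
  __m512 x = _mm512_loadu_ps(in);
  __m512 r = _mm512_rsqrt28_round_ps(x, _MM_FROUND_NO_EXC);
  _mm512_storeu_ps(out, r);
}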
6393
third_party/intel/avx512fintrin.internal.h
vendored
File diff suppressed because it is too large

32
third_party/intel/avx512ifmaintrin.internal.h
vendored
@@ -11,48 +11,36 @@
#define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */

__funline __m512i _mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
  return (__m512i)__builtin_ia32_vpmadd52luq512_mask((__v8di)__X, (__v8di)__Y,
                                                     (__v8di)__Z, (__mmask8)-1);
}

__funline __m512i _mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
  return (__m512i)__builtin_ia32_vpmadd52huq512_mask((__v8di)__X, (__v8di)__Y,
                                                     (__v8di)__Z, (__mmask8)-1);
}

__funline __m512i _mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M,
                                             __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_vpmadd52luq512_mask(
      (__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
}

__funline __m512i _mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M,
                                             __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_vpmadd52huq512_mask(
      (__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
}

__funline __m512i _mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X,
                                              __m512i __Y, __m512i __Z) {
  return (__m512i)__builtin_ia32_vpmadd52luq512_maskz(
      (__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
}

__funline __m512i _mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X,
                                              __m512i __Y, __m512i __Z) {
  return (__m512i)__builtin_ia32_vpmadd52huq512_maskz(
      (__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
}
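Note: the madd52 intrinsics treat each 64-bit lane as an unsigned 52-bit
integer, form the 104-bit product, and add its low (lo) or high (hi) 52 bits
to the accumulator operand. A sketch of one limb step of multi-precision
multiplication; the function name and the carry handling left to the caller
are illustrative:

/* One radix-2^52 multiply-accumulate step: acc_lo/acc_hi collect the
   low/high product halves; the caller propagates carries later. */
void madd52_step(__m512i *acc_lo, __m512i *acc_hi, __m512i a, __m512i b) {
  *acc_lo = _mm512_madd52lo_epu64(*acc_lo, a, b);
  *acc_hi = _mm512_madd52hi_epu64(*acc_hi, a, b);
}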
60
third_party/intel/avx512ifmavlintrin.internal.h
vendored
@@ -12,90 +12,70 @@
#define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */

__funline __m128i _mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52luq128_mask((__v2di)__X, (__v2di)__Y,
                                                     (__v2di)__Z, (__mmask8)-1);
}

__funline __m128i _mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52huq128_mask((__v2di)__X, (__v2di)__Y,
                                                     (__v2di)__Z, (__mmask8)-1);
}

__funline __m256i _mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
  return (__m256i)__builtin_ia32_vpmadd52luq256_mask((__v4di)__X, (__v4di)__Y,
                                                     (__v4di)__Z, (__mmask8)-1);
}

__funline __m256i _mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
  return (__m256i)__builtin_ia32_vpmadd52huq256_mask((__v4di)__X, (__v4di)__Y,
                                                     (__v4di)__Z, (__mmask8)-1);
}

__funline __m128i _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X,
                                          __m128i __Y) {
  return (__m128i)__builtin_ia32_vpmadd52luq128_mask(
      (__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
}

__funline __m128i _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X,
                                          __m128i __Y) {
  return (__m128i)__builtin_ia32_vpmadd52huq128_mask(
      (__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
}

__funline __m256i _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M,
                                             __m256i __X, __m256i __Y) {
  return (__m256i)__builtin_ia32_vpmadd52luq256_mask(
      (__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
}

__funline __m256i _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M,
                                             __m256i __X, __m256i __Y) {
  return (__m256i)__builtin_ia32_vpmadd52huq256_mask(
      (__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
}

__funline __m128i _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
                                           __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52luq128_maskz(
      (__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
}

__funline __m128i _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
                                           __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52huq128_maskz(
      (__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
}

__funline __m256i _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X,
                                              __m256i __Y, __m256i __Z) {
  return (__m256i)__builtin_ia32_vpmadd52luq256_maskz(
      (__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
}

__funline __m256i _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X,
                                              __m256i __Y, __m256i __Z) {
  return (__m256i)__builtin_ia32_vpmadd52huq256_maskz(
      (__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
}
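Note: these VL variants are the same operation at 128/256-bit width; the
mask_ form keeps the accumulator lane where a mask bit is clear, while the
maskz_ form zeroes it. A small sketch contrasting the two (mask value 0x5
selects lanes 0 and 2; the function name is illustrative):

void mask_vs_maskz(__m256i w, __m256i x, __m256i y, __m256i z,
                   __m256i *merged, __m256i *zeroed) {
  *merged = _mm256_mask_madd52lo_epu64(w, 0x5, x, y);  /* other lanes keep w */
  *zeroed = _mm256_maskz_madd52lo_epu64(0x5, x, y, z); /* other lanes -> 0  */
}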
73
third_party/intel/avx512pfintrin.internal.h
vendored
@@ -18,130 +18,99 @@ typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

#ifdef __OPTIMIZE__
__funline void _mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr,
                                            int __scale, int __hint) {
  __builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
                             __hint);
}

__funline void _mm512_prefetch_i32gather_ps(__m512i __index, void const *__addr,
                                            int __scale, int __hint) {
  __builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
                             __scale, __hint);
}

__funline void _mm512_mask_prefetch_i32gather_pd(__m256i __index, __mmask8 __mask,
                                                 void const *__addr, int __scale,
                                                 int __hint) {
  __builtin_ia32_gatherpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
}

__funline void _mm512_mask_prefetch_i32gather_ps(__m512i __index,
                                                 __mmask16 __mask,
                                                 void const *__addr, int __scale,
                                                 int __hint) {
  __builtin_ia32_gatherpfdps(__mask, (__v16si)__index, __addr, __scale, __hint);
}

__funline void _mm512_prefetch_i64gather_pd(__m512i __index, void const *__addr,
                                            int __scale, int __hint) {
  __builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
                             __hint);
}

__funline void _mm512_prefetch_i64gather_ps(__m512i __index, void const *__addr,
                                            int __scale, int __hint) {
  __builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
                             __hint);
}

__funline void _mm512_mask_prefetch_i64gather_pd(__m512i __index, __mmask8 __mask,
                                                 void const *__addr, int __scale,
                                                 int __hint) {
  __builtin_ia32_gatherpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
}

__funline void _mm512_mask_prefetch_i64gather_ps(__m512i __index, __mmask8 __mask,
                                                 void const *__addr, int __scale,
                                                 int __hint) {
  __builtin_ia32_gatherpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
}

__funline void _mm512_prefetch_i32scatter_pd(void *__addr, __m256i __index,
                                             int __scale, int __hint) {
  __builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
                              __hint);
}

__funline void _mm512_prefetch_i32scatter_ps(void *__addr, __m512i __index,
                                             int __scale, int __hint) {
  __builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
                              __scale, __hint);
}

__funline void _mm512_mask_prefetch_i32scatter_pd(void *__addr, __mmask8 __mask,
                                                  __m256i __index, int __scale,
                                                  int __hint) {
  __builtin_ia32_scatterpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
}

__funline void _mm512_mask_prefetch_i32scatter_ps(void *__addr, __mmask16 __mask,
                                                  __m512i __index, int __scale,
                                                  int __hint) {
  __builtin_ia32_scatterpfdps(__mask, (__v16si)__index, __addr, __scale,
                              __hint);
}

__funline void _mm512_prefetch_i64scatter_pd(void *__addr, __m512i __index,
                                             int __scale, int __hint) {
  __builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
                              __hint);
}

__funline void _mm512_prefetch_i64scatter_ps(void *__addr, __m512i __index,
                                             int __scale, int __hint) {
  __builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
                              __hint);
}

__funline void _mm512_mask_prefetch_i64scatter_pd(void *__addr, __mmask8 __mask,
                                                  __m512i __index, int __scale,
                                                  int __hint) {
  __builtin_ia32_scatterpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
}

__funline void _mm512_mask_prefetch_i64scatter_ps(void *__addr, __mmask8 __mask,
                                                  __m512i __index, int __scale,
                                                  int __hint) {
  __builtin_ia32_scatterpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
}
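Note: the AVX512PF intrinsics are pure cache hints; architecturally they read
and write nothing, so removing them can only affect performance. A sketch
that prefetches the next iteration's gather targets one step ahead; the loop
shape is illustrative and _mm512_i64gather_pd comes from AVX512F:

/* Prefetch next iteration's addresses toward L1 (_MM_HINT_T0);
   scale 8 indexes an array of doubles. */
void gather_all(const double *base, const __m512i *idx, __m512d *out, int n) {
  for (int i = 0; i < n; ++i) {
    if (i + 1 < n)
      _mm512_prefetch_i64gather_pd(idx[i + 1], base, 8, _MM_HINT_T0);
    out[i] = _mm512_i64gather_pd(idx[i], base, 8);
  }
}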
230
third_party/intel/avx512vbmi2intrin.internal.h
vendored
@@ -13,101 +13,77 @@
#endif /* __AVX512VBMI2__ */

#ifdef __OPTIMIZE__
__funline __m512i _mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) {
  return (__m512i)__builtin_ia32_vpshrd_v32hi((__v32hi)__A, (__v32hi)__B, __C);
}

__funline __m512i _mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) {
  return (__m512i)__builtin_ia32_vpshrd_v16si((__v16si)__A, (__v16si)__B, __C);
}

__funline __m512i _mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
                                          __m512i __D, int __E) {
  return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
      (__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B,
                                           __m512i __C, int __D) {
  return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
      (__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
      (__mmask16)__A);
}

__funline __m512i _mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) {
  return (__m512i)__builtin_ia32_vpshrd_v8di((__v8di)__A, (__v8di)__B, __C);
}

__funline __m512i _mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
                                          __m512i __D, int __E) {
  return (__m512i)__builtin_ia32_vpshrd_v8di_mask((__v8di)__C, (__v8di)__D, __E,
                                                  (__v8di)__A, (__mmask8)__B);
}

__funline __m512i _mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
                                           int __D) {
  return (__m512i)__builtin_ia32_vpshrd_v8di_mask(
      (__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
      (__mmask8)__A);
}

__funline __m512i _mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) {
  return (__m512i)__builtin_ia32_vpshld_v32hi((__v32hi)__A, (__v32hi)__B, __C);
}

__funline __m512i _mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) {
  return (__m512i)__builtin_ia32_vpshld_v16si((__v16si)__A, (__v16si)__B, __C);
}

__funline __m512i _mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
                                          __m512i __D, int __E) {
  return (__m512i)__builtin_ia32_vpshld_v16si_mask(
      (__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B,
                                           __m512i __C, int __D) {
  return (__m512i)__builtin_ia32_vpshld_v16si_mask(
      (__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
      (__mmask16)__A);
}

__funline __m512i _mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) {
  return (__m512i)__builtin_ia32_vpshld_v8di((__v8di)__A, (__v8di)__B, __C);
}

__funline __m512i _mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
                                          __m512i __D, int __E) {
  return (__m512i)__builtin_ia32_vpshld_v8di_mask((__v8di)__C, (__v8di)__D, __E,
                                                  (__v8di)__A, (__mmask8)__B);
}

__funline __m512i _mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
                                           int __D) {
  return (__m512i)__builtin_ia32_vpshld_v8di_mask(
      (__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
      (__mmask8)__A);
}
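Note: shldi/shrdi are funnel shifts by an immediate: each lane pair is
concatenated into a double-width value, shifted, and one half kept, which is
why these forms live under __OPTIMIZE__ (the builtin needs a compile-time
constant count). A scalar sketch of what one 32-bit lane of
_mm512_shldi_epi32 computes:

/* Scalar model of one lane: (a:b) shifted left by n, upper half kept. */
static unsigned shldi32_lane(unsigned a, unsigned b, int n) {
  n &= 31;
  return n ? (a << n) | (b >> (32 - n)) : a;
}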
@@ -161,99 +137,79 @@ extern __inline __m512i
      (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
#endif

__funline __m512i _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpshrdv_v32hi((__v32hi)__A, (__v32hi)__B,
                                               (__v32hi)__C);
}

__funline __m512i _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpshrdv_v16si((__v16si)__A, (__v16si)__B,
                                               (__v16si)__C);
}

__funline __m512i _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
                                          __m512i __D) {
  return (__m512i)__builtin_ia32_vpshrdv_v16si_mask(
      (__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B,
                                           __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz(
      (__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}

__funline __m512i _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpshrdv_v8di((__v8di)__A, (__v8di)__B,
                                              (__v8di)__C);
}

__funline __m512i _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
                                          __m512i __D) {
  return (__m512i)__builtin_ia32_vpshrdv_v8di_mask((__v8di)__A, (__v8di)__C,
                                                   (__v8di)__D, (__mmask8)__B);
}

__funline __m512i _mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
                                           __m512i __D) {
  return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz((__v8di)__B, (__v8di)__C,
                                                    (__v8di)__D, (__mmask8)__A);
}

__funline __m512i _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpshldv_v32hi((__v32hi)__A, (__v32hi)__B,
                                               (__v32hi)__C);
}

__funline __m512i _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpshldv_v16si((__v16si)__A, (__v16si)__B,
                                               (__v16si)__C);
}

__funline __m512i _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
                                          __m512i __D) {
  return (__m512i)__builtin_ia32_vpshldv_v16si_mask(
      (__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B,
                                           __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpshldv_v16si_maskz(
      (__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}

__funline __m512i _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpshldv_v8di((__v8di)__A, (__v8di)__B,
                                              (__v8di)__C);
}

__funline __m512i _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
                                          __m512i __D) {
  return (__m512i)__builtin_ia32_vpshldv_v8di_mask((__v8di)__A, (__v8di)__C,
                                                   (__v8di)__D, (__mmask8)__B);
}

__funline __m512i _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
                                           __m512i __D) {
  return (__m512i)__builtin_ia32_vpshldv_v8di_maskz((__v8di)__B, (__v8di)__C,
                                                    (__v8di)__D, (__mmask8)__A);
}
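Note: the shrdv/shldv forms read per-lane shift counts from a third vector,
so they also work when the count is not a compile-time constant. Passing the
same vector as both funnel halves turns shldv into a rotate, as this sketch
shows (the function name is illustrative):

/* Per-lane 16-bit rotate-left built from a variable funnel shift. */
__m512i rotl16_per_lane(__m512i v, __m512i counts) {
  return _mm512_shldv_epi16(v, v, counts);
}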
@@ -270,132 +226,106 @@ _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) {
#define __DISABLE_AVX512VBMI2BW__
#endif /* __AVX512VBMI2BW__ */

__funline __m512i _mm512_mask_compress_epi8(__m512i __A, __mmask64 __B,
                                            __m512i __C) {
  return (__m512i)__builtin_ia32_compressqi512_mask((__v64qi)__C, (__v64qi)__A,
                                                    (__mmask64)__B);
}

__funline __m512i _mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) {
  return (__m512i)__builtin_ia32_compressqi512_mask(
      (__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
}

__funline void _mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B,
                                               __m512i __C) {
  __builtin_ia32_compressstoreuqi512_mask((__v64qi *)__A, (__v64qi)__C,
                                          (__mmask64)__B);
}

__funline __m512i _mm512_mask_compress_epi16(__m512i __A, __mmask32 __B,
                                             __m512i __C) {
  return (__m512i)__builtin_ia32_compresshi512_mask((__v32hi)__C, (__v32hi)__A,
                                                    (__mmask32)__B);
}

__funline __m512i _mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) {
  return (__m512i)__builtin_ia32_compresshi512_mask(
      (__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
}

__funline void _mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B,
                                                __m512i __C) {
  __builtin_ia32_compressstoreuhi512_mask((__v32hi *)__A, (__v32hi)__C,
                                          (__mmask32)__B);
}

__funline __m512i _mm512_mask_expand_epi8(__m512i __A, __mmask64 __B,
                                          __m512i __C) {
  return (__m512i)__builtin_ia32_expandqi512_mask((__v64qi)__C, (__v64qi)__A,
                                                  (__mmask64)__B);
}

__funline __m512i _mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) {
  return (__m512i)__builtin_ia32_expandqi512_maskz(
      (__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
}

__funline __m512i _mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B,
                                               const void *__C) {
  return (__m512i)__builtin_ia32_expandloadqi512_mask(
      (const __v64qi *)__C, (__v64qi)__A, (__mmask64)__B);
}

__funline __m512i _mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) {
  return (__m512i)__builtin_ia32_expandloadqi512_maskz(
      (const __v64qi *)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
}

__funline __m512i _mm512_mask_expand_epi16(__m512i __A, __mmask32 __B,
                                           __m512i __C) {
  return (__m512i)__builtin_ia32_expandhi512_mask((__v32hi)__C, (__v32hi)__A,
                                                  (__mmask32)__B);
}

__funline __m512i _mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) {
  return (__m512i)__builtin_ia32_expandhi512_maskz(
      (__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
}

__funline __m512i _mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B,
                                                const void *__C) {
  return (__m512i)__builtin_ia32_expandloadhi512_mask(
      (const __v32hi *)__C, (__v32hi)__A, (__mmask32)__B);
}

__funline __m512i _mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) {
  return (__m512i)__builtin_ia32_expandloadhi512_maskz(
      (const __v32hi *)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
}
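Note: compress packs the lanes selected by the mask toward the low end
(compressstoreu writes them contiguously to memory), and expand is the
inverse, scattering consecutive elements back into the masked lane positions;
together they implement stream compaction. A sketch that stores only the
bytes selected by a mask and reports how many survived; the helper name is
illustrative:

#include <stddef.h>
#include <stdint.h>

/* Left-pack the selected bytes of v into buf; popcount = lanes kept. */
size_t keep_selected(uint8_t *buf, __m512i v, __mmask64 keep) {
  _mm512_mask_compressstoreu_epi8(buf, keep, v);
  return (size_t)__builtin_popcountll((unsigned long long)keep);
}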
#ifdef __OPTIMIZE__
__funline __m512i _mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
                                          __m512i __D, int __E) {
  return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
      (__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
}

__funline __m512i _mm512_maskz_shrdi_epi16(__mmask32 __A, __m512i __B,
                                           __m512i __C, int __D) {
  return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
      (__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
      (__mmask32)__A);
}

__funline __m512i _mm512_mask_shldi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
                                          __m512i __D, int __E) {
  return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
      (__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
}

__funline __m512i _mm512_maskz_shldi_epi16(__mmask32 __A, __m512i __B,
                                           __m512i __C, int __D) {
  return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
      (__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
      (__mmask32)__A);
}
@@ -418,30 +348,26 @@
      (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
#endif

__funline __m512i _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
                                          __m512i __D) {
  return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask(
      (__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
}

__funline __m512i _mm512_maskz_shrdv_epi16(__mmask32 __A, __m512i __B,
                                           __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz(
      (__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
}

__funline __m512i _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
                                          __m512i __D) {
  return (__m512i)__builtin_ia32_vpshldv_v32hi_mask(
      (__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
}

__funline __m512i _mm512_maskz_shldv_epi16(__mmask32 __A, __m512i __B,
                                           __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz(
      (__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
}
470
third_party/intel/avx512vbmi2vlintrin.internal.h
vendored
@@ -12,414 +12,322 @@
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMIVL__ */

__funline __m128i _mm_mask_compress_epi8(__m128i __A, __mmask16 __B,
                                         __m128i __C) {
  return (__m128i)__builtin_ia32_compressqi128_mask((__v16qi)__C, (__v16qi)__A,
                                                    (__mmask16)__B);
}

__funline __m128i _mm_maskz_compress_epi8(__mmask16 __A, __m128i __B) {
  return (__m128i)__builtin_ia32_compressqi128_mask(
      (__v16qi)__B, (__v16qi)_mm_setzero_si128(), (__mmask16)__A);
}

__funline void _mm256_mask_compressstoreu_epi16(void *__A, __mmask16 __B,
                                                __m256i __C) {
  __builtin_ia32_compressstoreuhi256_mask((__v16hi *)__A, (__v16hi)__C,
                                          (__mmask16)__B);
}

__funline __m128i _mm_mask_compress_epi16(__m128i __A, __mmask8 __B,
                                          __m128i __C) {
  return (__m128i)__builtin_ia32_compresshi128_mask((__v8hi)__C, (__v8hi)__A,
                                                    (__mmask8)__B);
}

__funline __m128i _mm_maskz_compress_epi16(__mmask8 __A, __m128i __B) {
  return (__m128i)__builtin_ia32_compresshi128_mask(
      (__v8hi)__B, (__v8hi)_mm_setzero_si128(), (__mmask8)__A);
}

__funline __m256i _mm256_mask_compress_epi16(__m256i __A, __mmask16 __B,
                                             __m256i __C) {
  return (__m256i)__builtin_ia32_compresshi256_mask((__v16hi)__C, (__v16hi)__A,
                                                    (__mmask16)__B);
}

__funline __m256i _mm256_maskz_compress_epi16(__mmask16 __A, __m256i __B) {
  return (__m256i)__builtin_ia32_compresshi256_mask(
      (__v16hi)__B, (__v16hi)_mm256_setzero_si256(), (__mmask16)__A);
}

__funline void _mm_mask_compressstoreu_epi8(void *__A, __mmask16 __B,
                                            __m128i __C) {
  __builtin_ia32_compressstoreuqi128_mask((__v16qi *)__A, (__v16qi)__C,
                                          (__mmask16)__B);
}

__funline void _mm_mask_compressstoreu_epi16(void *__A, __mmask8 __B,
                                             __m128i __C) {
  __builtin_ia32_compressstoreuhi128_mask((__v8hi *)__A, (__v8hi)__C,
                                          (__mmask8)__B);
}

__funline __m128i _mm_mask_expand_epi8(__m128i __A, __mmask16 __B, __m128i __C) {
  return (__m128i)__builtin_ia32_expandqi128_mask((__v16qi)__C, (__v16qi)__A,
                                                  (__mmask16)__B);
}

__funline __m128i _mm_maskz_expand_epi8(__mmask16 __A, __m128i __B) {
  return (__m128i)__builtin_ia32_expandqi128_maskz(
      (__v16qi)__B, (__v16qi)_mm_setzero_si128(), (__mmask16)__A);
}

__funline __m128i _mm_mask_expandloadu_epi8(__m128i __A, __mmask16 __B,
                                            const void *__C) {
  return (__m128i)__builtin_ia32_expandloadqi128_mask(
      (const __v16qi *)__C, (__v16qi)__A, (__mmask16)__B);
}

__funline __m128i _mm_maskz_expandloadu_epi8(__mmask16 __A, const void *__B) {
  return (__m128i)__builtin_ia32_expandloadqi128_maskz(
      (const __v16qi *)__B, (__v16qi)_mm_setzero_si128(), (__mmask16)__A);
}

__funline __m128i _mm_mask_expand_epi16(__m128i __A, __mmask8 __B, __m128i __C) {
  return (__m128i)__builtin_ia32_expandhi128_mask((__v8hi)__C, (__v8hi)__A,
                                                  (__mmask8)__B);
}

__funline __m128i _mm_maskz_expand_epi16(__mmask8 __A, __m128i __B) {
  return (__m128i)__builtin_ia32_expandhi128_maskz(
      (__v8hi)__B, (__v8hi)_mm_setzero_si128(), (__mmask8)__A);
}

__funline __m128i _mm_mask_expandloadu_epi16(__m128i __A, __mmask8 __B,
                                             const void *__C) {
  return (__m128i)__builtin_ia32_expandloadhi128_mask(
      (const __v8hi *)__C, (__v8hi)__A, (__mmask8)__B);
}

__funline __m128i _mm_maskz_expandloadu_epi16(__mmask8 __A, const void *__B) {
  return (__m128i)__builtin_ia32_expandloadhi128_maskz(
      (const __v8hi *)__B, (__v8hi)_mm_setzero_si128(), (__mmask8)__A);
}

__funline __m256i _mm256_mask_expand_epi16(__m256i __A, __mmask16 __B,
                                           __m256i __C) {
  return (__m256i)__builtin_ia32_expandhi256_mask((__v16hi)__C, (__v16hi)__A,
                                                  (__mmask16)__B);
}

__funline __m256i _mm256_maskz_expand_epi16(__mmask16 __A, __m256i __B) {
  return (__m256i)__builtin_ia32_expandhi256_maskz(
      (__v16hi)__B, (__v16hi)_mm256_setzero_si256(), (__mmask16)__A);
}

__funline __m256i _mm256_mask_expandloadu_epi16(__m256i __A, __mmask16 __B,
                                                const void *__C) {
  return (__m256i)__builtin_ia32_expandloadhi256_mask(
      (const __v16hi *)__C, (__v16hi)__A, (__mmask16)__B);
}

__funline __m256i _mm256_maskz_expandloadu_epi16(__mmask16 __A, const void *__B) {
  return (__m256i)__builtin_ia32_expandloadhi256_maskz(
      (const __v16hi *)__B, (__v16hi)_mm256_setzero_si256(), (__mmask16)__A);
}

#ifdef __OPTIMIZE__
__funline __m256i _mm256_shrdi_epi16(__m256i __A, __m256i __B, int __C) {
  return (__m256i)__builtin_ia32_vpshrd_v16hi((__v16hi)__A, (__v16hi)__B, __C);
}

__funline __m256i _mm256_mask_shrdi_epi16(__m256i __A, __mmask16 __B, __m256i __C,
                                          __m256i __D, int __E) {
  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask(
      (__v16hi)__C, (__v16hi)__D, __E, (__v16hi)__A, (__mmask16)__B);
}

__funline __m256i _mm256_maskz_shrdi_epi16(__mmask16 __A, __m256i __B,
                                           __m256i __C, int __D) {
  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask(
      (__v16hi)__B, (__v16hi)__C, __D, (__v16hi)_mm256_setzero_si256(),
      (__mmask16)__A);
}

__funline __m256i _mm256_mask_shrdi_epi32(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D, int __E) {
  return (__m256i)__builtin_ia32_vpshrd_v8si_mask((__v8si)__C, (__v8si)__D, __E,
                                                  (__v8si)__A, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shrdi_epi32(__mmask8 __A, __m256i __B, __m256i __C,
                                           int __D) {
  return (__m256i)__builtin_ia32_vpshrd_v8si_mask(
      (__v8si)__B, (__v8si)__C, __D, (__v8si)_mm256_setzero_si256(),
      (__mmask8)__A);
}

__funline __m256i _mm256_shrdi_epi32(__m256i __A, __m256i __B, int __C) {
  return (__m256i)__builtin_ia32_vpshrd_v8si((__v8si)__A, (__v8si)__B, __C);
}

__funline __m256i _mm256_mask_shrdi_epi64(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D, int __E) {
  return (__m256i)__builtin_ia32_vpshrd_v4di_mask((__v4di)__C, (__v4di)__D, __E,
                                                  (__v4di)__A, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shrdi_epi64(__mmask8 __A, __m256i __B, __m256i __C,
                                           int __D) {
  return (__m256i)__builtin_ia32_vpshrd_v4di_mask(
      (__v4di)__B, (__v4di)__C, __D, (__v4di)_mm256_setzero_si256(),
      (__mmask8)__A);
}

__funline __m256i _mm256_shrdi_epi64(__m256i __A, __m256i __B, int __C) {
  return (__m256i)__builtin_ia32_vpshrd_v4di((__v4di)__A, (__v4di)__B, __C);
}

__funline __m128i _mm_mask_shrdi_epi16(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D, int __E) {
  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask((__v8hi)__C, (__v8hi)__D, __E,
                                                  (__v8hi)__A, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shrdi_epi16(__mmask8 __A, __m128i __B, __m128i __C,
                                        int __D) {
  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask((__v8hi)__B, (__v8hi)__C, __D,
                                                  (__v8hi)_mm_setzero_si128(),
                                                  (__mmask8)__A);
}

__funline __m128i _mm_shrdi_epi16(__m128i __A, __m128i __B, int __C) {
  return (__m128i)__builtin_ia32_vpshrd_v8hi((__v8hi)__A, (__v8hi)__B, __C);
}

__funline __m128i _mm_mask_shrdi_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D, int __E) {
  return (__m128i)__builtin_ia32_vpshrd_v4si_mask((__v4si)__C, (__v4si)__D, __E,
                                                  (__v4si)__A, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shrdi_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                        int __D) {
  return (__m128i)__builtin_ia32_vpshrd_v4si_mask((__v4si)__B, (__v4si)__C, __D,
                                                  (__v4si)_mm_setzero_si128(),
                                                  (__mmask8)__A);
}

__funline __m128i _mm_shrdi_epi32(__m128i __A, __m128i __B, int __C) {
  return (__m128i)__builtin_ia32_vpshrd_v4si((__v4si)__A, (__v4si)__B, __C);
}

__funline __m128i _mm_mask_shrdi_epi64(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D, int __E) {
  return (__m128i)__builtin_ia32_vpshrd_v2di_mask((__v2di)__C, (__v2di)__D, __E,
                                                  (__v2di)__A, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shrdi_epi64(__mmask8 __A, __m128i __B, __m128i __C,
                                        int __D) {
  return (__m128i)__builtin_ia32_vpshrd_v2di_mask((__v2di)__B, (__v2di)__C, __D,
                                                  (__v2di)_mm_setzero_si128(),
                                                  (__mmask8)__A);
}

__funline __m128i _mm_shrdi_epi64(__m128i __A, __m128i __B, int __C) {
  return (__m128i)__builtin_ia32_vpshrd_v2di((__v2di)__A, (__v2di)__B, __C);
}

__funline __m256i _mm256_shldi_epi16(__m256i __A, __m256i __B, int __C) {
  return (__m256i)__builtin_ia32_vpshld_v16hi((__v16hi)__A, (__v16hi)__B, __C);
}

__funline __m256i _mm256_mask_shldi_epi16(__m256i __A, __mmask16 __B, __m256i __C,
                                          __m256i __D, int __E) {
  return (__m256i)__builtin_ia32_vpshld_v16hi_mask(
      (__v16hi)__C, (__v16hi)__D, __E, (__v16hi)__A, (__mmask16)__B);
}

__funline __m256i _mm256_maskz_shldi_epi16(__mmask16 __A, __m256i __B,
                                           __m256i __C, int __D) {
  return (__m256i)__builtin_ia32_vpshld_v16hi_mask(
      (__v16hi)__B, (__v16hi)__C, __D, (__v16hi)_mm256_setzero_si256(),
      (__mmask16)__A);
}

__funline __m256i _mm256_mask_shldi_epi32(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D, int __E) {
  return (__m256i)__builtin_ia32_vpshld_v8si_mask((__v8si)__C, (__v8si)__D, __E,
                                                  (__v8si)__A, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shldi_epi32(__mmask8 __A, __m256i __B, __m256i __C,
                                           int __D) {
  return (__m256i)__builtin_ia32_vpshld_v8si_mask(
      (__v8si)__B, (__v8si)__C, __D, (__v8si)_mm256_setzero_si256(),
      (__mmask8)__A);
}

__funline __m256i _mm256_shldi_epi32(__m256i __A, __m256i __B, int __C) {
  return (__m256i)__builtin_ia32_vpshld_v8si((__v8si)__A, (__v8si)__B, __C);
}

__funline __m256i _mm256_mask_shldi_epi64(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D, int __E) {
  return (__m256i)__builtin_ia32_vpshld_v4di_mask((__v4di)__C, (__v4di)__D, __E,
                                                  (__v4di)__A, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shldi_epi64(__mmask8 __A, __m256i __B, __m256i __C,
                                           int __D) {
  return (__m256i)__builtin_ia32_vpshld_v4di_mask(
      (__v4di)__B, (__v4di)__C, __D, (__v4di)_mm256_setzero_si256(),
      (__mmask8)__A);
}

__funline __m256i _mm256_shldi_epi64(__m256i __A, __m256i __B, int __C) {
  return (__m256i)__builtin_ia32_vpshld_v4di((__v4di)__A, (__v4di)__B, __C);
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_shldi_epi16(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
|
||||
int __E) {
|
||||
__funline __m128i _mm_mask_shldi_epi16(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D, int __E) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v8hi_mask((__v8hi)__C, (__v8hi)__D, __E,
|
||||
(__v8hi)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_shldi_epi16(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
|
||||
__funline __m128i _mm_maskz_shldi_epi16(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
int __D) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v8hi_mask((__v8hi)__B, (__v8hi)__C, __D,
|
||||
(__v8hi)_mm_setzero_si128(),
|
||||
(__mmask8)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_shldi_epi16(__m128i __A, __m128i __B, int __C) {
|
||||
__funline __m128i _mm_shldi_epi16(__m128i __A, __m128i __B, int __C) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v8hi((__v8hi)__A, (__v8hi)__B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_shldi_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
|
||||
int __E) {
|
||||
__funline __m128i _mm_mask_shldi_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D, int __E) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v4si_mask((__v4si)__C, (__v4si)__D, __E,
|
||||
(__v4si)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_shldi_epi32(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
|
||||
__funline __m128i _mm_maskz_shldi_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
int __D) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v4si_mask((__v4si)__B, (__v4si)__C, __D,
|
||||
(__v4si)_mm_setzero_si128(),
|
||||
(__mmask8)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_shldi_epi32(__m128i __A, __m128i __B, int __C) {
|
||||
__funline __m128i _mm_shldi_epi32(__m128i __A, __m128i __B, int __C) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v4si((__v4si)__A, (__v4si)__B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_shldi_epi64(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
|
||||
int __E) {
|
||||
__funline __m128i _mm_mask_shldi_epi64(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D, int __E) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v2di_mask((__v2di)__C, (__v2di)__D, __E,
|
||||
(__v2di)__A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_shldi_epi64(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
|
||||
__funline __m128i _mm_maskz_shldi_epi64(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
int __D) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v2di_mask((__v2di)__B, (__v2di)__C, __D,
|
||||
(__v2di)_mm_setzero_si128(),
|
||||
(__mmask8)__A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_shldi_epi64(__m128i __A, __m128i __B, int __C) {
|
||||
__funline __m128i _mm_shldi_epi64(__m128i __A, __m128i __B, int __C) {
|
||||
return (__m128i)__builtin_ia32_vpshld_v2di((__v2di)__A, (__v2di)__B, __C);
|
||||
}
|
||||
#else

@@ -545,254 +453,206 @@ extern __inline __m128i
      (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(A))
#endif
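The *_shldi_* / *_shrdi_* intrinsics above are VBMI2's concatenate-and-shift
by an immediate. A minimal scalar sketch of one 16-bit lane, assuming Intel's
documented VPSHLDW semantics (shld16_lane is an illustrative helper, not part
of this header):

#include <stdint.h>

/* One lane of _mm_shldi_epi16: form a 32-bit window with the lane of
   the first operand on top, shift left by imm mod 16, and keep the
   high half. The shrdi family mirrors this: shift right, keep low. */
static uint16_t shld16_lane(uint16_t a, uint16_t b, int imm) {
  uint32_t window = ((uint32_t)a << 16) | b;
  return (uint16_t)((window << (imm & 15)) >> 16);
}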
__funline __m256i _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpshrdv_v16hi((__v16hi)__A, (__v16hi)__B,
                                               (__v16hi)__C);
}

__funline __m256i _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __B, __m256i __C,
                                          __m256i __D) {
  return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask(
      (__v16hi)__A, (__v16hi)__C, (__v16hi)__D, (__mmask16)__B);
}

__funline __m256i _mm256_maskz_shrdv_epi16(__mmask16 __A, __m256i __B,
                                           __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz(
      (__v16hi)__B, (__v16hi)__C, (__v16hi)__D, (__mmask16)__A);
}

__funline __m256i _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpshrdv_v8si((__v8si)__A, (__v8si)__B,
                                              (__v8si)__C);
}

__funline __m256i _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D) {
  return (__m256i)__builtin_ia32_vpshrdv_v8si_mask((__v8si)__A, (__v8si)__C,
                                                   (__v8si)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shrdv_epi32(__mmask8 __A, __m256i __B, __m256i __C,
                                           __m256i __D) {
  return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz((__v8si)__B, (__v8si)__C,
                                                    (__v8si)__D, (__mmask8)__A);
}

__funline __m256i _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpshrdv_v4di((__v4di)__A, (__v4di)__B,
                                              (__v4di)__C);
}

__funline __m256i _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D) {
  return (__m256i)__builtin_ia32_vpshrdv_v4di_mask((__v4di)__A, (__v4di)__C,
                                                   (__v4di)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shrdv_epi64(__mmask8 __A, __m256i __B, __m256i __C,
                                           __m256i __D) {
  return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz((__v4di)__B, (__v4di)__C,
                                                    (__v4di)__D, (__mmask8)__A);
}

__funline __m128i _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpshrdv_v8hi((__v8hi)__A, (__v8hi)__B,
                                              (__v8hi)__C);
}

__funline __m128i _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D) {
  return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask((__v8hi)__A, (__v8hi)__C,
                                                   (__v8hi)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shrdv_epi16(__mmask8 __A, __m128i __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz((__v8hi)__B, (__v8hi)__C,
                                                    (__v8hi)__D, (__mmask8)__A);
}

__funline __m128i _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpshrdv_v4si((__v4si)__A, (__v4si)__B,
                                              (__v4si)__C);
}

__funline __m128i _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D) {
  return (__m128i)__builtin_ia32_vpshrdv_v4si_mask((__v4si)__A, (__v4si)__C,
                                                   (__v4si)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shrdv_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz((__v4si)__B, (__v4si)__C,
                                                    (__v4si)__D, (__mmask8)__A);
}

__funline __m128i _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpshrdv_v2di((__v2di)__A, (__v2di)__B,
                                              (__v2di)__C);
}

__funline __m128i _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D) {
  return (__m128i)__builtin_ia32_vpshrdv_v2di_mask((__v2di)__A, (__v2di)__C,
                                                   (__v2di)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shrdv_epi64(__mmask8 __A, __m128i __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz((__v2di)__B, (__v2di)__C,
                                                    (__v2di)__D, (__mmask8)__A);
}

__funline __m256i _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpshldv_v16hi((__v16hi)__A, (__v16hi)__B,
                                               (__v16hi)__C);
}

__funline __m256i _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __B, __m256i __C,
                                          __m256i __D) {
  return (__m256i)__builtin_ia32_vpshldv_v16hi_mask(
      (__v16hi)__A, (__v16hi)__C, (__v16hi)__D, (__mmask16)__B);
}

__funline __m256i _mm256_maskz_shldv_epi16(__mmask16 __A, __m256i __B,
                                           __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz(
      (__v16hi)__B, (__v16hi)__C, (__v16hi)__D, (__mmask16)__A);
}

__funline __m256i _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpshldv_v8si((__v8si)__A, (__v8si)__B,
                                              (__v8si)__C);
}

__funline __m256i _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D) {
  return (__m256i)__builtin_ia32_vpshldv_v8si_mask((__v8si)__A, (__v8si)__C,
                                                   (__v8si)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shldv_epi32(__mmask8 __A, __m256i __B, __m256i __C,
                                           __m256i __D) {
  return (__m256i)__builtin_ia32_vpshldv_v8si_maskz((__v8si)__B, (__v8si)__C,
                                                    (__v8si)__D, (__mmask8)__A);
}

__funline __m256i _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpshldv_v4di((__v4di)__A, (__v4di)__B,
                                              (__v4di)__C);
}

__funline __m256i _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __B, __m256i __C,
                                          __m256i __D) {
  return (__m256i)__builtin_ia32_vpshldv_v4di_mask((__v4di)__A, (__v4di)__C,
                                                   (__v4di)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_shldv_epi64(__mmask8 __A, __m256i __B, __m256i __C,
                                           __m256i __D) {
  return (__m256i)__builtin_ia32_vpshldv_v4di_maskz((__v4di)__B, (__v4di)__C,
                                                    (__v4di)__D, (__mmask8)__A);
}

__funline __m128i _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpshldv_v8hi((__v8hi)__A, (__v8hi)__B,
                                              (__v8hi)__C);
}

__funline __m128i _mm_mask_shldv_epi16(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D) {
  return (__m128i)__builtin_ia32_vpshldv_v8hi_mask((__v8hi)__A, (__v8hi)__C,
                                                   (__v8hi)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shldv_epi16(__mmask8 __A, __m128i __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz((__v8hi)__B, (__v8hi)__C,
                                                    (__v8hi)__D, (__mmask8)__A);
}

__funline __m128i _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpshldv_v4si((__v4si)__A, (__v4si)__B,
                                              (__v4si)__C);
}

__funline __m128i _mm_mask_shldv_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D) {
  return (__m128i)__builtin_ia32_vpshldv_v4si_mask((__v4si)__A, (__v4si)__C,
                                                   (__v4si)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shldv_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpshldv_v4si_maskz((__v4si)__B, (__v4si)__C,
                                                    (__v4si)__D, (__mmask8)__A);
}

__funline __m128i _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpshldv_v2di((__v2di)__A, (__v2di)__B,
                                              (__v2di)__C);
}

__funline __m128i _mm_mask_shldv_epi64(__m128i __A, __mmask8 __B, __m128i __C,
                                       __m128i __D) {
  return (__m128i)__builtin_ia32_vpshldv_v2di_mask((__v2di)__A, (__v2di)__C,
                                                   (__v2di)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_shldv_epi64(__mmask8 __A, __m128i __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpshldv_v2di_maskz((__v2di)__B, (__v2di)__C,
                                                    (__v2di)__D, (__mmask8)__A);
}
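The *_shldv_* / *_shrdv_* forms above take per-lane shift counts from a third
vector rather than an immediate. One handy consequence, sketched below under
the assumption of AVX512VBMI2+VL hardware (rotate_left_var is an illustrative
helper): double-shifting a value with itself gives a per-lane variable rotate.

#include <immintrin.h>

/* Shifting the concatenation (v:v) left by n bits rotates each 32-bit
   lane of v left by n; counts are taken modulo 32 per lane.
   Requires -mavx512vbmi2 -mavx512vl. */
__m128i rotate_left_var(__m128i v, __m128i counts) {
  return _mm_shldv_epi32(v, v, counts);
}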
@@ -809,51 +669,41 @@ extern __inline __m128i
#define __DISABLE_AVX512VBMI2VLBW__
#endif /* __AVX512VBMIVLBW__ */
__funline __m256i _mm256_mask_compress_epi8(__m256i __A, __mmask32 __B,
                                            __m256i __C) {
  return (__m256i)__builtin_ia32_compressqi256_mask((__v32qi)__C, (__v32qi)__A,
                                                    (__mmask32)__B);
}

__funline __m256i _mm256_maskz_compress_epi8(__mmask32 __A, __m256i __B) {
  return (__m256i)__builtin_ia32_compressqi256_mask(
      (__v32qi)__B, (__v32qi)_mm256_setzero_si256(), (__mmask32)__A);
}

__funline void _mm256_mask_compressstoreu_epi8(void *__A, __mmask32 __B,
                                               __m256i __C) {
  __builtin_ia32_compressstoreuqi256_mask((__v32qi *)__A, (__v32qi)__C,
                                          (__mmask32)__B);
}

__funline __m256i _mm256_mask_expand_epi8(__m256i __A, __mmask32 __B,
                                          __m256i __C) {
  return (__m256i)__builtin_ia32_expandqi256_mask((__v32qi)__C, (__v32qi)__A,
                                                  (__mmask32)__B);
}

__funline __m256i _mm256_maskz_expand_epi8(__mmask32 __A, __m256i __B) {
  return (__m256i)__builtin_ia32_expandqi256_maskz(
      (__v32qi)__B, (__v32qi)_mm256_setzero_si256(), (__mmask32)__A);
}

__funline __m256i _mm256_mask_expandloadu_epi8(__m256i __A, __mmask32 __B,
                                               const void *__C) {
  return (__m256i)__builtin_ia32_expandloadqi256_mask(
      (const __v32qi *)__C, (__v32qi)__A, (__mmask32)__B);
}

__funline __m256i _mm256_maskz_expandloadu_epi8(__mmask32 __A, const void *__B) {
  return (__m256i)__builtin_ia32_expandloadqi256_maskz(
      (const __v32qi *)__B, (__v32qi)_mm256_setzero_si256(), (__mmask32)__A);
}
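The byte compress/expand intrinsics above pack mask-selected bytes toward the
low end of a vector, or scatter them back out. A usage sketch, assuming
AVX512VBMI2+VL hardware (keep_selected_bytes is an illustrative wrapper):

#include <immintrin.h>

/* Pack the bytes selected by `which` to the low end of the result;
   the maskz form zeroes the remaining bytes. */
__m256i keep_selected_bytes(__m256i v, __mmask32 which) {
  return _mm256_maskz_compress_epi8(which, v);
}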
53
third_party/intel/avx512vbmiintrin.internal.h
vendored

@@ -11,77 +11,62 @@
#define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */
__funline __m512i _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M,
                                                    __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
      (__v64qi)__X, (__v64qi)__Y, (__v64qi)__W, (__mmask64)__M);
}

__funline __m512i _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X,
                                                     __m512i __Y) {
  return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
      (__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_setzero_si512(),
      (__mmask64)__M);
}

__funline __m512i _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
      (__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_undefined_epi32(),
      (__mmask64)-1);
}

__funline __m512i _mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_permvarqi512_mask(
      (__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_undefined_epi32(),
      (__mmask64)-1);
}

__funline __m512i _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A,
                                                __m512i __B) {
  return (__m512i)__builtin_ia32_permvarqi512_mask(
      (__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_setzero_si512(),
      (__mmask64)__M);
}

__funline __m512i _mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M,
                                               __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_permvarqi512_mask(
      (__v64qi)__B, (__v64qi)__A, (__v64qi)__W, (__mmask64)__M);
}

__funline __m512i _mm512_permutex2var_epi8(__m512i __A, __m512i __I,
                                           __m512i __B) {
  return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
      (__v64qi)__I
      /* idx */,
      (__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
}

__funline __m512i _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U,
                                                __m512i __I, __m512i __B) {
  return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
      (__v64qi)__I
      /* idx */,
      (__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
}

__funline __m512i _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I,
                                                 __mmask64 __U, __m512i __B) {
  return (__m512i)__builtin_ia32_vpermi2varqi512_mask((__v64qi)__A,
                                                      (__v64qi)__I
                                                      /* idx */,
@@ -89,10 +74,8 @@ extern __inline __m512i
                                                      (__mmask64)__U);
}

__funline __m512i _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A,
                                                 __m512i __I, __m512i __B) {
  return (__m512i)__builtin_ia32_vpermt2varqi512_maskz(
      (__v64qi)__I
      /* idx */,
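_mm512_permutexvar_epi8 amounts to a 64-entry byte table lookup in a single
instruction. A usage sketch, assuming AVX512VBMI hardware (lookup64 is an
illustrative wrapper):

#include <immintrin.h>

/* Each result byte is table[idx & 63]: only the low six bits of each
   index byte are consulted for a 64-byte table. */
__m512i lookup64(__m512i table, __m512i idx) {
  return _mm512_permutexvar_epi8(idx, table);
}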
105
third_party/intel/avx512vbmivlintrin.internal.h
vendored

@@ -12,123 +12,98 @@
#define __DISABLE_AVX512VBMIVL__
#endif /* __AVX512VBMIVL__ */
__funline __m256i _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M,
                                                    __m256i __X, __m256i __Y) {
  return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
      (__v32qi)__X, (__v32qi)__Y, (__v32qi)__W, (__mmask32)__M);
}

__funline __m256i _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X,
                                                     __m256i __Y) {
  return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
      (__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_setzero_si256(),
      (__mmask32)__M);
}

__funline __m256i _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
  return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
      (__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_undefined_si256(),
      (__mmask32)-1);
}

__funline __m128i _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M,
                                                 __m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
      (__v16qi)__X, (__v16qi)__Y, (__v16qi)__W, (__mmask16)__M);
}

__funline __m128i _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X,
                                                  __m128i __Y) {
  return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
      (__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
}

__funline __m128i _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
      (__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_undefined_si128(),
      (__mmask16)-1);
}

__funline __m256i _mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_permvarqi256_mask(
      (__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_undefined_si256(),
      (__mmask32)-1);
}

__funline __m256i _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A,
                                                __m256i __B) {
  return (__m256i)__builtin_ia32_permvarqi256_mask(
      (__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_setzero_si256(),
      (__mmask32)__M);
}

__funline __m256i _mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M,
                                               __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_permvarqi256_mask(
      (__v32qi)__B, (__v32qi)__A, (__v32qi)__W, (__mmask32)__M);
}

__funline __m128i _mm_permutexvar_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_permvarqi128_mask(
      (__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_undefined_si128(),
      (__mmask16)-1);
}

__funline __m128i _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A,
                                             __m128i __B) {
  return (__m128i)__builtin_ia32_permvarqi128_mask(
      (__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
}

__funline __m128i _mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M,
                                            __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_permvarqi128_mask(
      (__v16qi)__B, (__v16qi)__A, (__v16qi)__W, (__mmask16)__M);
}

__funline __m256i _mm256_permutex2var_epi8(__m256i __A, __m256i __I,
                                           __m256i __B) {
  return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
      (__v32qi)__I
      /* idx */,
      (__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
}

__funline __m256i _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U,
                                                __m256i __I, __m256i __B) {
  return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
      (__v32qi)__I
      /* idx */,
      (__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
}

__funline __m256i _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I,
                                                 __mmask32 __U, __m256i __B) {
  return (__m256i)__builtin_ia32_vpermi2varqi256_mask((__v32qi)__A,
                                                      (__v32qi)__I
                                                      /* idx */,
@@ -136,39 +111,31 @@ extern __inline __m256i
                                                      (__mmask32)__U);
}

__funline __m256i _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A,
                                                 __m256i __I, __m256i __B) {
  return (__m256i)__builtin_ia32_vpermt2varqi256_maskz(
      (__v32qi)__I
      /* idx */,
      (__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
}

__funline __m128i _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
  return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
      (__v16qi)__I
      /* idx */,
      (__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
}

__funline __m128i _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U,
                                             __m128i __I, __m128i __B) {
  return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
      (__v16qi)__I
      /* idx */,
      (__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
}

__funline __m128i _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I,
                                              __mmask16 __U, __m128i __B) {
  return (__m128i)__builtin_ia32_vpermi2varqi128_mask((__v16qi)__A,
                                                      (__v16qi)__I
                                                      /* idx */,
@@ -176,10 +143,8 @@ extern __inline __m128i
                                                      (__mmask16)__U);
}

__funline __m128i _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A,
                                              __m128i __I, __m128i __B) {
  return (__m128i)__builtin_ia32_vpermt2varqi128_maskz(
      (__v16qi)__I
      /* idx */,
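The permutex2var forms select from two source vectors, doubling the table: at
the 128-bit width, bit 4 of each index byte picks the source register. A
sketch assuming AVX512VBMI+VL hardware (lookup32 is an illustrative wrapper):

#include <immintrin.h>

/* A 32-entry byte table split across lo and hi; the low four bits of
   each index select the entry and bit 4 selects between lo and hi. */
__m128i lookup32(__m128i lo, __m128i hi, __m128i idx) {
  return _mm_permutex2var_epi8(lo, idx, hi);
}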
2026
third_party/intel/avx512vlbwintrin.internal.h
vendored
File diff suppressed because it is too large

769
third_party/intel/avx512vldqintrin.internal.h
vendored
File diff suppressed because it is too large

5646
third_party/intel/avx512vlintrin.internal.h
vendored
File diff suppressed because it is too large

62
third_party/intel/avx512vnniintrin.internal.h
vendored

@@ -11,92 +11,70 @@
#define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */
__funline __m512i _mm512_dpbusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpbusd_v16si((__v16si)__A, (__v16si)__B,
                                                (__v16si)__C);
}

__funline __m512i _mm512_mask_dpbusd_epi32(__m512i __A, __mmask16 __B,
                                           __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask(
      (__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_dpbusd_epi32(__mmask16 __A, __m512i __B,
                                            __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz(
      (__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}

__funline __m512i _mm512_dpbusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpbusds_v16si((__v16si)__A, (__v16si)__B,
                                                 (__v16si)__C);
}

__funline __m512i _mm512_mask_dpbusds_epi32(__m512i __A, __mmask16 __B,
                                            __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask(
      (__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_dpbusds_epi32(__mmask16 __A, __m512i __B,
                                             __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz(
      (__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}

__funline __m512i _mm512_dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwssd_v16si((__v16si)__A, (__v16si)__B,
                                                (__v16si)__C);
}

__funline __m512i _mm512_mask_dpwssd_epi32(__m512i __A, __mmask16 __B,
                                           __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask(
      (__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_dpwssd_epi32(__mmask16 __A, __m512i __B,
                                            __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz(
      (__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}

__funline __m512i _mm512_dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C) {
  return (__m512i)__builtin_ia32_vpdpwssds_v16si((__v16si)__A, (__v16si)__B,
                                                 (__v16si)__C);
}

__funline __m512i _mm512_mask_dpwssds_epi32(__m512i __A, __mmask16 __B,
                                            __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask(
      (__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}

__funline __m512i _mm512_maskz_dpwssds_epi32(__mmask16 __A, __m512i __B,
                                             __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz(
      (__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}
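For reference, one 32-bit lane of _mm512_dpbusd_epi32 multiplies four unsigned
bytes of one source by the four corresponding signed bytes of the other and
adds the products into the lane's accumulator; the dpbusds variants do the
same with signed saturation. A minimal scalar model of the non-saturating
form, assuming the documented VPDPBUSD semantics (dpbusd_lane is an
illustrative helper):

#include <stdint.h>

/* b is read as four unsigned bytes, c as four signed bytes; the four
   products are summed into the 32-bit accumulator a. */
static int32_t dpbusd_lane(int32_t a, uint32_t b, uint32_t c) {
  int32_t sum = a;
  for (int i = 0; i < 4; ++i) {
    uint8_t u = (uint8_t)(b >> (8 * i));
    int8_t s = (int8_t)(c >> (8 * i));
    sum += (int32_t)u * (int32_t)s;
  }
  return sum;
}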
114
third_party/intel/avx512vnnivlintrin.internal.h
vendored

@@ -12,172 +12,138 @@
#define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */
__funline __m256i _mm256_dpbusd_epi32(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpdpbusd_v8si((__v8si)__A, (__v8si)__B,
                                               (__v8si)__C);
}

__funline __m256i _mm256_mask_dpbusd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
                                           __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask((__v8si)__A, (__v8si)__C,
                                                    (__v8si)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_dpbusd_epi32(__mmask8 __A, __m256i __B,
                                            __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz(
      (__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}

__funline __m128i _mm_dpbusd_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpdpbusd_v4si((__v4si)__A, (__v4si)__B,
                                               (__v4si)__C);
}

__funline __m128i _mm_mask_dpbusd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask((__v4si)__A, (__v4si)__C,
                                                    (__v4si)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_dpbusd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                         __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz(
      (__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}

__funline __m256i _mm256_dpbusds_epi32(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpdpbusds_v8si((__v8si)__A, (__v8si)__B,
                                                (__v8si)__C);
}

__funline __m256i _mm256_mask_dpbusds_epi32(__m256i __A, __mmask8 __B,
                                            __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask(
      (__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_dpbusds_epi32(__mmask8 __A, __m256i __B,
                                             __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz(
      (__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}

__funline __m128i _mm_dpbusds_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpdpbusds_v4si((__v4si)__A, (__v4si)__B,
                                                (__v4si)__C);
}

__funline __m128i _mm_mask_dpbusds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                         __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask(
      (__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_dpbusds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                          __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz(
      (__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}

__funline __m256i _mm256_dpwssd_epi32(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpdpwssd_v8si((__v8si)__A, (__v8si)__B,
                                               (__v8si)__C);
}

__funline __m256i _mm256_mask_dpwssd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
                                           __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask((__v8si)__A, (__v8si)__C,
                                                    (__v8si)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_dpwssd_epi32(__mmask8 __A, __m256i __B,
                                            __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz(
      (__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}

__funline __m128i _mm_dpwssd_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpdpwssd_v4si((__v4si)__A, (__v4si)__B,
                                               (__v4si)__C);
}

__funline __m128i _mm_mask_dpwssd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                        __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask((__v4si)__A, (__v4si)__C,
                                                    (__v4si)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_dpwssd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                         __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz(
      (__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}

__funline __m256i _mm256_dpwssds_epi32(__m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_vpdpwssds_v8si((__v8si)__A, (__v8si)__B,
                                                (__v8si)__C);
}

__funline __m256i _mm256_mask_dpwssds_epi32(__m256i __A, __mmask8 __B,
                                            __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask(
      (__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
}

__funline __m256i _mm256_maskz_dpwssds_epi32(__mmask8 __A, __m256i __B,
                                             __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz(
      (__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}

__funline __m128i _mm_dpwssds_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpdpwssds_v4si((__v4si)__A, (__v4si)__B,
                                                (__v4si)__C);
}

__funline __m128i _mm_mask_dpwssds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
                                         __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask(
      (__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
}

__funline __m128i _mm_maskz_dpwssds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
                                          __m128i __D) {
  return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz(
      (__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}
@@ -12,42 +12,32 @@
#define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi32(__m512i __A) {
|
||||
__funline __m512i _mm512_popcnt_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si((__v16si)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U, __m512i __B) {
|
||||
__funline __m512i _mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__B, (__mmask16)__U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
|
||||
__funline __m512i _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi64(__m512i __A) {
|
||||
__funline __m512i _mm512_popcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di((__v8di)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U, __m512i __B) {
|
||||
__funline __m512i _mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask((__v8di)__A, (__v8di)__B,
|
||||
(__mmask8)__U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
|
||||
__funline __m512i _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
}
|
||||
|
|
|
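A usage sketch (assumed, not from the diff), requiring AVX512VPOPCNTDQ:

#include <immintrin.h>

/* Each 64-bit lane of the result holds the number of set bits in the
   corresponding lane of v. */
__m512i lane_popcount(__m512i v) {
  return _mm512_popcnt_epi64(v);
}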
@ -12,82 +12,60 @@
#define __DISABLE_AVX512VPOPCNTDQVL__
#endif /* __AVX512VPOPCNTDQVL__ */

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi32(__m128i __A) {
__funline __m128i _mm_popcnt_epi32(__m128i __A) {
  return (__m128i)__builtin_ia32_vpopcountd_v4si((__v4si)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) {
__funline __m128i _mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) {
  return (__m128i)__builtin_ia32_vpopcountd_v4si_mask((__v4si)__A, (__v4si)__B,
                                                      (__mmask16)__U);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) {
__funline __m128i _mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) {
  return (__m128i)__builtin_ia32_vpopcountd_v4si_mask(
      (__v4si)__A, (__v4si)_mm_setzero_si128(), (__mmask16)__U);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi32(__m256i __A) {
__funline __m256i _mm256_popcnt_epi32(__m256i __A) {
  return (__m256i)__builtin_ia32_vpopcountd_v8si((__v8si)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U, __m256i __B) {
__funline __m256i _mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U,
                                           __m256i __B) {
  return (__m256i)__builtin_ia32_vpopcountd_v8si_mask((__v8si)__A, (__v8si)__B,
                                                      (__mmask16)__U);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) {
__funline __m256i _mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) {
  return (__m256i)__builtin_ia32_vpopcountd_v8si_mask(
      (__v8si)__A, (__v8si)_mm256_setzero_si256(), (__mmask16)__U);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi64(__m128i __A) {
__funline __m128i _mm_popcnt_epi64(__m128i __A) {
  return (__m128i)__builtin_ia32_vpopcountq_v2di((__v2di)__A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) {
__funline __m128i _mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) {
  return (__m128i)__builtin_ia32_vpopcountq_v2di_mask((__v2di)__A, (__v2di)__B,
                                                      (__mmask8)__U);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
__funline __m128i _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
  return (__m128i)__builtin_ia32_vpopcountq_v2di_mask(
      (__v2di)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi64(__m256i __A) {
__funline __m256i _mm256_popcnt_epi64(__m256i __A) {
  return (__m256i)__builtin_ia32_vpopcountq_v4di((__v4di)__A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U, __m256i __B) {
__funline __m256i _mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U,
                                           __m256i __B) {
  return (__m256i)__builtin_ia32_vpopcountq_v4di_mask((__v4di)__A, (__v4di)__B,
                                                      (__mmask8)__U);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
__funline __m256i _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
  return (__m256i)__builtin_ia32_vpopcountq_v4di_mask(
      (__v4di)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
}
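Another assumed usage sketch, this time of the merge-masked VL form (AVX512VPOPCNTDQ plus AVX512VL): lanes whose mask bit is clear pass src through unchanged.

#include <immintrin.h>

/* Count bits in the even lanes only (mask 0b0101); odd lanes keep src. */
__m256i popcnt_even_lanes(__m256i src, __m256i v) {
  return _mm256_mask_popcnt_epi64(src, (__mmask8)0x5, v);
}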
765 third_party/intel/avxintrin.internal.h vendored
File diff suppressed because it is too large
37 third_party/intel/bmi2intrin.internal.h vendored
@ -11,47 +11,37 @@
#define __DISABLE_BMI2__
#endif /* __BMI2__ */

extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32(unsigned int __X, unsigned int __Y) {
__funline unsigned int _bzhi_u32(unsigned int __X, unsigned int __Y) {
  return __builtin_ia32_bzhi_si(__X, __Y);
}

extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u32(unsigned int __X, unsigned int __Y) {
__funline unsigned int _pdep_u32(unsigned int __X, unsigned int __Y) {
  return __builtin_ia32_pdep_si(__X, __Y);
}

extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u32(unsigned int __X, unsigned int __Y) {
__funline unsigned int _pext_u32(unsigned int __X, unsigned int __Y) {
  return __builtin_ia32_pext_si(__X, __Y);
}

#ifdef __x86_64__

extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
__funline unsigned long long _bzhi_u64(unsigned long long __X,
                                       unsigned long long __Y) {
  return __builtin_ia32_bzhi_di(__X, __Y);
}

extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u64(unsigned long long __X, unsigned long long __Y) {
__funline unsigned long long _pdep_u64(unsigned long long __X,
                                       unsigned long long __Y) {
  return __builtin_ia32_pdep_di(__X, __Y);
}

extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u64(unsigned long long __X, unsigned long long __Y) {
__funline unsigned long long _pext_u64(unsigned long long __X,
                                       unsigned long long __Y) {
  return __builtin_ia32_pext_di(__X, __Y);
}

extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u64(unsigned long long __X, unsigned long long __Y,
__funline unsigned long long _mulx_u64(unsigned long long __X,
                                       unsigned long long __Y,
                                       unsigned long long *__P) {
  unsigned __int128 __res = (unsigned __int128)__X * __Y;
  *__P = (unsigned long long)(__res >> 64);

@ -60,9 +50,8 @@ extern __inline unsigned long long

#else /* !__x86_64__ */

extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
__funline unsigned int _mulx_u32(unsigned int __X, unsigned int __Y,
                                 unsigned int *__P) {
  unsigned long long __res = (unsigned long long)__X * __Y;
  *__P = (unsigned int)(__res >> 32);
  return (unsigned int)__res;
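An aside on usage (assumed; requires BMI2): _pdep_u32 scatters the low bits of its first argument to the set-bit positions of the mask, and _pext_u32 is its inverse, so interleaving two 16-bit coordinates into a Morton code takes two instructions.

#include <x86intrin.h>

/* Interleave the low 16 bits of x and y into a 32-bit Morton code. */
unsigned int morton2d(unsigned int x, unsigned int y) {
  return _pdep_u32(x, 0x55555555u) | _pdep_u32(y, 0xAAAAAAAAu);
}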
44 third_party/intel/cetintrin.internal.h vendored
@ -12,22 +12,16 @@
#endif /* __SHSTK__ */

#ifdef __x86_64__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp(void) {
__funline unsigned long long _get_ssp(void) {
  return __builtin_ia32_rdsspq();
}
#else
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp(void) {
__funline unsigned int _get_ssp(void) {
  return __builtin_ia32_rdsspd();
}
#endif

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_inc_ssp(unsigned int __B) {
__funline void _inc_ssp(unsigned int __B) {
#ifdef __x86_64__
  __builtin_ia32_incsspq((unsigned long long)__B);
#else

@ -35,55 +29,39 @@ extern __inline void
#endif
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_saveprevssp(void) {
__funline void _saveprevssp(void) {
  __builtin_ia32_saveprevssp();
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rstorssp(void *__B) {
__funline void _rstorssp(void *__B) {
  __builtin_ia32_rstorssp(__B);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssd(unsigned int __B, void *__C) {
__funline void _wrssd(unsigned int __B, void *__C) {
  __builtin_ia32_wrssd(__B, __C);
}

#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssq(unsigned long long __B, void *__C) {
__funline void _wrssq(unsigned long long __B, void *__C) {
  __builtin_ia32_wrssq(__B, __C);
}
#endif

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussd(unsigned int __B, void *__C) {
__funline void _wrussd(unsigned int __B, void *__C) {
  __builtin_ia32_wrussd(__B, __C);
}

#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussq(unsigned long long __B, void *__C) {
__funline void _wrussq(unsigned long long __B, void *__C) {
  __builtin_ia32_wrussq(__B, __C);
}
#endif

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_setssbsy(void) {
__funline void _setssbsy(void) {
  __builtin_ia32_setssbsy();
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_clrssbsy(void *__B) {
__funline void _clrssbsy(void *__B) {
  __builtin_ia32_clrssbsy(__B);
}
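A hedged usage sketch (assumptions: built with -mshstk, and the OS has actually enabled CET shadow stacks; on hardware without CET, rdssp behaves as a no-op and this returns 0).

#include <immintrin.h>

/* Current shadow-stack pointer, or 0 when shadow stacks are inactive. */
unsigned long long shadow_stack_pointer(void) {
  return _get_ssp();
}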
4 third_party/intel/cldemoteintrin.internal.h vendored
@ -10,9 +10,7 @@
#pragma GCC target("cldemote")
#define __DISABLE_CLDEMOTE__
#endif /* __CLDEMOTE__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cldemote(void *__A) {
__funline void _cldemote(void *__A) {
  __builtin_ia32_cldemote(__A);
}
#ifdef __DISABLE_CLDEMOTE__
@ -11,9 +11,7 @@
#define __DISABLE_CLFLUSHOPT__
#endif /* __CLFLUSHOPT__ */

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflushopt(void *__A) {
__funline void _mm_clflushopt(void *__A) {
  __builtin_ia32_clflushopt(__A);
}
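An assumed usage sketch (requires CLFLUSHOPT; the trailing sfence is the usual way to order the flushes against later stores):

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

/* Flush [p, p+n) from the cache hierarchy, one 64-byte line at a time. */
void flush_range(const void *p, size_t n) {
  uintptr_t a = (uintptr_t)p & ~(uintptr_t)63;
  for (; a < (uintptr_t)p + n; a += 64) _mm_clflushopt((void *)a);
  _mm_sfence();
}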
4 third_party/intel/clwbintrin.internal.h vendored
@ -11,9 +11,7 @@
#define __DISABLE_CLWB__
#endif /* __CLWB__ */

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clwb(void *__A) {
__funline void _mm_clwb(void *__A) {
  __builtin_ia32_clwb(__A);
}
4 third_party/intel/clzerointrin.internal.h vendored
@ -8,9 +8,7 @@
#define __DISABLE_CLZERO__
#endif /* __CLZERO__ */

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clzero(void* __I) {
__funline void _mm_clzero(void* __I) {
  __builtin_ia32_clzero(__I);
}
941 third_party/intel/emmintrin.internal.h vendored
File diff suppressed because it is too large
24 third_party/intel/f16cintrin.internal.h vendored
@ -12,44 +12,32 @@
#define __DISABLE_F16C__
#endif /* __F16C__ */

extern __inline float
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtsh_ss(unsigned short __S) {
__funline float _cvtsh_ss(unsigned short __S) {
  __v8hi __H = __extension__(__v8hi){(short)__S, 0, 0, 0, 0, 0, 0, 0};
  __v4sf __A = __builtin_ia32_vcvtph2ps(__H);
  return __builtin_ia32_vec_ext_v4sf(__A, 0);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_ps(__m128i __A) {
__funline __m128 _mm_cvtph_ps(__m128i __A) {
  return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__A);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_ps(__m128i __A) {
__funline __m256 _mm256_cvtph_ps(__m128i __A) {
  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__A);
}

#ifdef __OPTIMIZE__
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtss_sh(float __F, const int __I) {
__funline unsigned short _cvtss_sh(float __F, const int __I) {
  __v4sf __A = __extension__(__v4sf){__F, 0, 0, 0};
  __v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I);
  return (unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_ph(__m128 __A, const int __I) {
__funline __m128i _mm_cvtps_ph(__m128 __A, const int __I) {
  return (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__A, __I);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_ph(__m256 __A, const int __I) {
__funline __m128i _mm256_cvtps_ph(__m256 __A, const int __I) {
  return (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__A, __I);
}
#else
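An assumed usage sketch (requires -mf16c; the rounding immediate must be a compile-time constant, which is what the __OPTIMIZE__ guard above is for):

#include <immintrin.h>

/* Quantize a float through IEEE half precision, rounding to nearest even. */
float through_half(float f) {
  unsigned short h =
      _cvtss_sh(f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  return _cvtsh_ss(h);
}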
128 third_party/intel/fma4intrin.internal.h vendored
@ -13,229 +13,165 @@
#define __DISABLE_FMA4__
#endif /* __FMA4__ */

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
                                          (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
                                          (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
__funline __m128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)

{
  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B,
                                         -(__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
                                          -(__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B,
                                         -(__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
                                          -(__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
                                         (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
                                          (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
                                         (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
                                          (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
                                         -(__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
                                          -(__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
                                         -(__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
                                          -(__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
                                             (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
                                            -(__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
                                             -(__v2df)__C);
}

/* 256b Floating point multiply/add type instructions. */
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
                                            (__v8sf)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
                                             (__v4df)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
__funline __m256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)

{
  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
                                            -(__v8sf)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
                                             -(__v4df)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
                                            (__v8sf)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
                                             (__v4df)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
                                            -(__v8sf)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
                                             -(__v4df)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
                                               (__v8sf)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
                                                (__v4df)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
                                               -(__v8sf)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
                                                -(__v4df)__C);
}
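An aside on usage (assumed; FMA4 is the AMD encoding, enabled with -mfma4): _mm256_nmacc_ps computes -(a*b)+c with a single rounding, which is the building block of a Newton-Raphson reciprocal refinement.

#include <x86intrin.h>

/* One refinement step of r ≈ 1/x: r' = r * (2 - x*r). */
__m256 refine_recip(__m256 x, __m256 r) {
  return _mm256_mul_ps(r, _mm256_nmacc_ps(x, r, _mm256_set1_ps(2.0f)));
}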
128 third_party/intel/fmaintrin.internal.h vendored
@ -11,224 +11,160 @@
#define __DISABLE_FMA__
#endif /* __FMA__ */

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
                                          (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
                                             (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
                                            (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B,
                                           (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B,
                                          (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B,
                                             (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B,
                                            (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B,
                                           (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B,
                                           (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B,
                                              (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B,
                                             (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B,
                                            (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B,
                                           (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B,
                                           (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B,
                                              (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B,
                                          (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B,
                                             (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B,
                                            (__v2df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B,
                                           (__v4sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
                                             (__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
                                                (__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
                                            (__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
                                               (__v8sf)__C);
}

extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
                                             -(__v2df)__C);
}

extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
                                                -(__v4df)__C);
}

extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
                                            -(__v4sf)__C);
}

extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
                                               -(__v8sf)__C);
}
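An assumed usage sketch (requires -mfma): the fused form rounds once per lane instead of twice.

#include <immintrin.h>

/* acc += a*b with a single rounding per lane. */
__m256d fma_accumulate(__m256d acc, __m256d a, __m256d b) {
  return _mm256_fmadd_pd(a, b, acc);
}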
16 third_party/intel/fxsrintrin.internal.h vendored
@ -11,28 +11,20 @@
#define __DISABLE_FXSR__
#endif /* __FXSR__ */

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave(void *__P) {
__funline void _fxsave(void *__P) {
  __builtin_ia32_fxsave(__P);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor(void *__P) {
__funline void _fxrstor(void *__P) {
  __builtin_ia32_fxrstor(__P);
}

#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave64(void *__P) {
__funline void _fxsave64(void *__P) {
  __builtin_ia32_fxsave64(__P);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor64(void *__P) {
__funline void _fxrstor64(void *__P) {
  __builtin_ia32_fxrstor64(__P);
}
#endif
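An assumed usage sketch: fxsave writes a 512-byte x87/SSE state image whose address must be 16-byte aligned.

#include <immintrin.h>

void snapshot_fpu(void) {
  _Alignas(16) unsigned char image[512]; /* FXSAVE area */
  _fxsave(image);
  /* ... code that may clobber x87/SSE state ... */
  _fxrstor(image);
}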
147 third_party/intel/gfniintrin.internal.h vendored
@ -11,23 +11,19 @@
#define __DISABLE_GFNI__
#endif /* __GFNI__ */

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi((__v16qi)__A, (__v16qi)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B, const int __C) {
__funline __m128i _mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B,
                                                const int __C) {
  return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)__A,
                                                         (__v16qi)__B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B, const int __C) {
__funline __m128i _mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B,
                                             const int __C) {
  return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)__A,
                                                      (__v16qi)__B, __C);
}

@ -51,23 +47,19 @@ extern __inline __m128i
#define __DISABLE_GFNIAVX__
#endif /* __GFNIAVX__ */

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) {
__funline __m256i _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi((__v32qi)__A, (__v32qi)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B, const int __C) {
__funline __m256i _mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B,
                                                   const int __C) {
  return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)__A,
                                                         (__v32qi)__B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B, const int __C) {
__funline __m256i _mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B,
                                                const int __C) {
  return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)__A,
                                                      (__v32qi)__B, __C);
}

@ -91,49 +83,42 @@ extern __inline __m256i
#define __DISABLE_GFNIAVX512VL__
#endif /* __GFNIAVX512VL__ */

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
                                       __artificial__))
_mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C,
                                         __m128i __D) {
  return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
      (__v16qi)__C, (__v16qi)__D, (__v16qi)__A, __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B,
                                          __m128i __C) {
  return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
      (__v16qi)__B, (__v16qi)__C, (__v16qi)_mm_setzero_si128(), __A);
}

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B, __m128i __C,
                                   __m128i __D, const int __E) {
__funline __m128i _mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B,
                                                     __m128i __C, __m128i __D,
                                                     const int __E) {
  return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
      (__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B, __m128i __C,
__funline __m128i _mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B,
                                                      __m128i __C,
                                                      const int __D) {
  return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
      (__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B, __m128i __C,
                                __m128i __D, const int __E) {
__funline __m128i _mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B,
                                                  __m128i __C, __m128i __D,
                                                  const int __E) {
  return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
      (__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B, __m128i __C,
                                 const int __D) {
__funline __m128i _mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B,
                                                   __m128i __C, const int __D) {
  return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
      (__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
}

@ -167,50 +152,43 @@ extern __inline __m128i
#define __DISABLE_GFNIAVX512VLBW__
#endif /* __GFNIAVX512VLBW__ */

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B, __m256i __C,
                          __m256i __D) {
__funline __m256i _mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B,
                                            __m256i __C, __m256i __D) {
  return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
      (__v32qi)__C, (__v32qi)__D, (__v32qi)__A, __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B,
                                             __m256i __C) {
  return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
      (__v32qi)__B, (__v32qi)__C, (__v32qi)_mm256_setzero_si256(), __A);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A, __mmask32 __B,
__funline __m256i _mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A,
                                                        __mmask32 __B,
                                                        __m256i __C, __m256i __D,
                                                        const int __E) {
  return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
      (__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A, __m256i __B,
                                       __m256i __C, const int __D) {
__funline __m256i _mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A,
                                                         __m256i __B, __m256i __C,
                                                         const int __D) {
  return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
      (__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B, __m256i __C,
                                   __m256i __D, const int __E) {
__funline __m256i _mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B,
                                                     __m256i __C, __m256i __D,
                                                     const int __E) {
  return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
      (__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B, __m256i __C,
__funline __m256i _mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B,
                                                      __m256i __C,
                                                      const int __D) {
  return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
      (__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);

@ -245,69 +223,58 @@ extern __inline __m256i
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B, __m512i __C,
                          __m512i __D) {
__funline __m512i _mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B,
                                            __m512i __C, __m512i __D) {
  return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
      (__v64qi)__C, (__v64qi)__D, (__v64qi)__A, __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B,
                                             __m512i __C) {
  return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
      (__v64qi)__B, (__v64qi)__C, (__v64qi)_mm512_setzero_si512(), __A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) {
__funline __m512i _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi((__v64qi)__A, (__v64qi)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A, __mmask64 __B,
__funline __m512i _mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A,
                                                        __mmask64 __B,
                                                        __m512i __C, __m512i __D,
                                                        const int __E) {
  return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
      (__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A, __m512i __B,
                                       __m512i __C, const int __D) {
__funline __m512i _mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A,
                                                         __m512i __B, __m512i __C,
                                                         const int __D) {
  return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
      (__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B, const int __C) {
__funline __m512i _mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B,
                                                   const int __C) {
  return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)__A,
                                                         (__v64qi)__B, __C);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B, __m512i __C,
                                   __m512i __D, const int __E) {
__funline __m512i _mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B,
                                                     __m512i __C, __m512i __D,
                                                     const int __E) {
  return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
      (__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B, __m512i __C,
__funline __m512i _mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B,
                                                      __m512i __C,
                                                      const int __D) {
  return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
      (__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B, const int __C) {
__funline __m512i _mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B,
                                                const int __C) {
  return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)__A,
                                                      (__v64qi)__B, __C);
}
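An aside (assumed usage; requires GFNI): the affine intrinsic applies an 8x8 bit matrix to every byte. The identity matrix encodes as 0x0102040810204080; reversing its row order gives 0x8040201008040201, which bit-reverses each byte in a single instruction.

#include <immintrin.h>

/* Reverse the bit order inside every byte of v. */
__m128i bitrev_bytes(__m128i v) {
  const __m128i m = _mm_set1_epi64x((long long)0x8040201008040201ULL);
  return _mm_gf2p8affine_epi64_epi8(v, m, 0);
}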
113 third_party/intel/ia32intrin.internal.h vendored
@ -2,21 +2,15 @@
|
|||
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsfd(int __X) {
|
||||
__funline int __bsfd(int __X) {
|
||||
return __builtin_ctz(__X);
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsrd(int __X) {
|
||||
__funline int __bsrd(int __X) {
|
||||
return __builtin_ia32_bsrsi(__X);
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bswapd(int __X) {
|
||||
__funline int __bswapd(int __X) {
|
||||
return __builtin_bswap32(__X);
|
||||
}
|
||||
|
||||
|
@ -28,21 +22,15 @@ extern __inline int
|
|||
#define __DISABLE_SSE4_2__
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32b(unsigned int __C, unsigned char __V) {
|
||||
__funline unsigned int __crc32b(unsigned int __C, unsigned char __V) {
|
||||
return __builtin_ia32_crc32qi(__C, __V);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32w(unsigned int __C, unsigned short __V) {
|
||||
__funline unsigned int __crc32w(unsigned int __C, unsigned short __V) {
|
||||
return __builtin_ia32_crc32hi(__C, __V);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32d(unsigned int __C, unsigned int __V) {
|
||||
__funline unsigned int __crc32d(unsigned int __C, unsigned int __V) {
|
||||
return __builtin_ia32_crc32si(__C, __V);
|
||||
}
|
||||
|
||||
|
@ -53,99 +41,71 @@ extern __inline unsigned int
|
|||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__popcntd(unsigned int __X) {
|
||||
__funline int __popcntd(unsigned int __X) {
|
||||
return __builtin_popcount(__X);
|
||||
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rdpmc(int __S) {
|
||||
__funline unsigned long long __rdpmc(int __S) {
|
||||
return __builtin_ia32_rdpmc(__S);
|
||||
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rdtsc(void) {
|
||||
__funline unsigned long long __rdtsc(void) {
|
||||
return __builtin_ia32_rdtsc();
|
||||
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rdtscp(unsigned int *__A) {
|
||||
__funline unsigned long long __rdtscp(unsigned int *__A) {
|
||||
return __builtin_ia32_rdtscp(__A);
|
||||
}
|
||||
|
||||
#endif /* __iamcu__ */

-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rolb(unsigned char __X, int __C) {
+__funline unsigned char __rolb(unsigned char __X, int __C) {
   return __builtin_ia32_rolqi(__X, __C);
 }

-extern __inline unsigned short
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rolw(unsigned short __X, int __C) {
+__funline unsigned short __rolw(unsigned short __X, int __C) {
   return __builtin_ia32_rolhi(__X, __C);
 }

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rold(unsigned int __X, int __C) {
+__funline unsigned int __rold(unsigned int __X, int __C) {
   __C &= 31;
   return (__X << __C) | (__X >> (-__C & 31));
 }

-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rorb(unsigned char __X, int __C) {
+__funline unsigned char __rorb(unsigned char __X, int __C) {
   return __builtin_ia32_rorqi(__X, __C);
 }

-extern __inline unsigned short
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rorw(unsigned short __X, int __C) {
+__funline unsigned short __rorw(unsigned short __X, int __C) {
   return __builtin_ia32_rorhi(__X, __C);
 }

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rord(unsigned int __X, int __C) {
+__funline unsigned int __rord(unsigned int __X, int __C) {
   __C &= 31;
   return (__X >> __C) | (__X << (-__C & 31));
 }
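Masking the count (`__C &= 31`) and shifting the other half by `-__C & 31` keeps both shift amounts in 0..31, so the expression stays defined even for a count of 0, where a naive `32 - __C` shift would be undefined behavior. A quick check of the identities:

#include <x86intrin.h>
#include <assert.h>

int main(void) {
  unsigned x = 0x80000001u;
  assert(__rold(x, 4) == 0x18u);         /* the high bit wraps around */
  assert(__rord(__rold(x, 7), 7) == x);  /* rol and ror are inverses */
  return 0;
}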

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __pause(void) {
+__funline void __pause(void) {
   __builtin_ia32_pause();
 }

 #ifdef __x86_64__

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __bsfq(long long __X) {
+__funline int __bsfq(long long __X) {
   return __builtin_ctzll(__X);
 }

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __bsrq(long long __X) {
+__funline int __bsrq(long long __X) {
   return __builtin_ia32_bsrdi(__X);
 }

-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __bswapq(long long __X) {
+__funline long long __bswapq(long long __X) {
   return __builtin_bswap64(__X);
 }

@@ -155,9 +115,8 @@ extern __inline long long
 #define __DISABLE_SSE4_2__
 #endif /* __SSE4_2__ */

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __crc32q(unsigned long long __C, unsigned long long __V) {
+__funline unsigned long long __crc32q(unsigned long long __C,
+                                      unsigned long long __V) {
   return __builtin_ia32_crc32di(__C, __V);
 }

@@ -166,35 +125,25 @@ extern __inline unsigned long long
 #pragma GCC pop_options
 #endif /* __DISABLE_SSE4_2__ */

-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __popcntq(unsigned long long __X) {
+__funline long long __popcntq(unsigned long long __X) {
   return __builtin_popcountll(__X);
 }

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rolq(unsigned long long __X, int __C) {
+__funline unsigned long long __rolq(unsigned long long __X, int __C) {
   __C &= 63;
   return (__X << __C) | (__X >> (-__C & 63));
 }

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __rorq(unsigned long long __X, int __C) {
+__funline unsigned long long __rorq(unsigned long long __X, int __C) {
   __C &= 63;
   return (__X >> __C) | (__X << (-__C & 63));
 }

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __readeflags(void) {
+__funline unsigned long long __readeflags(void) {
   return __builtin_ia32_readeflags_u64();
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __writeeflags(unsigned long long __X) {
+__funline void __writeeflags(unsigned long long __X) {
   __builtin_ia32_writeeflags_u64(__X);
 }
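`__readeflags`/`__writeeflags` round-trip the whole FLAGS register through `pushf`/`popf`. A classic use is probing whether a flag bit sticks; the sketch below toggles AC (bit 18) and restores the original value, with the helper name made up for illustration:

#include <x86intrin.h>

static int ac_flag_is_writable(void) {
  unsigned long long f = __readeflags();
  __writeeflags(f ^ (1ull << 18));            /* try to flip AC */
  int ok = ((__readeflags() ^ f) >> 18) & 1;  /* did it stick? */
  __writeeflags(f);                           /* restore */
  return ok;
}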

@@ -202,15 +151,11 @@ extern __inline void
 #define _popcnt64(a) __popcntq(a)
 #else

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __readeflags(void) {
+__funline unsigned int __readeflags(void) {
   return __builtin_ia32_readeflags_u32();
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __writeeflags(unsigned int __X) {
+__funline void __writeeflags(unsigned int __X) {
   __builtin_ia32_writeeflags_u32(__X);
 }

60  third_party/intel/immintrin.internal.h  vendored

@@ -65,9 +65,7 @@
 #include "third_party/intel/pkuintrin.internal.h"
 /* clang-format on */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _wbinvd(void) {
+__funline void _wbinvd(void) {
   __builtin_ia32_wbinvd();
 }

@@ -76,15 +74,11 @@ extern __inline void
 #pragma GCC target("rdrnd")
 #define __DISABLE_RDRND__
 #endif /* __RDRND__ */
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _rdrand16_step(unsigned short *__P) {
+__funline int _rdrand16_step(unsigned short *__P) {
   return __builtin_ia32_rdrand16_step(__P);
 }

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _rdrand32_step(unsigned int *__P) {
+__funline int _rdrand32_step(unsigned int *__P) {
   return __builtin_ia32_rdrand32_step(__P);
 }
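`_rdrand16_step`/`_rdrand32_step` return 1 on success and 0 when the DRNG had no entropy ready, so real callers retry a bounded number of times. A sketch (the retry count of 10 follows Intel's published guidance; the wrapper name is illustrative):

#include <immintrin.h>

static int rdrand32_retry(unsigned *out) {
  for (int i = 0; i < 10; ++i)
    if (_rdrand32_step(out)) return 1;  /* CF=1: *out is valid */
  return 0;                             /* hardware never delivered */
}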
 #ifdef __DISABLE_RDRND__

@@ -97,9 +91,7 @@ extern __inline int
 #pragma GCC target("rdpid")
 #define __DISABLE_RDPID__
 #endif /* __RDPID__ */
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _rdpid_u32(void) {
+__funline unsigned int _rdpid_u32(void) {
   return __builtin_ia32_rdpid();
 }
 #ifdef __DISABLE_RDPID__

@@ -114,51 +106,35 @@ extern __inline unsigned int
 #pragma GCC target("fsgsbase")
 #define __DISABLE_FSGSBASE__
 #endif /* __FSGSBASE__ */
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _readfsbase_u32(void) {
+__funline unsigned int _readfsbase_u32(void) {
   return __builtin_ia32_rdfsbase32();
 }

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _readfsbase_u64(void) {
+__funline unsigned long long _readfsbase_u64(void) {
   return __builtin_ia32_rdfsbase64();
 }

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _readgsbase_u32(void) {
+__funline unsigned int _readgsbase_u32(void) {
   return __builtin_ia32_rdgsbase32();
 }

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _readgsbase_u64(void) {
+__funline unsigned long long _readgsbase_u64(void) {
   return __builtin_ia32_rdgsbase64();
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _writefsbase_u32(unsigned int __B) {
+__funline void _writefsbase_u32(unsigned int __B) {
   __builtin_ia32_wrfsbase32(__B);
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _writefsbase_u64(unsigned long long __B) {
+__funline void _writefsbase_u64(unsigned long long __B) {
   __builtin_ia32_wrfsbase64(__B);
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _writegsbase_u32(unsigned int __B) {
+__funline void _writegsbase_u32(unsigned int __B) {
   __builtin_ia32_wrgsbase32(__B);
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _writegsbase_u64(unsigned long long __B) {
+__funline void _writegsbase_u64(unsigned long long __B) {
   __builtin_ia32_wrgsbase64(__B);
 }
 #ifdef __DISABLE_FSGSBASE__

@@ -171,9 +147,7 @@ extern __inline void
 #pragma GCC target("rdrnd")
 #define __DISABLE_RDRND__
 #endif /* __RDRND__ */
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _rdrand64_step(unsigned long long *__P) {
+__funline int _rdrand64_step(unsigned long long *__P) {
   return __builtin_ia32_rdrand64_step(__P);
 }
 #ifdef __DISABLE_RDRND__

@@ -190,16 +164,12 @@ extern __inline int
 #endif

 #ifdef __x86_64__
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _ptwrite64(unsigned long long __B) {
+__funline void _ptwrite64(unsigned long long __B) {
   __builtin_ia32_ptwrite64(__B);
 }
 #endif /* __x86_64__ */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _ptwrite32(unsigned __B) {
+__funline void _ptwrite32(unsigned __B) {
   __builtin_ia32_ptwrite32(__B);
 }
 #ifdef __DISABLE_PTWRITE__

27  third_party/intel/lwpintrin.internal.h  vendored

@@ -11,28 +11,22 @@
 #define __DISABLE_LWP__
 #endif /* __LWP__ */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __llwpcb(void *__pcbAddress) {
+__funline void __llwpcb(void *__pcbAddress) {
   __builtin_ia32_llwpcb(__pcbAddress);
 }

-extern __inline void *__attribute__((__gnu_inline__, __always_inline__,
-                                     __artificial__)) __slwpcb(void) {
+__funline void *__slwpcb(void) {
   return __builtin_ia32_slwpcb();
 }

 #ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__,
-                                    __artificial__))
-__lwpval32(unsigned int __data2, unsigned int __data1, unsigned int __flags) {
+__funline void __lwpval32(unsigned int __data2, unsigned int __data1,
+                          unsigned int __flags) {
   __builtin_ia32_lwpval32(__data2, __data1, __flags);
 }

 #ifdef __x86_64__
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __lwpval64(unsigned long long __data2, unsigned int __data1,
+__funline void __lwpval64(unsigned long long __data2, unsigned int __data1,
                           unsigned int __flags) {
   __builtin_ia32_lwpval64(__data2, __data1, __flags);
 }

@@ -49,17 +43,14 @@ extern __inline void
 #endif

 #ifdef __OPTIMIZE__
-extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__,
-                                             __artificial__))
-__lwpins32(unsigned int __data2, unsigned int __data1, unsigned int __flags) {
+__funline unsigned char __lwpins32(unsigned int __data2, unsigned int __data1,
+                                   unsigned int __flags) {
   return __builtin_ia32_lwpins32(__data2, __data1, __flags);
 }

 #ifdef __x86_64__
-extern __inline unsigned char
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __lwpins64(unsigned long long __data2, unsigned int __data1,
-               unsigned int __flags) {
+__funline unsigned char __lwpins64(unsigned long long __data2,
+                                   unsigned int __data1, unsigned int __flags) {
   return __builtin_ia32_lwpins64(__data2, __data1, __flags);
 }
 #endif

20  third_party/intel/lzcntintrin.internal.h  vendored

@@ -11,34 +11,24 @@
 #define __DISABLE_LZCNT__
 #endif /* __LZCNT__ */

-extern __inline unsigned short
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __lzcnt16(unsigned short __X) {
+__funline unsigned short __lzcnt16(unsigned short __X) {
   return __builtin_ia32_lzcnt_u16(__X);
 }

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __lzcnt32(unsigned int __X) {
+__funline unsigned int __lzcnt32(unsigned int __X) {
   return __builtin_ia32_lzcnt_u32(__X);
 }

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _lzcnt_u32(unsigned int __X) {
+__funline unsigned int _lzcnt_u32(unsigned int __X) {
   return __builtin_ia32_lzcnt_u32(__X);
 }

 #ifdef __x86_64__
-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __lzcnt64(unsigned long long __X) {
+__funline unsigned long long __lzcnt64(unsigned long long __X) {
   return __builtin_ia32_lzcnt_u64(__X);
 }

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _lzcnt_u64(unsigned long long __X) {
+__funline unsigned long long _lzcnt_u64(unsigned long long __X) {
   return __builtin_ia32_lzcnt_u64(__X);
 }
 #endif
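Unlike `bsr`, `lzcnt` is defined at zero, where it returns the operand width (so `_lzcnt_u64(0) == 64`). That makes branch-free bit-length helpers straightforward, as in this sketch:

#include <immintrin.h>

/* Bits needed to represent x; 0 maps to 0 with no branch. */
static unsigned bit_length_u64(unsigned long long x) {
  return 64 - (unsigned)_lzcnt_u64(x);
}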

112  third_party/intel/mm3dnow.internal.h  vendored

@@ -14,141 +14,95 @@
 #define __DISABLE_3dNOW__
 #endif /* __3dNOW__ */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_femms(void) {
+__funline void _m_femms(void) {
   __builtin_ia32_femms();
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pavgusb(__m64 __A, __m64 __B) {
+__funline __m64 _m_pavgusb(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pavgusb((__v8qi)__A, (__v8qi)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pf2id(__m64 __A) {
+__funline __m64 _m_pf2id(__m64 __A) {
   return (__m64)__builtin_ia32_pf2id((__v2sf)__A);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfacc(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfacc(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfacc((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfadd(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfadd(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfadd((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfcmpeq(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfcmpeq(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfcmpge(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfcmpge(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfcmpge((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfcmpgt(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfcmpgt(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfmax(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfmax(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfmax((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfmin(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfmin(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfmin((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfmul(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfmul(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfmul((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfrcp(__m64 __A) {
+__funline __m64 _m_pfrcp(__m64 __A) {
   return (__m64)__builtin_ia32_pfrcp((__v2sf)__A);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfrcpit1(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfrcpit1(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfrcpit2(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfrcpit2(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfrsqrt(__m64 __A) {
+__funline __m64 _m_pfrsqrt(__m64 __A) {
   return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__A);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfrsqit1(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfrsqit1(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfsub(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfsub(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfsub((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfsubr(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfsubr(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfsubr((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pi2fd(__m64 __A) {
+__funline __m64 _m_pi2fd(__m64 __A) {
   return (__m64)__builtin_ia32_pi2fd((__v2si)__A);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmulhrw(__m64 __A, __m64 __B) {
+__funline __m64 _m_pmulhrw(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pmulhrw((__v4hi)__A, (__v4hi)__B);
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_prefetch(void *__P) {
+__funline void _m_prefetch(void *__P) {
   __builtin_prefetch(__P, 0, 3 /* _MM_HINT_T0 */);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_from_float(float __A) {
+__funline __m64 _m_from_float(float __A) {
   return __extension__(__m64)(__v2sf){__A, 0.0f};
 }

-extern __inline float
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_to_float(__m64 __A) {
+__funline float _m_to_float(__m64 __A) {
   union {
     __v2sf v;
     float a[2];

@@ -172,33 +126,23 @@ extern __inline float
 #define __DISABLE_3dNOW_A__
 #endif /* __3dNOW_A__ */

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pf2iw(__m64 __A) {
+__funline __m64 _m_pf2iw(__m64 __A) {
   return (__m64)__builtin_ia32_pf2iw((__v2sf)__A);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfnacc(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfnacc(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfnacc((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pfpnacc(__m64 __A, __m64 __B) {
+__funline __m64 _m_pfpnacc(__m64 __A, __m64 __B) {
   return (__m64)__builtin_ia32_pfpnacc((__v2sf)__A, (__v2sf)__B);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pi2fw(__m64 __A) {
+__funline __m64 _m_pi2fw(__m64 __A) {
   return (__m64)__builtin_ia32_pi2fw((__v2si)__A);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pswapd(__m64 __A) {
+__funline __m64 _m_pswapd(__m64 __A) {
   return (__m64)__builtin_ia32_pswapdsf((__v2sf)__A);
 }

518  third_party/intel/mmintrin.internal.h  vendored

@@ -23,231 +23,157 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
 typedef long long __v1di __attribute__((__vector_size__(8)));
 typedef float __v2sf __attribute__((__vector_size__(8)));

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_empty(void) {
+__funline void _mm_empty(void) {
   __builtin_ia32_emms();
 }

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_empty(void) {
+__funline void _m_empty(void) {
   _mm_empty();
 }
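MMX registers alias the x87 floating-point stack, so `_mm_empty()` (the `emms` instruction) has to run after an MMX sequence and before any x87 arithmetic. A sketch of the expected discipline, using intrinsics defined later in this header:

#include <mmintrin.h>

static double mmx_then_float(void) {
  __m64 a = _mm_set1_pi16(7);
  __m64 b = _mm_set1_pi16(3);
  int lo = _mm_cvtsi64_si32(_mm_add_pi16(a, b));  /* MMX work */
  _mm_empty();                    /* leave MMX mode before FP math */
  return lo * 0.5;
}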

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi32_si64(int __i) {
+__funline __m64 _mm_cvtsi32_si64(int __i) {
   return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_from_int(int __i) {
+__funline __m64 _m_from_int(int __i) {
   return _mm_cvtsi32_si64(__i);
 }

 #ifdef __x86_64__

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_from_int64(long long __i) {
+__funline __m64 _m_from_int64(long long __i) {
   return (__m64)__i;
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi64_m64(long long __i) {
+__funline __m64 _mm_cvtsi64_m64(long long __i) {
   return (__m64)__i;
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi64x_si64(long long __i) {
+__funline __m64 _mm_cvtsi64x_si64(long long __i) {
   return (__m64)__i;
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set_pi64x(long long __i) {
+__funline __m64 _mm_set_pi64x(long long __i) {
   return (__m64)__i;
 }
 #endif

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi64_si32(__m64 __i) {
+__funline int _mm_cvtsi64_si32(__m64 __i) {
   return __builtin_ia32_vec_ext_v2si((__v2si)__i, 0);
 }

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_to_int(__m64 __i) {
+__funline int _m_to_int(__m64 __i) {
   return _mm_cvtsi64_si32(__i);
 }

 #ifdef __x86_64__

-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_to_int64(__m64 __i) {
+__funline long long _m_to_int64(__m64 __i) {
   return (long long)__i;
 }

-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtm64_si64(__m64 __i) {
+__funline long long _mm_cvtm64_si64(__m64 __i) {
   return (long long)__i;
 }

-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtsi64_si64x(__m64 __i) {
+__funline long long _mm_cvtsi64_si64x(__m64 __i) {
   return (long long)__i;
 }
 #endif

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_packs_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_packs_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_packsswb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_packsswb(__m64 __m1, __m64 __m2) {
   return _mm_packs_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_packs_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_packs_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_packssdw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_packssdw(__m64 __m1, __m64 __m2) {
   return _mm_packs_pi32(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_packs_pu16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_packs_pu16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_packuswb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_packuswb(__m64 __m1, __m64 __m2) {
   return _mm_packs_pu16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_punpckhbw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_punpckhbw(__m64 __m1, __m64 __m2) {
   return _mm_unpackhi_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_punpckhwd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_punpckhwd(__m64 __m1, __m64 __m2) {
   return _mm_unpackhi_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_punpckhdq(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_punpckhdq(__m64 __m1, __m64 __m2) {
   return _mm_unpackhi_pi32(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_punpcklbw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_punpcklbw(__m64 __m1, __m64 __m2) {
   return _mm_unpacklo_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_punpcklwd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_punpcklwd(__m64 __m1, __m64 __m2) {
   return _mm_unpacklo_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_punpckldq(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_punpckldq(__m64 __m1, __m64 __m2) {
   return _mm_unpacklo_pi32(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_add_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_add_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddb(__m64 __m1, __m64 __m2) {
   return _mm_add_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_add_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_add_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddw(__m64 __m1, __m64 __m2) {
   return _mm_add_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_add_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_add_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddd(__m64 __m1, __m64 __m2) {
   return _mm_add_pi32(__m1, __m2);
 }

@@ -257,9 +183,7 @@ extern __inline __m64
 #define __DISABLE_SSE2__
 #endif /* __SSE2__ */

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_add_si64(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_add_si64(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddq((__v1di)__m1, (__v1di)__m2);
 }
 #ifdef __DISABLE_SSE2__

@@ -267,87 +191,59 @@ extern __inline __m64
 #pragma GCC pop_options
 #endif /* __DISABLE_SSE2__ */

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_adds_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_adds_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddsb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddsb(__m64 __m1, __m64 __m2) {
   return _mm_adds_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_adds_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_adds_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddsw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddsw(__m64 __m1, __m64 __m2) {
   return _mm_adds_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_adds_pu8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_adds_pu8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddusb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddusb(__m64 __m1, __m64 __m2) {
   return _mm_adds_pu8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_adds_pu16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_adds_pu16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_paddusw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_paddusw(__m64 __m1, __m64 __m2) {
   return _mm_adds_pu16(__m1, __m2);
 }
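The `adds`/`subs` variants saturate instead of wrapping: unsigned bytes clamp to 0..255 and signed bytes to -128..127. A small demonstration of the unsigned case:

#include <mmintrin.h>
#include <assert.h>

int main(void) {
  __m64 sum = _mm_adds_pu8(_mm_set1_pi8((char)250), _mm_set1_pi8(10));
  assert((_mm_cvtsi64_si32(sum) & 0xff) == 0xff);  /* 250+10 clamps to 255 */
  _mm_empty();
  return 0;
}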

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sub_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_sub_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubb(__m64 __m1, __m64 __m2) {
   return _mm_sub_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sub_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_sub_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubw(__m64 __m1, __m64 __m2) {
   return _mm_sub_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sub_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_sub_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubd(__m64 __m1, __m64 __m2) {
   return _mm_sub_pi32(__m1, __m2);
 }

@@ -357,9 +253,7 @@ extern __inline __m64
 #define __DISABLE_SSE2__
 #endif /* __SSE2__ */

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sub_si64(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_sub_si64(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubq((__v1di)__m1, (__v1di)__m2);
 }
 #ifdef __DISABLE_SSE2__

@@ -367,462 +261,310 @@ extern __inline __m64
 #pragma GCC pop_options
 #endif /* __DISABLE_SSE2__ */

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_subs_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_subs_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubsb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubsb(__m64 __m1, __m64 __m2) {
   return _mm_subs_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_subs_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_subs_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubsw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubsw(__m64 __m1, __m64 __m2) {
   return _mm_subs_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_subs_pu8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_subs_pu8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubusb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubusb(__m64 __m1, __m64 __m2) {
   return _mm_subs_pu8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_subs_pu16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_subs_pu16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psubusw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_psubusw(__m64 __m1, __m64 __m2) {
   return _mm_subs_pu16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_madd_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_madd_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmaddwd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pmaddwd(__m64 __m1, __m64 __m2) {
   return _mm_madd_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmulhw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pmulhw(__m64 __m1, __m64 __m2) {
   return _mm_mulhi_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mullo_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_mullo_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pmullw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pmullw(__m64 __m1, __m64 __m2) {
   return _mm_mullo_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sll_pi16(__m64 __m, __m64 __count) {
+__funline __m64 _mm_sll_pi16(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psllw((__v4hi)__m, (__v4hi)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psllw(__m64 __m, __m64 __count) {
+__funline __m64 _m_psllw(__m64 __m, __m64 __count) {
   return _mm_sll_pi16(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_slli_pi16(__m64 __m, int __count) {
+__funline __m64 _mm_slli_pi16(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psllwi(__m64 __m, int __count) {
+__funline __m64 _m_psllwi(__m64 __m, int __count) {
   return _mm_slli_pi16(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sll_pi32(__m64 __m, __m64 __count) {
+__funline __m64 _mm_sll_pi32(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_pslld((__v2si)__m, (__v2si)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pslld(__m64 __m, __m64 __count) {
+__funline __m64 _m_pslld(__m64 __m, __m64 __count) {
   return _mm_sll_pi32(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_slli_pi32(__m64 __m, int __count) {
+__funline __m64 _mm_slli_pi32(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pslldi(__m64 __m, int __count) {
+__funline __m64 _m_pslldi(__m64 __m, int __count) {
   return _mm_slli_pi32(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sll_si64(__m64 __m, __m64 __count) {
+__funline __m64 _mm_sll_si64(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psllq((__v1di)__m, (__v1di)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psllq(__m64 __m, __m64 __count) {
+__funline __m64 _m_psllq(__m64 __m, __m64 __count) {
   return _mm_sll_si64(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_slli_si64(__m64 __m, int __count) {
+__funline __m64 _mm_slli_si64(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psllqi(__m64 __m, int __count) {
+__funline __m64 _m_psllqi(__m64 __m, int __count) {
   return _mm_slli_si64(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sra_pi16(__m64 __m, __m64 __count) {
+__funline __m64 _mm_sra_pi16(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psraw((__v4hi)__m, (__v4hi)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psraw(__m64 __m, __m64 __count) {
+__funline __m64 _m_psraw(__m64 __m, __m64 __count) {
   return _mm_sra_pi16(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srai_pi16(__m64 __m, int __count) {
+__funline __m64 _mm_srai_pi16(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrawi(__m64 __m, int __count) {
+__funline __m64 _m_psrawi(__m64 __m, int __count) {
   return _mm_srai_pi16(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sra_pi32(__m64 __m, __m64 __count) {
+__funline __m64 _mm_sra_pi32(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psrad((__v2si)__m, (__v2si)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrad(__m64 __m, __m64 __count) {
+__funline __m64 _m_psrad(__m64 __m, __m64 __count) {
   return _mm_sra_pi32(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srai_pi32(__m64 __m, int __count) {
+__funline __m64 _mm_srai_pi32(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psradi(__m64 __m, int __count) {
+__funline __m64 _m_psradi(__m64 __m, int __count) {
   return _mm_srai_pi32(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srl_pi16(__m64 __m, __m64 __count) {
+__funline __m64 _mm_srl_pi16(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psrlw((__v4hi)__m, (__v4hi)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrlw(__m64 __m, __m64 __count) {
+__funline __m64 _m_psrlw(__m64 __m, __m64 __count) {
   return _mm_srl_pi16(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srli_pi16(__m64 __m, int __count) {
+__funline __m64 _mm_srli_pi16(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrlwi(__m64 __m, int __count) {
+__funline __m64 _m_psrlwi(__m64 __m, int __count) {
   return _mm_srli_pi16(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srl_pi32(__m64 __m, __m64 __count) {
+__funline __m64 _mm_srl_pi32(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psrld((__v2si)__m, (__v2si)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrld(__m64 __m, __m64 __count) {
+__funline __m64 _m_psrld(__m64 __m, __m64 __count) {
   return _mm_srl_pi32(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srli_pi32(__m64 __m, int __count) {
+__funline __m64 _mm_srli_pi32(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrldi(__m64 __m, int __count) {
+__funline __m64 _m_psrldi(__m64 __m, int __count) {
   return _mm_srli_pi32(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srl_si64(__m64 __m, __m64 __count) {
+__funline __m64 _mm_srl_si64(__m64 __m, __m64 __count) {
   return (__m64)__builtin_ia32_psrlq((__v1di)__m, (__v1di)__count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrlq(__m64 __m, __m64 __count) {
+__funline __m64 _m_psrlq(__m64 __m, __m64 __count) {
   return _mm_srl_si64(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_srli_si64(__m64 __m, int __count) {
+__funline __m64 _mm_srli_si64(__m64 __m, int __count) {
   return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_psrlqi(__m64 __m, int __count) {
+__funline __m64 _m_psrlqi(__m64 __m, int __count) {
   return _mm_srli_si64(__m, __count);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_and_si64(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_and_si64(__m64 __m1, __m64 __m2) {
   return __builtin_ia32_pand(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pand(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pand(__m64 __m1, __m64 __m2) {
   return _mm_and_si64(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_andnot_si64(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_andnot_si64(__m64 __m1, __m64 __m2) {
   return __builtin_ia32_pandn(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pandn(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pandn(__m64 __m1, __m64 __m2) {
   return _mm_andnot_si64(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_or_si64(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_or_si64(__m64 __m1, __m64 __m2) {
   return __builtin_ia32_por(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_por(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_por(__m64 __m1, __m64 __m2) {
   return _mm_or_si64(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_xor_si64(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_xor_si64(__m64 __m1, __m64 __m2) {
   return __builtin_ia32_pxor(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pxor(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pxor(__m64 __m1, __m64 __m2) {
   return _mm_xor_si64(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pcmpeqb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pcmpeqb(__m64 __m1, __m64 __m2) {
   return _mm_cmpeq_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pcmpgtb(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pcmpgtb(__m64 __m1, __m64 __m2) {
   return _mm_cmpgt_pi8(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pcmpeqw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pcmpeqw(__m64 __m1, __m64 __m2) {
   return _mm_cmpeq_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pcmpgtw(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pcmpgtw(__m64 __m1, __m64 __m2) {
   return _mm_cmpgt_pi16(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pcmpeqd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pcmpeqd(__m64 __m1, __m64 __m2) {
   return _mm_cmpeq_pi32(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
+__funline __m64 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _m_pcmpgtd(__m64 __m1, __m64 __m2) {
+__funline __m64 _m_pcmpgtd(__m64 __m1, __m64 __m2) {
   return _mm_cmpgt_pi32(__m1, __m2);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setzero_si64(void) {
+__funline __m64 _mm_setzero_si64(void) {
   return (__m64)0LL;
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set_pi32(int __i1, int __i0) {
+__funline __m64 _mm_set_pi32(int __i1, int __i0) {
   return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
+__funline __m64 _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
   return (__m64)__builtin_ia32_vec_init_v4hi(__w0, __w1, __w2, __w3);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
+__funline __m64 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
                             char __b2, char __b1, char __b0) {
   return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5,
                                              __b6, __b7);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setr_pi32(int __i0, int __i1) {
+__funline __m64 _mm_setr_pi32(int __i0, int __i1) {
   return _mm_set_pi32(__i1, __i0);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
+__funline __m64 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
   return _mm_set_pi16(__w3, __w2, __w1, __w0);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4,
-                 char __b5, char __b6, char __b7) {
+__funline __m64 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3,
+                             char __b4, char __b5, char __b6, char __b7) {
   return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set1_pi32(int __i) {
+__funline __m64 _mm_set1_pi32(int __i) {
   return _mm_set_pi32(__i, __i);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set1_pi16(short __w) {
+__funline __m64 _mm_set1_pi16(short __w) {
   return _mm_set_pi16(__w, __w, __w, __w);
 }

-extern __inline __m64
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_set1_pi8(char __b) {
+__funline __m64 _mm_set1_pi8(char __b) {
   return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
 }
|
||||
#ifdef __DISABLE_MMX__
|
||||
|
|
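The pattern above repeats for every intrinsic in the commit: a three-line extern __inline declaration collapses to one __funline line with identical semantics. For orientation, a minimal sketch of the MMX set/compare helpers in use (an illustration only, assuming a 64-bit host toolchain whose standard <mmintrin.h> also supplies _mm_cvtm64_si64 and _mm_empty, which are not part of this diff):

#include <mmintrin.h>
#include <stdio.h>

int main(void) {
  __m64 a = _mm_set_pi16(1, 2, 3, 4);   /* arguments pack high lane to low */
  __m64 b = _mm_setr_pi16(4, 3, 2, 1);  /* same lanes via reversed order */
  __m64 eq = _mm_cmpeq_pi16(a, b);      /* 0xffff in each equal 16-bit lane */
  printf("%016llx\n", (unsigned long long)_mm_cvtm64_si64(eq));
  _mm_empty();  /* clear MMX state before any x87 floating point */
  return 0;
}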

12 third_party/intel/movdirintrin.internal.h vendored

@@ -11,15 +11,11 @@
#define __DISABLE_MOVDIRI__
#endif /* __MOVDIRI__ */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _directstoreu_u32(void *__P, unsigned int __A) {
+__funline void _directstoreu_u32(void *__P, unsigned int __A) {
  __builtin_ia32_directstoreu_u32((unsigned int *)__P, __A);
}
#ifdef __x86_64__
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _directstoreu_u64(void *__P, unsigned long long __A) {
+__funline void _directstoreu_u64(void *__P, unsigned long long __A) {
  __builtin_ia32_directstoreu_u64((unsigned long long *)__P, __A);
}
#endif

@@ -35,9 +31,7 @@ extern __inline void
#define __DISABLE_MOVDIR64B__
#endif /* __MOVDIR64B__ */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _movdir64b(void *__P, const void *__Q) {
+__funline void _movdir64b(void *__P, const void *__Q) {
  __builtin_ia32_movdir64b(__P, __Q);
}

8 third_party/intel/mwaitxintrin.internal.h vendored

@@ -8,15 +8,11 @@
#define __DISABLE_MWAITX__
#endif /* __MWAITX__ */

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_monitorx(void const* __P, unsigned int __E, unsigned int __H) {
+__funline void _mm_monitorx(void const* __P, unsigned int __E, unsigned int __H) {
  __builtin_ia32_monitorx(__P, __E, __H);
}

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mwaitx(unsigned int __E, unsigned int __H, unsigned int __C) {
+__funline void _mm_mwaitx(unsigned int __E, unsigned int __H, unsigned int __C) {
  __builtin_ia32_mwaitx(__E, __H, __C);
}

4 third_party/intel/pconfigintrin.internal.h vendored

@@ -24,9 +24,7 @@
               : "a"(leaf), "b"(b), "c"(c), "d"(d) \
               : "cc")

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _pconfig_u32(const unsigned int __L, size_t __D[]) {
+__funline unsigned int _pconfig_u32(const unsigned int __L, size_t __D[]) {
  enum __pconfig_type {
    __PCONFIG_KEY_PROGRAM = 0x01,
  };

8 third_party/intel/pkuintrin.internal.h vendored

@@ -11,15 +11,11 @@
#define __DISABLE_PKU__
#endif /* __PKU__ */

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _rdpkru_u32(void) {
+__funline unsigned int _rdpkru_u32(void) {
  return __builtin_ia32_rdpkru();
}

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _wrpkru(unsigned int __key) {
+__funline void _wrpkru(unsigned int __key) {
  __builtin_ia32_wrpkru(__key);
}

52 third_party/intel/pmmintrin.internal.h vendored

@@ -17,81 +17,55 @@
  _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_addsub_ps(__m128 __X, __m128 __Y) {
+__funline __m128 _mm_addsub_ps(__m128 __X, __m128 __Y) {
  return (__m128)__builtin_ia32_addsubps((__v4sf)__X, (__v4sf)__Y);
}

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_hadd_ps(__m128 __X, __m128 __Y) {
+__funline __m128 _mm_hadd_ps(__m128 __X, __m128 __Y) {
  return (__m128)__builtin_ia32_haddps((__v4sf)__X, (__v4sf)__Y);
}

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_hsub_ps(__m128 __X, __m128 __Y) {
+__funline __m128 _mm_hsub_ps(__m128 __X, __m128 __Y) {
  return (__m128)__builtin_ia32_hsubps((__v4sf)__X, (__v4sf)__Y);
}

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_movehdup_ps(__m128 __X) {
+__funline __m128 _mm_movehdup_ps(__m128 __X) {
  return (__m128)__builtin_ia32_movshdup((__v4sf)__X);
}

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_moveldup_ps(__m128 __X) {
+__funline __m128 _mm_moveldup_ps(__m128 __X) {
  return (__m128)__builtin_ia32_movsldup((__v4sf)__X);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_addsub_pd(__m128d __X, __m128d __Y) {
+__funline __m128d _mm_addsub_pd(__m128d __X, __m128d __Y) {
  return (__m128d)__builtin_ia32_addsubpd((__v2df)__X, (__v2df)__Y);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_hadd_pd(__m128d __X, __m128d __Y) {
+__funline __m128d _mm_hadd_pd(__m128d __X, __m128d __Y) {
  return (__m128d)__builtin_ia32_haddpd((__v2df)__X, (__v2df)__Y);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_hsub_pd(__m128d __X, __m128d __Y) {
+__funline __m128d _mm_hsub_pd(__m128d __X, __m128d __Y) {
  return (__m128d)__builtin_ia32_hsubpd((__v2df)__X, (__v2df)__Y);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_loaddup_pd(double const *__P) {
+__funline __m128d _mm_loaddup_pd(double const *__P) {
  return _mm_load1_pd(__P);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_movedup_pd(__m128d __X) {
+__funline __m128d _mm_movedup_pd(__m128d __X) {
  return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0));
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_lddqu_si128(__m128i const *__P) {
+__funline __m128i _mm_lddqu_si128(__m128i const *__P) {
  return (__m128i)__builtin_ia32_lddqu((char const *)__P);
}

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_monitor(void const *__P, unsigned int __E, unsigned int __H) {
+__funline void _mm_monitor(void const *__P, unsigned int __E, unsigned int __H) {
  __builtin_ia32_monitor(__P, __E, __H);
}

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mwait(unsigned int __E, unsigned int __H) {
+__funline void _mm_mwait(unsigned int __E, unsigned int __H) {
  __builtin_ia32_mwait(__E, __H);
}
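The SSE3 horizontal operations above combine adjacent lanes rather than corresponding ones, which is easy to misread; a small sketch (assuming a host compiler's <pmmintrin.h> and compilation with -msse3):

#include <pmmintrin.h>
#include <stdio.h>

int main(void) {
  __m128 v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f); /* lanes low-to-high: 1,2,3,4 */
  __m128 h = _mm_hadd_ps(v, v);                  /* pairwise sums: 3,7,3,7 */
  float out[4];
  _mm_storeu_ps(out, h);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}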

8 third_party/intel/popcntintrin.internal.h vendored

@@ -8,16 +8,12 @@
#define __DISABLE_POPCNT__
#endif /* __POPCNT__ */

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_popcnt_u32(unsigned int __X) {
+__funline int _mm_popcnt_u32(unsigned int __X) {
  return __builtin_popcount(__X);
}

#ifdef __x86_64__
-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_popcnt_u64(unsigned long long __X) {
+__funline long long _mm_popcnt_u64(unsigned long long __X) {
  return __builtin_popcountll(__X);
}
#endif
third_party/intel/prfchwintrin.internal.h
vendored
4
third_party/intel/prfchwintrin.internal.h
vendored
|
@ -6,9 +6,7 @@
|
|||
#ifndef _PRFCHWINTRIN_H_INCLUDED
|
||||
#define _PRFCHWINTRIN_H_INCLUDED
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_prefetchw(void *__P) {
|
||||
__funline void _m_prefetchw(void *__P) {
|
||||
__builtin_prefetch(__P, 1, 3 /* _MM_HINT_T0 */);
|
||||
}
|
||||
|
||||
|
|
12
third_party/intel/rdseedintrin.internal.h
vendored
12
third_party/intel/rdseedintrin.internal.h
vendored
|
@ -11,22 +11,16 @@
|
|||
#define __DISABLE_RDSEED__
|
||||
#endif /* __RDSEED__ */
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_rdseed16_step(unsigned short *__p) {
|
||||
__funline int _rdseed16_step(unsigned short *__p) {
|
||||
return __builtin_ia32_rdseed_hi_step(__p);
|
||||
}
|
||||
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_rdseed32_step(unsigned int *__p) {
|
||||
__funline int _rdseed32_step(unsigned int *__p) {
|
||||
return __builtin_ia32_rdseed_si_step(__p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_rdseed64_step(unsigned long long *__p) {
|
||||
__funline int _rdseed64_step(unsigned long long *__p) {
|
||||
return __builtin_ia32_rdseed_di_step(__p);
|
||||
}
|
||||
#endif
|
||||
|
|

12 third_party/intel/rtmintrin.internal.h vendored

@@ -20,22 +20,16 @@
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _xbegin(void) {
+__funline unsigned int _xbegin(void) {
  return __builtin_ia32_xbegin();
}

-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _xend(void) {
+__funline void _xend(void) {
  __builtin_ia32_xend();
}

#ifdef __OPTIMIZE__
-extern __inline void
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _xabort(const unsigned int __imm) {
+__funline void _xabort(const unsigned int __imm) {
  __builtin_ia32_xabort(__imm);
}
#else
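The _xbegin/_xend pair above supports the usual transactional retry idiom; a minimal sketch, not this library's own usage, assuming an RTM-capable CPU and -mrtm (_XBEGIN_STARTED is the standard status value from rtmintrin.h; the fallback uses a GCC atomic builtin rather than a real elided lock):

#include <immintrin.h>
#include <stdio.h>

static long counter;

static void increment(void) {
  if (_xbegin() == _XBEGIN_STARTED) {
    ++counter;                          /* executes transactionally */
    _xend();                            /* commit */
  } else {
    __sync_fetch_and_add(&counter, 1);  /* abort path */
  }
}

int main(void) {
  increment();
  printf("%ld\n", counter);
  return 0;
}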

12 third_party/intel/sgxintrin.internal.h vendored

@@ -80,9 +80,7 @@
               : "a"(leaf), "b"(b), "c"(c), "d"(d) \
               : "cc")

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _encls_u32(const unsigned int __L, size_t __D[]) {
+__funline unsigned int _encls_u32(const unsigned int __L, size_t __D[]) {
  enum __encls_type {
    __SGX_ECREATE = 0x00,
    __SGX_EADD = 0x01,

@@ -145,9 +143,7 @@ extern __inline unsigned int
  return __R;
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _enclu_u32(const unsigned int __L, size_t __D[]) {
+__funline unsigned int _enclu_u32(const unsigned int __L, size_t __D[]) {
  enum __enclu_type {
    __SGX_EREPORT = 0x00,
    __SGX_EGETKEY = 0x01,

@@ -186,9 +182,7 @@ extern __inline unsigned int
  return __R;
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _enclv_u32(const unsigned int __L, size_t __D[]) {
+__funline unsigned int _enclv_u32(const unsigned int __L, size_t __D[]) {
  enum __enclv_type {
    __SGX_EDECVIRTCHILD = 0x00,
    __SGX_EINCVIRTCHILD = 0x01,

28 third_party/intel/shaintrin.internal.h vendored

@@ -11,28 +11,20 @@
#define __DISABLE_SHA__
#endif /* __SHA__ */

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha1msg1_epu32(__m128i __A, __m128i __B) {
+__funline __m128i _mm_sha1msg1_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_sha1msg1((__v4si)__A, (__v4si)__B);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha1msg2_epu32(__m128i __A, __m128i __B) {
+__funline __m128i _mm_sha1msg2_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_sha1msg2((__v4si)__A, (__v4si)__B);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha1nexte_epu32(__m128i __A, __m128i __B) {
+__funline __m128i _mm_sha1nexte_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_sha1nexte((__v4si)__A, (__v4si)__B);
}

#ifdef __OPTIMIZE__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha1rnds4_epu32(__m128i __A, __m128i __B, const int __I) {
+__funline __m128i _mm_sha1rnds4_epu32(__m128i __A, __m128i __B, const int __I) {
  return (__m128i)__builtin_ia32_sha1rnds4((__v4si)__A, (__v4si)__B, __I);
}
#else

@@ -41,21 +33,15 @@ extern __inline __m128i
                                           (int)I))
#endif

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha256msg1_epu32(__m128i __A, __m128i __B) {
+__funline __m128i _mm_sha256msg1_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_sha256msg1((__v4si)__A, (__v4si)__B);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha256msg2_epu32(__m128i __A, __m128i __B) {
+__funline __m128i _mm_sha256msg2_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_sha256msg2((__v4si)__A, (__v4si)__B);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_sha256rnds2_epu32(__m128i __A, __m128i __B, __m128i __C) {
+__funline __m128i _mm_sha256rnds2_epu32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__A, (__v4si)__B,
                                             (__v4si)__C);
}

284 third_party/intel/smmintrin.internal.h vendored

@@ -25,21 +25,15 @@
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_testz_si128(__m128i __M, __m128i __V) {
+__funline int _mm_testz_si128(__m128i __M, __m128i __V) {
  return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_testc_si128(__m128i __M, __m128i __V) {
+__funline int _mm_testc_si128(__m128i __M, __m128i __V) {
  return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_testnzc_si128(__m128i __M, __m128i __V) {
+__funline int _mm_testnzc_si128(__m128i __M, __m128i __V) {
  return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}

@@ -50,15 +44,11 @@ extern __inline int
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))

#ifdef __OPTIMIZE__
-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_round_pd(__m128d __V, const int __M) {
+__funline __m128d _mm_round_pd(__m128d __V, const int __M) {
  return (__m128d)__builtin_ia32_roundpd((__v2df)__V, __M);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_round_sd(__m128d __D, __m128d __V, const int __M) {
+__funline __m128d _mm_round_sd(__m128d __D, __m128d __V, const int __M) {
  return (__m128d)__builtin_ia32_roundsd((__v2df)__D, (__v2df)__V, __M);
}
#else

@@ -71,15 +61,11 @@ extern __inline __m128d
#endif

#ifdef __OPTIMIZE__
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_round_ps(__m128 __V, const int __M) {
+__funline __m128 _mm_round_ps(__m128 __V, const int __M) {
  return (__m128)__builtin_ia32_roundps((__v4sf)__V, __M);
}

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_round_ss(__m128 __D, __m128 __V, const int __M) {
+__funline __m128 _mm_round_ss(__m128 __D, __m128 __V, const int __M) {
  return (__m128)__builtin_ia32_roundss((__v4sf)__D, (__v4sf)__V, __M);
}
#else

@@ -104,9 +90,7 @@ extern __inline __m128
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)

#ifdef __OPTIMIZE__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_blend_epi16(__m128i __X, __m128i __Y, const int __M) {
+__funline __m128i _mm_blend_epi16(__m128i __X, __m128i __Y, const int __M) {
  return (__m128i)__builtin_ia32_pblendw128((__v8hi)__X, (__v8hi)__Y, __M);
}
#else

@@ -115,17 +99,13 @@ extern __inline __m128i
                                          (__v8hi)(__m128i)(Y), (int)(M)))
#endif

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_blendv_epi8(__m128i __X, __m128i __Y, __m128i __M) {
+__funline __m128i _mm_blendv_epi8(__m128i __X, __m128i __Y, __m128i __M) {
  return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__X, (__v16qi)__Y,
                                             (__v16qi)__M);
}

#ifdef __OPTIMIZE__
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_blend_ps(__m128 __X, __m128 __Y, const int __M) {
+__funline __m128 _mm_blend_ps(__m128 __X, __m128 __Y, const int __M) {
  return (__m128)__builtin_ia32_blendps((__v4sf)__X, (__v4sf)__Y, __M);
}
#else

@@ -134,16 +114,12 @@ extern __inline __m128
                                        (int)(M)))
#endif

-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_blendv_ps(__m128 __X, __m128 __Y, __m128 __M) {
+__funline __m128 _mm_blendv_ps(__m128 __X, __m128 __Y, __m128 __M) {
  return (__m128)__builtin_ia32_blendvps((__v4sf)__X, (__v4sf)__Y, (__v4sf)__M);
}

#ifdef __OPTIMIZE__
-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_blend_pd(__m128d __X, __m128d __Y, const int __M) {
+__funline __m128d _mm_blend_pd(__m128d __X, __m128d __Y, const int __M) {
  return (__m128d)__builtin_ia32_blendpd((__v2df)__X, (__v2df)__Y, __M);
}
#else

@@ -152,23 +128,17 @@ extern __inline __m128d
                                        (int)(M)))
#endif

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_blendv_pd(__m128d __X, __m128d __Y, __m128d __M) {
+__funline __m128d _mm_blendv_pd(__m128d __X, __m128d __Y, __m128d __M) {
  return (__m128d)__builtin_ia32_blendvpd((__v2df)__X, (__v2df)__Y,
                                          (__v2df)__M);
}

#ifdef __OPTIMIZE__
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_dp_ps(__m128 __X, __m128 __Y, const int __M) {
+__funline __m128 _mm_dp_ps(__m128 __X, __m128 __Y, const int __M) {
  return (__m128)__builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, __M);
}

-extern __inline __m128d
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_dp_pd(__m128d __X, __m128d __Y, const int __M) {
+__funline __m128d _mm_dp_pd(__m128d __X, __m128d __Y, const int __M) {
  return (__m128d)__builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, __M);
}
#else

@@ -181,76 +151,52 @@ extern __inline __m128d
                                      (int)(M)))
#endif

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)((__v2di)__X == (__v2di)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_epi8(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_min_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pminsb128((__v16qi)__X, (__v16qi)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_epi8(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_max_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pmaxsb128((__v16qi)__X, (__v16qi)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_epu16(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_min_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pminuw128((__v8hi)__X, (__v8hi)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_epu16(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_max_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pmaxuw128((__v8hi)__X, (__v8hi)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_epi32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_min_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pminsd128((__v4si)__X, (__v4si)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_epi32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_max_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pmaxsd128((__v4si)__X, (__v4si)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_min_epu32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_min_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pminud128((__v4si)__X, (__v4si)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_max_epu32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_max_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pmaxud128((__v4si)__X, (__v4si)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mullo_epi32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_mullo_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)((__v4su)__X * (__v4su)__Y);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mul_epi32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_mul_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_pmuldq128((__v4si)__X, (__v4si)__Y);
}

#ifdef __OPTIMIZE__
-extern __inline __m128
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_insert_ps(__m128 __D, __m128 __S, const int __N) {
+__funline __m128 _mm_insert_ps(__m128 __D, __m128 __S, const int __N) {
  return (__m128)__builtin_ia32_insertps128((__v4sf)__D, (__v4sf)__S, __N);
}
#else

@@ -262,9 +208,7 @@ extern __inline __m128
#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))

#ifdef __OPTIMIZE__
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_extract_ps(__m128 __X, const int __N) {
+__funline int _mm_extract_ps(__m128 __X, const int __N) {
  union {
    int i;
    float f;

@@ -291,22 +235,16 @@ extern __inline int
  _mm_insert_ps(_mm_setzero_ps(), (X), _MM_MK_INSERTPS_NDX((N), 0, 0x0e))

#ifdef __OPTIMIZE__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_insert_epi8(__m128i __D, int __S, const int __N) {
+__funline __m128i _mm_insert_epi8(__m128i __D, int __S, const int __N) {
  return (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)__D, __S, __N);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_insert_epi32(__m128i __D, int __S, const int __N) {
+__funline __m128i _mm_insert_epi32(__m128i __D, int __S, const int __N) {
  return (__m128i)__builtin_ia32_vec_set_v4si((__v4si)__D, __S, __N);
}

#ifdef __x86_64__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_insert_epi64(__m128i __D, long long __S, const int __N) {
+__funline __m128i _mm_insert_epi64(__m128i __D, long long __S, const int __N) {
  return (__m128i)__builtin_ia32_vec_set_v2di((__v2di)__D, __S, __N);
}
#endif

@@ -327,22 +265,16 @@ extern __inline __m128i
#endif

#ifdef __OPTIMIZE__
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_extract_epi8(__m128i __X, const int __N) {
+__funline int _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)__X, __N);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_extract_epi32(__m128i __X, const int __N) {
+__funline int _mm_extract_epi32(__m128i __X, const int __N) {
  return __builtin_ia32_vec_ext_v4si((__v4si)__X, __N);
}

#ifdef __x86_64__
-extern __inline long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_extract_epi64(__m128i __X, const int __N) {
+__funline long long _mm_extract_epi64(__m128i __X, const int __N) {
  return __builtin_ia32_vec_ext_v2di((__v2di)__X, __N);
}
#endif

@@ -359,94 +291,64 @@ extern __inline long long
#endif
#endif

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_minpos_epu16(__m128i __X) {
+__funline __m128i _mm_minpos_epu16(__m128i __X) {
  return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepi8_epi32(__m128i __X) {
+__funline __m128i _mm_cvtepi8_epi32(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovsxbd128((__v16qi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepi16_epi32(__m128i __X) {
+__funline __m128i _mm_cvtepi16_epi32(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovsxwd128((__v8hi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepi8_epi64(__m128i __X) {
+__funline __m128i _mm_cvtepi8_epi64(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovsxbq128((__v16qi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepi32_epi64(__m128i __X) {
+__funline __m128i _mm_cvtepi32_epi64(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovsxdq128((__v4si)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepi16_epi64(__m128i __X) {
+__funline __m128i _mm_cvtepi16_epi64(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovsxwq128((__v8hi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepi8_epi16(__m128i __X) {
+__funline __m128i _mm_cvtepi8_epi16(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovsxbw128((__v16qi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepu8_epi32(__m128i __X) {
+__funline __m128i _mm_cvtepu8_epi32(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovzxbd128((__v16qi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepu16_epi32(__m128i __X) {
+__funline __m128i _mm_cvtepu16_epi32(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovzxwd128((__v8hi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepu8_epi64(__m128i __X) {
+__funline __m128i _mm_cvtepu8_epi64(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovzxbq128((__v16qi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepu32_epi64(__m128i __X) {
+__funline __m128i _mm_cvtepu32_epi64(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovzxdq128((__v4si)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepu16_epi64(__m128i __X) {
+__funline __m128i _mm_cvtepu16_epi64(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovzxwq128((__v8hi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cvtepu8_epi16(__m128i __X) {
+__funline __m128i _mm_cvtepu8_epi16(__m128i __X) {
  return (__m128i)__builtin_ia32_pmovzxbw128((__v16qi)__X);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_packus_epi32(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_packus_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_packusdw128((__v4si)__X, (__v4si)__Y);
}

#ifdef __OPTIMIZE__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_mpsadbw_epu8(__m128i __X, __m128i __Y, const int __M) {
+__funline __m128i _mm_mpsadbw_epu8(__m128i __X, __m128i __Y, const int __M) {
  return (__m128i)__builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, __M);
}
#else

@@ -455,9 +357,7 @@ extern __inline __m128i
                                         (__v16qi)(__m128i)(Y), (int)(M)))
#endif

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_stream_load_si128(__m128i *__X) {
+__funline __m128i _mm_stream_load_si128(__m128i *__X) {
  return (__m128i)__builtin_ia32_movntdqa((__v2di *)__X);
}

@@ -489,28 +389,22 @@ extern __inline __m128i
#define _SIDD_UNIT_MASK 0x40

#ifdef __OPTIMIZE__
-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistrm(__m128i __X, __m128i __Y, const int __M) {
+__funline __m128i _mm_cmpistrm(__m128i __X, __m128i __Y, const int __M) {
  return (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistri(__m128i __X, __m128i __Y, const int __M) {
+__funline int _mm_cmpistri(__m128i __X, __m128i __Y, const int __M) {
  return __builtin_ia32_pcmpistri128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestrm(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline __m128i _mm_cmpestrm(__m128i __X, int __LX, __m128i __Y, int __LY,
+                               const int __M) {
  return (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)__X, __LX, (__v16qi)__Y,
                                              __LY, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestri(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline int _mm_cmpestri(__m128i __X, int __LX, __m128i __Y, int __LY,
+                           const int __M) {
  return __builtin_ia32_pcmpestri128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
                                     __M);
}

@@ -533,67 +427,52 @@ extern __inline int
#endif

#ifdef __OPTIMIZE__
-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistra(__m128i __X, __m128i __Y, const int __M) {
+__funline int _mm_cmpistra(__m128i __X, __m128i __Y, const int __M) {
  return __builtin_ia32_pcmpistria128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistrc(__m128i __X, __m128i __Y, const int __M) {
+__funline int _mm_cmpistrc(__m128i __X, __m128i __Y, const int __M) {
  return __builtin_ia32_pcmpistric128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistro(__m128i __X, __m128i __Y, const int __M) {
+__funline int _mm_cmpistro(__m128i __X, __m128i __Y, const int __M) {
  return __builtin_ia32_pcmpistrio128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistrs(__m128i __X, __m128i __Y, const int __M) {
+__funline int _mm_cmpistrs(__m128i __X, __m128i __Y, const int __M) {
  return __builtin_ia32_pcmpistris128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpistrz(__m128i __X, __m128i __Y, const int __M) {
+__funline int _mm_cmpistrz(__m128i __X, __m128i __Y, const int __M) {
  return __builtin_ia32_pcmpistriz128((__v16qi)__X, (__v16qi)__Y, __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestra(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline int _mm_cmpestra(__m128i __X, int __LX, __m128i __Y, int __LY,
+                           const int __M) {
  return __builtin_ia32_pcmpestria128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
                                      __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestrc(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline int _mm_cmpestrc(__m128i __X, int __LX, __m128i __Y, int __LY,
+                           const int __M) {
  return __builtin_ia32_pcmpestric128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
                                      __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestro(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline int _mm_cmpestro(__m128i __X, int __LX, __m128i __Y, int __LY,
+                           const int __M) {
  return __builtin_ia32_pcmpestrio128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
                                      __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestrs(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline int _mm_cmpestrs(__m128i __X, int __LX, __m128i __Y, int __LY,
+                           const int __M) {
  return __builtin_ia32_pcmpestris128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
                                      __M);
}

-extern __inline int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpestrz(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
+__funline int _mm_cmpestrz(__m128i __X, int __LX, __m128i __Y, int __LY,
+                           const int __M) {
  return __builtin_ia32_pcmpestriz128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
                                      __M);
}

@@ -636,9 +515,7 @@ extern __inline int
                                    (int)(M)))
#endif

-extern __inline __m128i
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
+__funline __m128i _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)((__v2di)__X > (__v2di)__Y);
}

@@ -667,28 +544,21 @@ extern __inline __m128i
#endif /* __SSE4_1__ */

/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_crc32_u8(unsigned int __C, unsigned char __V) {
+__funline unsigned int _mm_crc32_u8(unsigned int __C, unsigned char __V) {
  return __builtin_ia32_crc32qi(__C, __V);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_crc32_u16(unsigned int __C, unsigned short __V) {
+__funline unsigned int _mm_crc32_u16(unsigned int __C, unsigned short __V) {
  return __builtin_ia32_crc32hi(__C, __V);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_crc32_u32(unsigned int __C, unsigned int __V) {
+__funline unsigned int _mm_crc32_u32(unsigned int __C, unsigned int __V) {
  return __builtin_ia32_crc32si(__C, __V);
}

#ifdef __x86_64__
-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    _mm_crc32_u64(unsigned long long __C, unsigned long long __V) {
+__funline unsigned long long _mm_crc32_u64(unsigned long long __C,
+                                           unsigned long long __V) {
  return __builtin_ia32_crc32di(__C, __V);
}
#endif
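The CRC32 intrinsics at the end of this file compute CRC-32C (the Castagnoli polynomial named in the comment), not the zlib CRC-32. A byte-at-a-time sketch (assuming a host <nmmintrin.h> and -msse4.2; the 0xFFFFFFFF initial value and final xor are the common CRC-32C convention, not something the header enforces):

#include <nmmintrin.h>
#include <stdio.h>
#include <string.h>

static unsigned crc32c(const void *data, size_t n) {
  const unsigned char *p = data;
  unsigned crc = 0xFFFFFFFFu;
  while (n--) crc = _mm_crc32_u8(crc, *p++);  /* wider _mm_crc32_u64 steps would be faster */
  return crc ^ 0xFFFFFFFFu;
}

int main(void) {
  const char *s = "hello";
  printf("%08x\n", crc32c(s, strlen(s)));
  return 0;
}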

81 third_party/intel/tbmintrin.internal.h vendored

@@ -12,9 +12,7 @@
#endif /* __TBM__ */

#ifdef __OPTIMIZE__
-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __bextri_u32(unsigned int __X, const unsigned int __I) {
+__funline unsigned int __bextri_u32(unsigned int __X, const unsigned int __I) {
  return __builtin_ia32_bextri_u32(__X, __I);
}
#else

@@ -23,65 +21,46 @@ extern __inline unsigned int
                                  (unsigned int)(I)))
#endif /*__OPTIMIZE__ */

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcfill_u32(unsigned int __X) {
+__funline unsigned int __blcfill_u32(unsigned int __X) {
  return __X & (__X + 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blci_u32(unsigned int __X) {
+__funline unsigned int __blci_u32(unsigned int __X) {
  return __X | ~(__X + 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcic_u32(unsigned int __X) {
+__funline unsigned int __blcic_u32(unsigned int __X) {
  return ~__X & (__X + 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcmsk_u32(unsigned int __X) {
+__funline unsigned int __blcmsk_u32(unsigned int __X) {
  return __X ^ (__X + 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcs_u32(unsigned int __X) {
+__funline unsigned int __blcs_u32(unsigned int __X) {
  return __X | (__X + 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blsfill_u32(unsigned int __X) {
+__funline unsigned int __blsfill_u32(unsigned int __X) {
  return __X | (__X - 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blsic_u32(unsigned int __X) {
+__funline unsigned int __blsic_u32(unsigned int __X) {
  return ~__X | (__X - 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __t1mskc_u32(unsigned int __X) {
+__funline unsigned int __t1mskc_u32(unsigned int __X) {
  return ~__X | (__X + 1);
}

-extern __inline unsigned int
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __tzmsk_u32(unsigned int __X) {
+__funline unsigned int __tzmsk_u32(unsigned int __X) {
  return ~__X & (__X - 1);
}

#ifdef __x86_64__
#ifdef __OPTIMIZE__
-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __bextri_u64(unsigned long long __X, const unsigned int __I) {
+__funline unsigned long long __bextri_u64(unsigned long long __X,
+                                          const unsigned int __I) {
  return __builtin_ia32_bextri_u64(__X, __I);
}
#else

@@ -90,57 +69,39 @@ extern __inline unsigned long long
                                  (unsigned long long)(I)))
#endif /*__OPTIMIZE__ */

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcfill_u64(unsigned long long __X) {
+__funline unsigned long long __blcfill_u64(unsigned long long __X) {
  return __X & (__X + 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blci_u64(unsigned long long __X) {
+__funline unsigned long long __blci_u64(unsigned long long __X) {
  return __X | ~(__X + 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcic_u64(unsigned long long __X) {
+__funline unsigned long long __blcic_u64(unsigned long long __X) {
  return ~__X & (__X + 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcmsk_u64(unsigned long long __X) {
+__funline unsigned long long __blcmsk_u64(unsigned long long __X) {
  return __X ^ (__X + 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blcs_u64(unsigned long long __X) {
+__funline unsigned long long __blcs_u64(unsigned long long __X) {
  return __X | (__X + 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blsfill_u64(unsigned long long __X) {
+__funline unsigned long long __blsfill_u64(unsigned long long __X) {
  return __X | (__X - 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __blsic_u64(unsigned long long __X) {
+__funline unsigned long long __blsic_u64(unsigned long long __X) {
  return ~__X | (__X - 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __t1mskc_u64(unsigned long long __X) {
+__funline unsigned long long __t1mskc_u64(unsigned long long __X) {
  return ~__X | (__X + 1);
}

-extern __inline unsigned long long
-    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-    __tzmsk_u64(unsigned long long __X) {
+__funline unsigned long long __tzmsk_u64(unsigned long long __X) {
  return ~__X & (__X - 1);
}
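Since the non-bextri TBM helpers above are plain integer identities, their behavior can be checked without any special hardware; for example __blsfill sets every bit below the lowest set bit and __tzmsk masks the trailing zeros. Standalone reimplementations for illustration (hypothetical names mirroring the header's, not the header itself):

#include <stdio.h>

static unsigned blsfill_u32(unsigned x) { return x | (x - 1); }  /* mirrors __blsfill_u32 */
static unsigned tzmsk_u32(unsigned x) { return ~x & (x - 1); }   /* mirrors __tzmsk_u32 */

int main(void) {
  unsigned x = 0x28;                 /* 0b101000 */
  printf("%#x\n", blsfill_u32(x));   /* 0x2f = 0b101111 */
  printf("%#x\n", tzmsk_u32(x));     /* 0x7 = mask of the trailing zeros */
  return 0;
}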

128 third_party/intel/tmmintrin.internal.h vendored

@ -9,160 +9,108 @@
|
|||
#define __DISABLE_SSSE3__
|
||||
#endif /* __SSSE3__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_epi16(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_hadd_epi16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_epi32(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_hadd_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_phaddd128((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadds_epi16(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_hadds_epi16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_pi16(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_hadd_pi16(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_phaddw((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_pi32(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_hadd_pi32(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_phaddd((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadds_pi16(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_hadds_pi16(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_phaddsw((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_epi16(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_hsub_epi16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_epi32(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_hsub_epi32(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_phsubd128((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsubs_epi16(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_hsubs_epi16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_pi16(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_hsub_pi16(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_phsubw((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_pi32(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_hsub_pi32(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_phsubd((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsubs_pi16(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_hsubs_pi16(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_phsubsw((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddubs_epi16(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_maddubs_epi16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddubs_pi16(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_maddubs_pi16(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mulhrs_epi16(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_mulhrs_epi16(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mulhrs_pi16(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_mulhrs_pi16(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_shuffle_epi8(__m128i __X, __m128i __Y) {
|
||||
__funline __m128i _mm_shuffle_epi8(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_shuffle_pi8(__m64 __X, __m64 __Y) {
|
||||
__funline __m64 _mm_shuffle_pi8(__m64 __X, __m64 __Y) {
|
||||
return (__m64)__builtin_ia32_pshufb((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__funline __m128i _mm_sign_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_psignb128((__v16qi)__X, (__v16qi)__Y);
}

__funline __m128i _mm_sign_epi16(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_psignw128((__v8hi)__X, (__v8hi)__Y);
}

__funline __m128i _mm_sign_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_psignd128((__v4si)__X, (__v4si)__Y);
}

__funline __m64 _mm_sign_pi8(__m64 __X, __m64 __Y) {
  return (__m64)__builtin_ia32_psignb((__v8qi)__X, (__v8qi)__Y);
}

__funline __m64 _mm_sign_pi16(__m64 __X, __m64 __Y) {
  return (__m64)__builtin_ia32_psignw((__v4hi)__X, (__v4hi)__Y);
}

__funline __m64 _mm_sign_pi32(__m64 __X, __m64 __Y) {
  return (__m64)__builtin_ia32_psignd((__v2si)__X, (__v2si)__Y);
}

#ifdef __OPTIMIZE__
__funline __m128i _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) {
  return (__m128i)__builtin_ia32_palignr128((__v2di)__X, (__v2di)__Y, __N * 8);
}

__funline __m64 _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) {
  return (__m64)__builtin_ia32_palignr((__v1di)__X, (__v1di)__Y, __N * 8);
}
#else

@@ -174,39 +122,27 @@ extern __inline __m64
                                          (int)(N)*8))
#endif

__funline __m128i _mm_abs_epi8(__m128i __X) {
  return (__m128i)__builtin_ia32_pabsb128((__v16qi)__X);
}

__funline __m128i _mm_abs_epi16(__m128i __X) {
  return (__m128i)__builtin_ia32_pabsw128((__v8hi)__X);
}

__funline __m128i _mm_abs_epi32(__m128i __X) {
  return (__m128i)__builtin_ia32_pabsd128((__v4si)__X);
}

__funline __m64 _mm_abs_pi8(__m64 __X) {
  return (__m64)__builtin_ia32_pabsb((__v8qi)__X);
}

__funline __m64 _mm_abs_pi16(__m64 __X) {
  return (__m64)__builtin_ia32_pabsw((__v4hi)__X);
}

__funline __m64 _mm_abs_pi32(__m64 __X) {
  return (__m64)__builtin_ia32_pabsd((__v2si)__X);
}
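The _mm_sign_* family negates, zeroes, or passes through each lane of the first operand according to the sign of the matching lane in the second. A minimal usage sketch, assuming an SSSE3-capable x86 toolchain and the standard header names (compile with -mssse3); the values are illustrative:

#include <stdio.h>
#include <tmmintrin.h>

int main(void) {
  __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
  __m128i b = _mm_setr_epi16(-1, 0, 1, -1, 0, 1, -1, 0);
  __m128i r = _mm_sign_epi16(a, b); /* negate, zero, or keep each lane */
  short out[8];
  _mm_storeu_si128((__m128i *)out, r);
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]); /* -1 0 3 -4 0 6 -7 0 */
  printf("\n");
  return 0;
}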
32
third_party/intel/vaesintrin.internal.h
vendored

@@ -9,27 +9,19 @@
#define __DISABLE_VAES__
#endif /* __VAES__ */

__funline __m256i _mm256_aesdec_epi128(__m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vaesdec_v32qi((__v32qi)__A, (__v32qi)__B);
}

__funline __m256i _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vaesdeclast_v32qi((__v32qi)__A, (__v32qi)__B);
}

__funline __m256i _mm256_aesenc_epi128(__m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vaesenc_v32qi((__v32qi)__A, (__v32qi)__B);
}

__funline __m256i _mm256_aesenclast_epi128(__m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_vaesenclast_v32qi((__v32qi)__A, (__v32qi)__B);
}

@@ -44,27 +36,19 @@ extern __inline __m256i
#define __DISABLE_VAESF__
#endif /* __VAES__ */

__funline __m512i _mm512_aesdec_epi128(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vaesdec_v64qi((__v64qi)__A, (__v64qi)__B);
}

__funline __m512i _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vaesdeclast_v64qi((__v64qi)__A, (__v64qi)__B);
}

__funline __m512i _mm512_aesenc_epi128(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vaesenc_v64qi((__v64qi)__A, (__v64qi)__B);
}

__funline __m512i _mm512_aesenclast_epi128(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_vaesenclast_v64qi((__v64qi)__A, (__v64qi)__B);
}
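These VAES forms run one AES round per 128-bit lane, so a 256-bit register carries two independent block states. A minimal sketch, assuming a VAES-capable toolchain (illustrative flags: -mvaes -mavx2); the helper name is hypothetical:

#include <immintrin.h>

/* one AES encryption round applied to two blocks at once; each 128-bit
   lane of state is combined with the matching lane of round_key */
static __m256i double_aes_round(__m256i state, __m256i round_key) {
  return _mm256_aesenc_epi128(state, round_key);
}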
10
third_party/intel/vpclmulqdqintrin.internal.h
vendored

@@ -12,9 +12,8 @@
#endif /* __VPCLMULQDQF__ */

#ifdef __OPTIMIZE__
__funline __m512i _mm512_clmulepi64_epi128(__m512i __A, __m512i __B,
                                           const int __C) {
  return (__m512i)__builtin_ia32_vpclmulqdq_v8di((__v8di)__A, (__v8di)__B, __C);
}
#else

@@ -35,9 +34,8 @@ extern __inline __m512i
#endif /* __VPCLMULQDQ__ */

#ifdef __OPTIMIZE__
__funline __m256i _mm256_clmulepi64_epi128(__m256i __A, __m256i __B,
                                           const int __C) {
  return (__m256i)__builtin_ia32_vpclmulqdq_v4di((__v4di)__A, (__v4di)__B, __C);
}
#else
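VPCLMULQDQ performs one 64x64-to-128-bit carry-less multiply per 128-bit lane, the building block of GHASH and CRC folding. A minimal sketch, assuming a toolchain with -mvpclmulqdq and AVX2; clmul2_lo is a hypothetical helper name:

#include <immintrin.h>

/* two independent carry-less multiplies; immediate 0x00 selects the
   low quadword of each lane of both operands */
static __m256i clmul2_lo(__m256i a, __m256i b) {
  return _mm256_clmulepi64_epi128(a, b, 0x00);
}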
12
third_party/intel/waitpkgintrin.internal.h
vendored

@@ -11,21 +11,15 @@
#define __DISABLE_WAITPKG__
#endif /* __WAITPKG__ */

__funline void _umonitor(void *__A) {
  __builtin_ia32_umonitor(__A);
}

__funline unsigned char _umwait(unsigned int __A, unsigned long long __B) {
  return __builtin_ia32_umwait(__A, __B);
}

__funline unsigned char _tpause(unsigned int __A, unsigned long long __B) {
  return __builtin_ia32_tpause(__A, __B);
}
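A sketch of the intended UMONITOR/UMWAIT pattern: arm the monitor on a cache line, re-check the condition to close the race, then sleep until a write or a TSC deadline. Assumes WAITPKG support and -mwaitpkg; state 0 requests the deeper C0.2 wait, and the deadline offset is illustrative:

#include <stdint.h>
#include <x86intrin.h>

static void wait_for_flag(volatile uint32_t *flag) {
  while (!*flag) {
    _umonitor((void *)flag);        /* arm address monitoring */
    if (*flag) break;               /* re-check to avoid a lost wakeup */
    _umwait(0, __rdtsc() + 100000); /* wake on write or deadline */
  }
}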
4
third_party/intel/wbnoinvdintrin.internal.h
vendored

@@ -11,9 +11,7 @@
#define __DISABLE_WBNOINVD__
#endif /* __WBNOINVD__ */

__funline void _wbnoinvd(void) {
  __builtin_ia32_wbnoinvd();
}
28
third_party/intel/wmmintrin.internal.h
vendored

@@ -9,40 +9,28 @@
#define __DISABLE_AES__
#endif /* __AES__ */

__funline __m128i _mm_aesdec_si128(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_aesdec128((__v2di)__X, (__v2di)__Y);
}

__funline __m128i _mm_aesdeclast_si128(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__X, (__v2di)__Y);
}

__funline __m128i _mm_aesenc_si128(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_aesenc128((__v2di)__X, (__v2di)__Y);
}

__funline __m128i _mm_aesenclast_si128(__m128i __X, __m128i __Y) {
  return (__m128i)__builtin_ia32_aesenclast128((__v2di)__X, (__v2di)__Y);
}

__funline __m128i _mm_aesimc_si128(__m128i __X) {
  return (__m128i)__builtin_ia32_aesimc128((__v2di)__X);
}

#ifdef __OPTIMIZE__
__funline __m128i _mm_aeskeygenassist_si128(__m128i __X, const int __C) {
  return (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)__X, __C);
}
#else

@@ -62,9 +50,7 @@ extern __inline __m128i
#endif /* __PCLMUL__ */

#ifdef __OPTIMIZE__
__funline __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I) {
  return (__m128i)__builtin_ia32_pclmulqdq128((__v2di)__X, (__v2di)__Y, __I);
}
#else
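Put together, the AES-NI wrappers implement one block of AES-128 in the usual way: an initial AddRoundKey, nine full rounds, and a final round without MixColumns. A minimal sketch, assuming rk[] already holds the 11 expanded round keys (the key schedule via _mm_aeskeygenassist_si128 is omitted) and a toolchain with -maes:

#include <wmmintrin.h>

static __m128i aes128_encrypt_block(__m128i block, const __m128i rk[11]) {
  block = _mm_xor_si128(block, rk[0]);        /* initial AddRoundKey */
  for (int i = 1; i < 10; ++i)
    block = _mm_aesenc_si128(block, rk[i]);   /* nine full rounds */
  return _mm_aesenclast_si128(block, rk[10]); /* last round, no MixColumns */
}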
621
third_party/intel/xmmintrin.internal.h
vendored
File diff suppressed because it is too large
480
third_party/intel/xopintrin.internal.h
vendored

@@ -13,192 +13,134 @@
#define __DISABLE_XOP__
#endif /* __XOP__ */

__funline __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B,
                                           (__v8hi)__C);
}

__funline __m128i _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B,
                                          (__v8hi)__C);
}

__funline __m128i _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B,
                                           (__v4si)__C);
}

__funline __m128i _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B,
                                          (__v4si)__C);
}

__funline __m128i _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B,
                                           (__v4si)__C);
}

__funline __m128i _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B,
                                          (__v4si)__C);
}

__funline __m128i _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B,
                                            (__v2di)__C);
}

__funline __m128i _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B,
                                           (__v2di)__C);
}

__funline __m128i _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B,
                                            (__v2di)__C);
}

__funline __m128i _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B,
                                           (__v2di)__C);
}

__funline __m128i _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B,
                                            (__v4si)__C);
}

__funline __m128i _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B,
                                           (__v4si)__C);
}

/* Packed Integer Horizontal Add and Subtract */
__funline __m128i _mm_haddw_epi8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
}

__funline __m128i _mm_haddd_epi8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
}

__funline __m128i _mm_haddq_epi8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
}

__funline __m128i _mm_haddd_epi16(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
}

__funline __m128i _mm_haddq_epi16(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
}

__funline __m128i _mm_haddq_epi32(__m128i __A) {
  return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
}

__funline __m128i _mm_haddw_epu8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
}

__funline __m128i _mm_haddd_epu8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
}

__funline __m128i _mm_haddq_epu8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
}

__funline __m128i _mm_haddd_epu16(__m128i __A) {
  return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
}

__funline __m128i _mm_haddq_epu16(__m128i __A) {
  return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
}

__funline __m128i _mm_haddq_epu32(__m128i __A) {
  return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
}

__funline __m128i _mm_hsubw_epi8(__m128i __A) {
  return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
}

__funline __m128i _mm_hsubd_epi16(__m128i __A) {
  return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
}

__funline __m128i _mm_hsubq_epi32(__m128i __A) {
  return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
}

/* Vector conditional move and permute */

__funline __m128i _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
}

__funline __m128i _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B,
                                        (__v16qi)__C);
}
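The macc family fuses a per-lane multiply with an add of the third operand. A minimal sketch, assuming an XOP-capable CPU (AMD Bulldozer family) and a toolchain that still ships these intrinsics (compile with -mxop); mac16 is a hypothetical helper:

#include <x86intrin.h>

/* per-lane 16-bit a*b + c, truncated; _mm_maccs_epi16 is the
   saturating variant of the same operation */
static __m128i mac16(__m128i a, __m128i b, __m128i c) {
  return _mm_macc_epi16(a, b, c);
}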
@@ -206,52 +148,36 @@ extern __inline __m128i
/* Packed Integer Rotates and Shifts
   Rotates - Non-Immediate form */

__funline __m128i _mm_rot_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_rot_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_rot_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_rot_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
}

#ifdef __OPTIMIZE__
__funline __m128i _mm_roti_epi8(__m128i __A, const int __B) {
  return (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, __B);
}

__funline __m128i _mm_roti_epi16(__m128i __A, const int __B) {
  return (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, __B);
}

__funline __m128i _mm_roti_epi32(__m128i __A, const int __B) {
  return (__m128i)__builtin_ia32_vprotdi((__v4si)__A, __B);
}

__funline __m128i _mm_roti_epi64(__m128i __A, const int __B) {
  return (__m128i)__builtin_ia32_vprotqi((__v2di)__A, __B);
}
#else
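XOP's immediate rotates replace the two-shifts-plus-OR idiom that plain SSE2 needs, which matters in ChaCha-style mixing loops. A sketch of both paths, assuming -mxop on the XOP side; rotl7 is a hypothetical helper:

#include <x86intrin.h>

static __m128i rotl7(__m128i x) {
#ifdef __XOP__
  return _mm_roti_epi32(x, 7); /* single vprotd */
#else
  return _mm_or_si128(_mm_slli_epi32(x, 7), _mm_srli_epi32(x, 25));
#endif
}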
@@ -265,501 +191,341 @@ extern __inline __m128i
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (int)(N)))
#endif

__funline __m128i _mm_shl_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_shl_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_shl_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_shl_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_sha_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_sha_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_sha_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_sha_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
}
__funline __m128i _mm_comlt_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltub((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comle_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomleub((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comgt_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtub((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comge_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgeub((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comeq_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomequb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comneq_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomnequb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comfalse_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalseub((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comtrue_epu8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrueub((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comlt_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltuw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comle_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomleuw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comgt_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtuw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comge_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgeuw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comeq_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomequw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comneq_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomnequw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comfalse_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalseuw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comtrue_epu16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrueuw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comlt_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltud((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comle_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomleud((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comgt_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtud((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comge_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgeud((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comeq_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomequd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comneq_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomnequd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comfalse_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalseud((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comtrue_epu32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrueud((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comlt_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltuq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comle_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomleuq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comgt_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtuq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comge_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgeuq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comeq_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomequq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comneq_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomnequq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comfalse_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalseuq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comtrue_epu64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrueuq((__v2di)__A, (__v2di)__B);
}
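The vpcom results are full-width lane masks, so they compose directly with _mm_cmov_si128 for a branch-free select. A sketch of an unsigned 32-bit minimum under those semantics, assuming -mxop; min_epu32_xop is a hypothetical helper:

#include <x86intrin.h>

static __m128i min_epu32_xop(__m128i a, __m128i b) {
  __m128i lt = _mm_comlt_epu32(a, b); /* all-ones where a < b (unsigned) */
  return _mm_cmov_si128(a, b, lt);    /* bits of a where mask set, else b */
}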
__funline __m128i _mm_comlt_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comle_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomleb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comgt_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comge_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgeb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comeq_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomeqb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comneq_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomneqb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comfalse_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalseb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comtrue_epi8(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrueb((__v16qi)__A, (__v16qi)__B);
}

__funline __m128i _mm_comlt_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comle_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomlew((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comgt_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comge_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgew((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comeq_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomeqw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comneq_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomneqw((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comfalse_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalsew((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comtrue_epi16(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtruew((__v8hi)__A, (__v8hi)__B);
}

__funline __m128i _mm_comlt_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comle_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomled((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comgt_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comge_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomged((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comeq_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomeqd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comneq_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomneqd((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comfalse_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalsed((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comtrue_epi32(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrued((__v4si)__A, (__v4si)__B);
}

__funline __m128i _mm_comlt_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomltq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comle_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomleq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comgt_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgtq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comge_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomgeq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comeq_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomeqq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comneq_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomneqq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comfalse_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomfalseq((__v2di)__A, (__v2di)__B);
}

__funline __m128i _mm_comtrue_epi64(__m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_vpcomtrueq((__v2di)__A, (__v2di)__B);
}
__funline __m128 _mm_frcz_ps(__m128 __A) {
  return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
}

__funline __m128d _mm_frcz_pd(__m128d __A) {
  return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
}

__funline __m128 _mm_frcz_ss(__m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_movss(
      (__v4sf)__A, (__v4sf)__builtin_ia32_vfrczss((__v4sf)__B));
}

__funline __m128d _mm_frcz_sd(__m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_movsd(
      (__v2df)__A, (__v2df)__builtin_ia32_vfrczsd((__v2df)__B));
}

__funline __m256 _mm256_frcz_ps(__m256 __A) {
  return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
}

__funline __m256d _mm256_frcz_pd(__m256d __A) {
  return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
}

#ifdef __OPTIMIZE__
__funline __m128d _mm_permute2_pd(__m128d __X, __m128d __Y, __m128i __C,
                                 const int __I) {
  return (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y,
                                            (__v2di)__C, __I);
}

__funline __m256d _mm256_permute2_pd(__m256d __X, __m256d __Y, __m256i __C,
                                    const int __I) {
  return (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y,
                                               (__v4di)__C, __I);
}

__funline __m128 _mm_permute2_ps(__m128 __X, __m128 __Y, __m128i __C,
                                const int __I) {
  return (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y,
                                           (__v4si)__C, __I);
}

__funline __m256 _mm256_permute2_ps(__m256 __X, __m256 __Y, __m256i __C,
                                   const int __I) {
  return (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y,
                                              (__v8si)__C, __I);
}
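FRCZ extracts the fractional part of each lane, so the truncated integer part falls out by subtraction. A one-line sketch, assuming -mxop; integer_part_ps is a hypothetical helper:

#include <x86intrin.h>

static __m128 integer_part_ps(__m128 x) {
  return _mm_sub_ps(x, _mm_frcz_ps(x)); /* x - frac(x) == trunc(x) */
}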
8
third_party/intel/xsavecintrin.internal.h
vendored

@@ -11,16 +11,12 @@
#define __DISABLE_XSAVEC__
#endif /* __XSAVEC__ */

__funline void _xsavec(void *__P, long long __M) {
  __builtin_ia32_xsavec(__P, __M);
}

#ifdef __x86_64__
__funline void _xsavec64(void *__P, long long __M) {
  __builtin_ia32_xsavec64(__P, __M);
}
#endif
24
third_party/intel/xsaveintrin.internal.h
vendored

@@ -11,40 +11,28 @@
#define __DISABLE_XSAVE__
#endif /* __XSAVE__ */

__funline void _xsave(void *__P, long long __M) {
  __builtin_ia32_xsave(__P, __M);
}

__funline void _xrstor(void *__P, long long __M) {
  __builtin_ia32_xrstor(__P, __M);
}

__funline void _xsetbv(unsigned int __A, long long __V) {
  __builtin_ia32_xsetbv(__A, __V);
}

__funline long long _xgetbv(unsigned int __A) {
  return __builtin_ia32_xgetbv(__A);
}

#ifdef __x86_64__
__funline void _xsave64(void *__P, long long __M) {
  __builtin_ia32_xsave64(__P, __M);
}

__funline void _xrstor64(void *__P, long long __M) {
  __builtin_ia32_xrstor64(__P, __M);
}
#endif
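_xgetbv(0) reads XCR0, which is how user code checks that the OS actually saves AVX state before using it. A minimal sketch, assuming OSXSAVE is enabled and a toolchain with -mxsave:

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  long long xcr0 = _xgetbv(0); /* register 0 selects XCR0 */
  printf("SSE state: %s\n", (xcr0 & 2) ? "enabled" : "disabled");
  printf("AVX state: %s\n", (xcr0 & 4) ? "enabled" : "disabled");
  return 0;
}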
8
third_party/intel/xsaveoptintrin.internal.h
vendored

@@ -11,16 +11,12 @@
#define __DISABLE_XSAVEOPT__
#endif /* __XSAVEOPT__ */

__funline void _xsaveopt(void *__P, long long __M) {
  __builtin_ia32_xsaveopt(__P, __M);
}

#ifdef __x86_64__
__funline void _xsaveopt64(void *__P, long long __M) {
  __builtin_ia32_xsaveopt64(__P, __M);
}
#endif
16
third_party/intel/xsavesintrin.internal.h
vendored

@@ -11,28 +11,20 @@
#define __DISABLE_XSAVES__
#endif /* __XSAVES__ */

__funline void _xsaves(void *__P, long long __M) {
  __builtin_ia32_xsaves(__P, __M);
}

__funline void _xrstors(void *__P, long long __M) {
  __builtin_ia32_xrstors(__P, __M);
}

#ifdef __x86_64__
__funline void _xrstors64(void *__P, long long __M) {
  __builtin_ia32_xrstors64(__P, __M);
}

__funline void _xsaves64(void *__P, long long __M) {
  __builtin_ia32_xsaves64(__P, __M);
}
#endif
4
third_party/intel/xtestintrin.internal.h
vendored

@@ -11,9 +11,7 @@
#define __DISABLE_RTM__
#endif /* __RTM__ */

__funline int _xtest(void) {
  return __builtin_ia32_xtest();
}
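_xtest reports whether execution is currently inside an RTM transaction, which lets shared code pick between transactional and locked paths. A sketch, assuming RTM hardware and -mrtm; try_increment is a hypothetical helper:

#include <immintrin.h>

static int try_increment(int *p) {
  if (_xbegin() == _XBEGIN_STARTED) {
    if (_xtest()) ++*p; /* nonzero here: we are inside the transaction */
    _xend();
    return 1;
  }
  return 0; /* aborted or not started: caller falls back to a lock */
}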
@@ -26,6 +26,7 @@
    (cosmo
     '("__msabi"
       "__funline"
       "function"
       "offsetof"
       "microarchitecture"