Make the intrinsics more readable

This commit is contained in:
Justine Tunney 2023-05-15 23:11:47 -07:00
parent 210187cf77
commit 80db9de173
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
75 changed files with 12444 additions and 21493 deletions

View file

@ -854,5 +854,9 @@ typedef struct {
asm(".weak\t" #alias "\n\t" \
".equ\t" #alias ", " #sym)
#define __funline \
extern __inline \
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
#define MACHINE_CODE_ANALYSIS_BEGIN_
#define MACHINE_CODE_ANALYSIS_END_

View file

@ -8,365 +8,361 @@
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+fp16")
#define FUNC \
__extension__ extern __inline \
__attribute__((__always_inline__, __gnu_inline__, __artificial__))
typedef __fp16 float16_t;
FUNC float16_t vabsh_f16(float16_t __a) {
__funline float16_t vabsh_f16(float16_t __a) {
return __builtin_aarch64_abshf(__a);
}
FUNC uint16_t vceqzh_f16(float16_t __a) {
__funline uint16_t vceqzh_f16(float16_t __a) {
return __builtin_aarch64_cmeqhf_uss(__a, 0.0f);
}
FUNC uint16_t vcgezh_f16(float16_t __a) {
__funline uint16_t vcgezh_f16(float16_t __a) {
return __builtin_aarch64_cmgehf_uss(__a, 0.0f);
}
FUNC uint16_t vcgtzh_f16(float16_t __a) {
__funline uint16_t vcgtzh_f16(float16_t __a) {
return __builtin_aarch64_cmgthf_uss(__a, 0.0f);
}
FUNC uint16_t vclezh_f16(float16_t __a) {
__funline uint16_t vclezh_f16(float16_t __a) {
return __builtin_aarch64_cmlehf_uss(__a, 0.0f);
}
FUNC uint16_t vcltzh_f16(float16_t __a) {
__funline uint16_t vcltzh_f16(float16_t __a) {
return __builtin_aarch64_cmlthf_uss(__a, 0.0f);
}
FUNC float16_t vcvth_f16_s16(int16_t __a) {
__funline float16_t vcvth_f16_s16(int16_t __a) {
return __builtin_aarch64_floathihf(__a);
}
FUNC float16_t vcvth_f16_s32(int32_t __a) {
__funline float16_t vcvth_f16_s32(int32_t __a) {
return __builtin_aarch64_floatsihf(__a);
}
FUNC float16_t vcvth_f16_s64(int64_t __a) {
__funline float16_t vcvth_f16_s64(int64_t __a) {
return __builtin_aarch64_floatdihf(__a);
}
FUNC float16_t vcvth_f16_u16(uint16_t __a) {
__funline float16_t vcvth_f16_u16(uint16_t __a) {
return __builtin_aarch64_floatunshihf_us(__a);
}
FUNC float16_t vcvth_f16_u32(uint32_t __a) {
__funline float16_t vcvth_f16_u32(uint32_t __a) {
return __builtin_aarch64_floatunssihf_us(__a);
}
FUNC float16_t vcvth_f16_u64(uint64_t __a) {
__funline float16_t vcvth_f16_u64(uint64_t __a) {
return __builtin_aarch64_floatunsdihf_us(__a);
}
FUNC int16_t vcvth_s16_f16(float16_t __a) {
__funline int16_t vcvth_s16_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfhi(__a);
}
FUNC int32_t vcvth_s32_f16(float16_t __a) {
__funline int32_t vcvth_s32_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfsi(__a);
}
FUNC int64_t vcvth_s64_f16(float16_t __a) {
__funline int64_t vcvth_s64_f16(float16_t __a) {
return __builtin_aarch64_fix_trunchfdi(__a);
}
FUNC uint16_t vcvth_u16_f16(float16_t __a) {
__funline uint16_t vcvth_u16_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfhi_us(__a);
}
FUNC uint32_t vcvth_u32_f16(float16_t __a) {
__funline uint32_t vcvth_u32_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfsi_us(__a);
}
FUNC uint64_t vcvth_u64_f16(float16_t __a) {
__funline uint64_t vcvth_u64_f16(float16_t __a) {
return __builtin_aarch64_fixuns_trunchfdi_us(__a);
}
FUNC int16_t vcvtah_s16_f16(float16_t __a) {
__funline int16_t vcvtah_s16_f16(float16_t __a) {
return __builtin_aarch64_lroundhfhi(__a);
}
FUNC int32_t vcvtah_s32_f16(float16_t __a) {
__funline int32_t vcvtah_s32_f16(float16_t __a) {
return __builtin_aarch64_lroundhfsi(__a);
}
FUNC int64_t vcvtah_s64_f16(float16_t __a) {
__funline int64_t vcvtah_s64_f16(float16_t __a) {
return __builtin_aarch64_lroundhfdi(__a);
}
FUNC uint16_t vcvtah_u16_f16(float16_t __a) {
__funline uint16_t vcvtah_u16_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfhi_us(__a);
}
FUNC uint32_t vcvtah_u32_f16(float16_t __a) {
__funline uint32_t vcvtah_u32_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfsi_us(__a);
}
FUNC uint64_t vcvtah_u64_f16(float16_t __a) {
__funline uint64_t vcvtah_u64_f16(float16_t __a) {
return __builtin_aarch64_lrounduhfdi_us(__a);
}
FUNC int16_t vcvtmh_s16_f16(float16_t __a) {
__funline int16_t vcvtmh_s16_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfhi(__a);
}
FUNC int32_t vcvtmh_s32_f16(float16_t __a) {
__funline int32_t vcvtmh_s32_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfsi(__a);
}
FUNC int64_t vcvtmh_s64_f16(float16_t __a) {
__funline int64_t vcvtmh_s64_f16(float16_t __a) {
return __builtin_aarch64_lfloorhfdi(__a);
}
FUNC uint16_t vcvtmh_u16_f16(float16_t __a) {
__funline uint16_t vcvtmh_u16_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfhi_us(__a);
}
FUNC uint32_t vcvtmh_u32_f16(float16_t __a) {
__funline uint32_t vcvtmh_u32_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfsi_us(__a);
}
FUNC uint64_t vcvtmh_u64_f16(float16_t __a) {
__funline uint64_t vcvtmh_u64_f16(float16_t __a) {
return __builtin_aarch64_lflooruhfdi_us(__a);
}
FUNC int16_t vcvtnh_s16_f16(float16_t __a) {
__funline int16_t vcvtnh_s16_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfhi(__a);
}
FUNC int32_t vcvtnh_s32_f16(float16_t __a) {
__funline int32_t vcvtnh_s32_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfsi(__a);
}
FUNC int64_t vcvtnh_s64_f16(float16_t __a) {
__funline int64_t vcvtnh_s64_f16(float16_t __a) {
return __builtin_aarch64_lfrintnhfdi(__a);
}
FUNC uint16_t vcvtnh_u16_f16(float16_t __a) {
__funline uint16_t vcvtnh_u16_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfhi_us(__a);
}
FUNC uint32_t vcvtnh_u32_f16(float16_t __a) {
__funline uint32_t vcvtnh_u32_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfsi_us(__a);
}
FUNC uint64_t vcvtnh_u64_f16(float16_t __a) {
__funline uint64_t vcvtnh_u64_f16(float16_t __a) {
return __builtin_aarch64_lfrintnuhfdi_us(__a);
}
FUNC int16_t vcvtph_s16_f16(float16_t __a) {
__funline int16_t vcvtph_s16_f16(float16_t __a) {
return __builtin_aarch64_lceilhfhi(__a);
}
FUNC int32_t vcvtph_s32_f16(float16_t __a) {
__funline int32_t vcvtph_s32_f16(float16_t __a) {
return __builtin_aarch64_lceilhfsi(__a);
}
FUNC int64_t vcvtph_s64_f16(float16_t __a) {
__funline int64_t vcvtph_s64_f16(float16_t __a) {
return __builtin_aarch64_lceilhfdi(__a);
}
FUNC uint16_t vcvtph_u16_f16(float16_t __a) {
__funline uint16_t vcvtph_u16_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfhi_us(__a);
}
FUNC uint32_t vcvtph_u32_f16(float16_t __a) {
__funline uint32_t vcvtph_u32_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfsi_us(__a);
}
FUNC uint64_t vcvtph_u64_f16(float16_t __a) {
__funline uint64_t vcvtph_u64_f16(float16_t __a) {
return __builtin_aarch64_lceiluhfdi_us(__a);
}
FUNC float16_t vnegh_f16(float16_t __a) {
__funline float16_t vnegh_f16(float16_t __a) {
return __builtin_aarch64_neghf(__a);
}
FUNC float16_t vrecpeh_f16(float16_t __a) {
__funline float16_t vrecpeh_f16(float16_t __a) {
return __builtin_aarch64_frecpehf(__a);
}
FUNC float16_t vrecpxh_f16(float16_t __a) {
__funline float16_t vrecpxh_f16(float16_t __a) {
return __builtin_aarch64_frecpxhf(__a);
}
FUNC float16_t vrndh_f16(float16_t __a) {
__funline float16_t vrndh_f16(float16_t __a) {
return __builtin_aarch64_btrunchf(__a);
}
FUNC float16_t vrndah_f16(float16_t __a) {
__funline float16_t vrndah_f16(float16_t __a) {
return __builtin_aarch64_roundhf(__a);
}
FUNC float16_t vrndih_f16(float16_t __a) {
__funline float16_t vrndih_f16(float16_t __a) {
return __builtin_aarch64_nearbyinthf(__a);
}
FUNC float16_t vrndmh_f16(float16_t __a) {
__funline float16_t vrndmh_f16(float16_t __a) {
return __builtin_aarch64_floorhf(__a);
}
FUNC float16_t vrndnh_f16(float16_t __a) {
__funline float16_t vrndnh_f16(float16_t __a) {
return __builtin_aarch64_frintnhf(__a);
}
FUNC float16_t vrndph_f16(float16_t __a) {
__funline float16_t vrndph_f16(float16_t __a) {
return __builtin_aarch64_ceilhf(__a);
}
FUNC float16_t vrndxh_f16(float16_t __a) {
__funline float16_t vrndxh_f16(float16_t __a) {
return __builtin_aarch64_rinthf(__a);
}
FUNC float16_t vrsqrteh_f16(float16_t __a) {
__funline float16_t vrsqrteh_f16(float16_t __a) {
return __builtin_aarch64_rsqrtehf(__a);
}
FUNC float16_t vsqrth_f16(float16_t __a) {
__funline float16_t vsqrth_f16(float16_t __a) {
return __builtin_aarch64_sqrthf(__a);
}
FUNC float16_t vaddh_f16(float16_t __a, float16_t __b) {
__funline float16_t vaddh_f16(float16_t __a, float16_t __b) {
return __a + __b;
}
FUNC float16_t vabdh_f16(float16_t __a, float16_t __b) {
__funline float16_t vabdh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fabdhf(__a, __b);
}
FUNC uint16_t vcageh_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcageh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_facgehf_uss(__a, __b);
}
FUNC uint16_t vcagth_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcagth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_facgthf_uss(__a, __b);
}
FUNC uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcaleh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_faclehf_uss(__a, __b);
}
FUNC uint16_t vcalth_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcalth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_faclthf_uss(__a, __b);
}
FUNC uint16_t vceqh_f16(float16_t __a, float16_t __b) {
__funline uint16_t vceqh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmeqhf_uss(__a, __b);
}
FUNC uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcgeh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmgehf_uss(__a, __b);
}
FUNC uint16_t vcgth_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcgth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmgthf_uss(__a, __b);
}
FUNC uint16_t vcleh_f16(float16_t __a, float16_t __b) {
__funline uint16_t vcleh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmlehf_uss(__a, __b);
}
FUNC uint16_t vclth_f16(float16_t __a, float16_t __b) {
__funline uint16_t vclth_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_cmlthf_uss(__a, __b);
}
FUNC float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
__funline float16_t vcvth_n_f16_s16(int16_t __a, const int __b) {
return __builtin_aarch64_scvtfhi(__a, __b);
}
FUNC float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
__funline float16_t vcvth_n_f16_s32(int32_t __a, const int __b) {
return __builtin_aarch64_scvtfsihf(__a, __b);
}
FUNC float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
__funline float16_t vcvth_n_f16_s64(int64_t __a, const int __b) {
return __builtin_aarch64_scvtfdihf(__a, __b);
}
FUNC float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
__funline float16_t vcvth_n_f16_u16(uint16_t __a, const int __b) {
return __builtin_aarch64_ucvtfhi_sus(__a, __b);
}
FUNC float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
__funline float16_t vcvth_n_f16_u32(uint32_t __a, const int __b) {
return __builtin_aarch64_ucvtfsihf_sus(__a, __b);
}
FUNC float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
__funline float16_t vcvth_n_f16_u64(uint64_t __a, const int __b) {
return __builtin_aarch64_ucvtfdihf_sus(__a, __b);
}
FUNC int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
__funline int16_t vcvth_n_s16_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshf(__a, __b);
}
FUNC int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
__funline int32_t vcvth_n_s32_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshfsi(__a, __b);
}
FUNC int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
__funline int64_t vcvth_n_s64_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzshfdi(__a, __b);
}
FUNC uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
__funline uint16_t vcvth_n_u16_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhf_uss(__a, __b);
}
FUNC uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
__funline uint32_t vcvth_n_u32_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhfsi_uss(__a, __b);
}
FUNC uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
__funline uint64_t vcvth_n_u64_f16(float16_t __a, const int __b) {
return __builtin_aarch64_fcvtzuhfdi_uss(__a, __b);
}
FUNC float16_t vdivh_f16(float16_t __a, float16_t __b) {
__funline float16_t vdivh_f16(float16_t __a, float16_t __b) {
return __a / __b;
}
FUNC float16_t vmaxh_f16(float16_t __a, float16_t __b) {
__funline float16_t vmaxh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmaxhf(__a, __b);
}
FUNC float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
__funline float16_t vmaxnmh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmaxhf(__a, __b);
}
FUNC float16_t vminh_f16(float16_t __a, float16_t __b) {
__funline float16_t vminh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fminhf(__a, __b);
}
FUNC float16_t vminnmh_f16(float16_t __a, float16_t __b) {
__funline float16_t vminnmh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fminhf(__a, __b);
}
FUNC float16_t vmulh_f16(float16_t __a, float16_t __b) {
__funline float16_t vmulh_f16(float16_t __a, float16_t __b) {
return __a * __b;
}
FUNC float16_t vmulxh_f16(float16_t __a, float16_t __b) {
__funline float16_t vmulxh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_fmulxhf(__a, __b);
}
FUNC float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
__funline float16_t vrecpsh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_frecpshf(__a, __b);
}
FUNC float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
__funline float16_t vrsqrtsh_f16(float16_t __a, float16_t __b) {
return __builtin_aarch64_rsqrtshf(__a, __b);
}
FUNC float16_t vsubh_f16(float16_t __a, float16_t __b) {
__funline float16_t vsubh_f16(float16_t __a, float16_t __b) {
return __a - __b;
}
FUNC float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
__funline float16_t vfmah_f16(float16_t __a, float16_t __b, float16_t __c) {
return __builtin_aarch64_fmahf(__b, __c, __a);
}
FUNC float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
__funline float16_t vfmsh_f16(float16_t __a, float16_t __b, float16_t __c) {
return __builtin_aarch64_fnmahf(__b, __c, __a);
}

File diff suppressed because it is too large Load diff

View file

@ -1784,24 +1784,40 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
// Initialize accumulator with zeros
__m256 acc = _mm256_setzero_ps();
//
// Main loop
for (int i = 0; i < nb; ++i) {
/* Compute combined scale for the block */
const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
__m256i bx = bytes_from_nibbles_32(x[i].qs);
// Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval.
const __m256i off = _mm256_set1_epi8( 8 );
bx = _mm256_sub_epi8( bx, off );
__m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
const __m256 q = mul_sum_i8_pairs_float(bx, by);
/* Multiply q with scale and accumulate */
acc = _mm256_fmadd_ps( d, q, acc );
//
#define WORK(I) \
/* Compute combined scale for the block */ \
const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[I].d ), _mm256_broadcast_ss( &y[I].d ) ); \
__m256i bx = bytes_from_nibbles_32(x[I].qs); \
/* Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval. */ \
const __m256i off = _mm256_set1_epi8( 8 ); \
bx = _mm256_sub_epi8( bx, off ); \
__m256i by = _mm256_loadu_si256((const __m256i *)y[I].qs); \
const __m256 q = mul_sum_i8_pairs_float(bx, by); \
/* Multiply q with scale and accumulate */ \
acc = _mm256_fmadd_ps( d, q, acc )
int i = 0;
for (; i + 12 < nb; i += 12) {
_mm_prefetch(x+i+12, 3);
_mm_prefetch(x+i+15, 3);
_mm_prefetch(x+i+18, 3);
_mm_prefetch(x+i+21, 3);
_mm_prefetch(y+i+12, 3);
_mm_prefetch(y+i+14, 3);
_mm_prefetch(y+i+16, 3);
_mm_prefetch(y+i+18, 3);
_mm_prefetch(y+i+20, 3);
_mm_prefetch(y+i+22, 3);
for (int j = 0; j < 12; ++j) {
WORK(i+j);
}
}
for (; i < nb; ++i) {
WORK(i);
}
#undef WORK
*s = hsum_float_8(acc);
#elif defined(__AVX__)

View file

@ -5,46 +5,37 @@
#ifndef _ADXINTRIN_H_INCLUDED
#define _ADXINTRIN_H_INCLUDED
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_subborrow_u32(unsigned char __CF, unsigned int __X, unsigned int __Y,
unsigned int *__P) {
__funline unsigned char _subborrow_u32(unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P) {
return __builtin_ia32_sbb_u32(__CF, __X, __Y, __P);
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_addcarry_u32(unsigned char __CF, unsigned int __X, unsigned int __Y,
unsigned int *__P) {
__funline unsigned char _addcarry_u32(unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P) {
return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_addcarryx_u32(unsigned char __CF, unsigned int __X, unsigned int __Y,
unsigned int *__P) {
__funline unsigned char _addcarryx_u32(unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P) {
return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
}
#ifdef __x86_64__
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_subborrow_u64(unsigned char __CF, unsigned long long __X,
unsigned long long __Y, unsigned long long *__P) {
__funline unsigned char _subborrow_u64(unsigned char __CF, unsigned long long __X,
unsigned long long __Y,
unsigned long long *__P) {
return __builtin_ia32_sbb_u64(__CF, __X, __Y, __P);
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_addcarry_u64(unsigned char __CF, unsigned long long __X,
unsigned long long __Y, unsigned long long *__P) {
__funline unsigned char _addcarry_u64(unsigned char __CF, unsigned long long __X,
unsigned long long __Y,
unsigned long long *__P) {
return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_addcarryx_u64(unsigned char __CF, unsigned long long __X,
unsigned long long __Y, unsigned long long *__P) {
__funline unsigned char _addcarryx_u64(unsigned char __CF, unsigned long long __X,
unsigned long long __Y,
unsigned long long *__P) {
return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
}
#endif

View file

@ -9,28 +9,21 @@
#define __DISABLE_SSE4A__
#endif /* __SSE4A__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_sd(double* __P, __m128d __Y) {
__funline void _mm_stream_sd(double* __P, __m128d __Y) {
__builtin_ia32_movntsd(__P, (__v2df)__Y);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ss(float* __P, __m128 __Y) {
__funline void _mm_stream_ss(float* __P, __m128 __Y) {
__builtin_ia32_movntss(__P, (__v4sf)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_si64(__m128i __X, __m128i __Y) {
__funline __m128i _mm_extract_si64(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_extrq((__v2di)__X, (__v16qi)__Y);
}
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_extracti_si64(__m128i __X, unsigned const int __I, unsigned const int __L) {
__funline __m128i _mm_extracti_si64(__m128i __X, unsigned const int __I,
unsigned const int __L) {
return (__m128i)__builtin_ia32_extrqi((__v2di)__X, __I, __L);
}
#else
@ -39,16 +32,13 @@ _mm_extracti_si64(__m128i __X, unsigned const int __I, unsigned const int __L) {
(unsigned int)(L)))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_si64(__m128i __X, __m128i __Y) {
__funline __m128i _mm_insert_si64(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_insertq((__v2di)__X, (__v2di)__Y);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I,
__funline __m128i _mm_inserti_si64(__m128i __X, __m128i __Y,
unsigned const int __I,
unsigned const int __L) {
return (__m128i)__builtin_ia32_insertqi((__v2di)__X, (__v2di)__Y, __I, __L);
}

File diff suppressed because it is too large Load diff

View file

@ -12,109 +12,93 @@
#define __DISABLE_AVX5124FMAPS__
#endif /* __AVX5124FMAPS__ */
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
__funline __m512 _mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
__m512 __E, __m128 *__F) {
return (__m512)__builtin_ia32_4fmaddps((__v16sf)__B, (__v16sf)__C,
(__v16sf)__D, (__v16sf)__E,
(__v16sf)__A, (const __v4sf *)__F);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F) {
__funline __m512 _mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
return (__m512)__builtin_ia32_4fmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F) {
__funline __m512 _mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
return (__m512)__builtin_ia32_4fmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D, __m128 __E,
__m128 *__F) {
__funline __m128 _mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
__m128 __E, __m128 *__F) {
return (__m128)__builtin_ia32_4fmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
(__v4sf)__E, (__v4sf)__A,
(const __v4sf *)__F);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F) {
__funline __m128 _mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
__m128 __C, __m128 __D, __m128 __E,
__m128 *__F) {
return (__m128)__builtin_ia32_4fmaddss_mask(
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F) {
__funline __m128 _mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
__m128 __C, __m128 __D, __m128 __E,
__m128 *__F) {
return (__m128)__builtin_ia32_4fmaddss_mask(
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
__funline __m512 _mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
__m512 __E, __m128 *__F) {
return (__m512)__builtin_ia32_4fnmaddps((__v16sf)__B, (__v16sf)__C,
(__v16sf)__D, (__v16sf)__E,
(__v16sf)__A, (const __v4sf *)__F);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F) {
__funline __m512 _mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
return (__m512)__builtin_ia32_4fnmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F) {
__funline __m512 _mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
return (__m512)__builtin_ia32_4fnmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D, __m128 __E,
__m128 *__F) {
__funline __m128 _mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
__m128 __E, __m128 *__F) {
return (__m128)__builtin_ia32_4fnmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
(__v4sf)__E, (__v4sf)__A,
(const __v4sf *)__F);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F) {
__funline __m128 _mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
__m128 __C, __m128 __D, __m128 __E,
__m128 *__F) {
return (__m128)__builtin_ia32_4fnmaddss_mask(
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F) {
__funline __m128 _mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
__m128 __C, __m128 __D, __m128 __E,
__m128 *__F) {
return (__m128)__builtin_ia32_4fnmaddss_mask(
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);

View file

@ -12,58 +12,49 @@
#define __DISABLE_AVX5124VNNIW__
#endif /* __AVX5124VNNIW__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
__m512i __E, __m128i *__F) {
__funline __m512i _mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C,
__m512i __D, __m512i __E, __m128i *__F) {
return (__m512i)__builtin_ia32_vp4dpwssd((__v16si)__B, (__v16si)__C,
(__v16si)__D, (__v16si)__E,
(__v16si)__A, (const __v4si *)__F);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__m128i *__F) {
__funline __m512i _mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U,
__m512i __B, __m512i __C, __m512i __D,
__m512i __E, __m128i *__F) {
return (__m512i)__builtin_ia32_vp4dpwssd_mask(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__funline __m512i _mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A,
__m512i __B, __m512i __C,
__m512i __D, __m512i __E,
__m128i *__F) {
return (__m512i)__builtin_ia32_vp4dpwssd_mask(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
__m512i __E, __m128i *__F) {
__funline __m512i _mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C,
__m512i __D, __m512i __E, __m128i *__F) {
return (__m512i)__builtin_ia32_vp4dpwssds((__v16si)__B, (__v16si)__C,
(__v16si)__D, (__v16si)__E,
(__v16si)__A, (const __v4si *)__F);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__funline __m512i _mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U,
__m512i __B, __m512i __C,
__m512i __D, __m512i __E,
__m128i *__F) {
return (__m512i)__builtin_ia32_vp4dpwssds_mask(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__funline __m512i _mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A,
__m512i __B, __m512i __C,
__m512i __D, __m512i __E,
__m128i *__F) {
return (__m512i)__builtin_ia32_vp4dpwssds_mask(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,

View file

@ -12,15 +12,11 @@
#define __DISABLE_AVX512BITALG__
#endif /* __AVX512BITALG__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi8(__m512i __A) {
__funline __m512i _mm512_popcnt_epi8(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcountb_v64qi((__v64qi)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi16(__m512i __A) {
__funline __m512i _mm512_popcnt_epi16(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcountw_v32hi((__v32hi)__A);
}
@ -35,43 +31,34 @@ extern __inline __m512i
#define __DISABLE_AVX512BITALGBW__
#endif /* __AVX512VLBW__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
__funline __m512i _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U,
__m512i __B) {
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) {
__funline __m512i _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
(__v64qi)__A, (__v64qi)_mm512_setzero_si512(), (__mmask64)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
__funline __m512i _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U,
__m512i __B) {
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
(__v32hi)__A, (__v32hi)__B, (__mmask32)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) {
__funline __m512i _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
(__v32hi)__A, (__v32hi)_mm512_setzero_si512(), (__mmask32)__U);
}
extern __inline __mmask64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
__funline __mmask64 _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
}
extern __inline __mmask64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A, __m512i __B) {
__funline __mmask64 _mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A,
__m512i __B) {
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
(__v64qi)__A, (__v64qi)__B, (__mmask64)__M);
}
@ -88,30 +75,24 @@ extern __inline __mmask64
#define __DISABLE_AVX512BITALGVLBW__
#endif /* __AVX512VLBW__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
__funline __m256i _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U,
__m256i __B) {
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) {
__funline __m256i _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) {
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
(__v32qi)__A, (__v32qi)_mm256_setzero_si256(), (__mmask32)__U);
}
extern __inline __mmask32
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
__funline __mmask32 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
}
extern __inline __mmask32
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A, __m256i __B) {
__funline __mmask32 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A,
__m256i __B) {
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
(__v32qi)__A, (__v32qi)__B, (__mmask32)__M);
}
@ -127,81 +108,59 @@ extern __inline __mmask32
#define __DISABLE_AVX512BITALGVL__
#endif /* __AVX512VLBW__ */
extern __inline __mmask16
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
__funline __mmask16 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
}
extern __inline __mmask16
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A, __m128i __B) {
__funline __mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A,
__m128i __B) {
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
(__v16qi)__A, (__v16qi)__B, (__mmask16)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi8(__m256i __A) {
__funline __m256i _mm256_popcnt_epi8(__m256i __A) {
return (__m256i)__builtin_ia32_vpopcountb_v32qi((__v32qi)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi16(__m256i __A) {
__funline __m256i _mm256_popcnt_epi16(__m256i __A) {
return (__m256i)__builtin_ia32_vpopcountw_v16hi((__v16hi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi8(__m128i __A) {
__funline __m128i _mm_popcnt_epi8(__m128i __A) {
return (__m128i)__builtin_ia32_vpopcountb_v16qi((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi16(__m128i __A) {
__funline __m128i _mm_popcnt_epi16(__m128i __A) {
return (__m128i)__builtin_ia32_vpopcountw_v8hi((__v8hi)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
__funline __m256i _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U,
__m256i __B) {
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
(__v16hi)__A, (__v16hi)__B, (__mmask16)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
__funline __m256i _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
(__v16hi)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
__funline __m128i _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
__funline __m128i _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
(__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
__funline __m128i _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask((__v8hi)__A, (__v8hi)__B,
(__mmask8)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
__funline __m128i _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask(
(__v8hi)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
}

File diff suppressed because it is too large Load diff

View file

@ -20,99 +20,75 @@ typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conflict_epi32(__m512i __A) {
__funline __m512i _mm512_conflict_epi32(__m512i __A) {
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
__funline __m512i _mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
(__v16si)__A, (__v16si)__W, (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
__funline __m512i _mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conflict_epi64(__m512i __A) {
__funline __m512i _mm512_conflict_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
__funline __m512i _mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vpconflictdi_512_mask((__v8di)__A, (__v8di)__W,
(__mmask8)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
__funline __m512i _mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_lzcnt_epi64(__m512i __A) {
__funline __m512i _mm512_lzcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
__funline __m512i _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntq_512_mask((__v8di)__A, (__v8di)__W,
(__mmask8)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
__funline __m512i _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_lzcnt_epi32(__m512i __A) {
__funline __m512i _mm512_lzcnt_epi32(__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
__funline __m512i _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntd_512_mask((__v16si)__A, (__v16si)__W,
(__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
__funline __m512i _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastmb_epi64(__mmask8 __A) {
__funline __m512i _mm512_broadcastmb_epi64(__mmask8 __A) {
return (__m512i)__builtin_ia32_broadcastmb512(__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastmw_epi32(__mmask16 __A) {
__funline __m512i _mm512_broadcastmw_epi32(__mmask16 __A) {
return (__m512i)__builtin_ia32_broadcastmw512(__A);
}

File diff suppressed because it is too large Load diff

View file

@ -21,159 +21,126 @@ typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd(__m512d __A, int __R) {
__funline __m512d _mm512_exp2a23_round_pd(__m512d __A, int __R) {
__m512d __W;
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
(__mmask8)-1, __R);
}
extern __inline __m512d __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U, __m512d __A, int __R) {
__funline __m512d _mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U,
__m512d __A, int __R) {
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
(__mmask8)__U, __R);
}
extern __inline __m512d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A, int __R) {
__funline __m512d _mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A,
int __R) {
return (__m512d)__builtin_ia32_exp2pd_mask(
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps(__m512 __A, int __R) {
__funline __m512 _mm512_exp2a23_round_ps(__m512 __A, int __R) {
__m512 __W;
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
(__mmask16)-1, __R);
}
extern __inline __m512 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U, __m512 __A, int __R) {
__funline __m512 _mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U,
__m512 __A, int __R) {
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
(__mmask16)__U, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps(__mmask16 __U, __m512 __A, int __R) {
__funline __m512 _mm512_maskz_exp2a23_round_ps(__mmask16 __U, __m512 __A,
int __R) {
return (__m512)__builtin_ia32_exp2ps_mask(
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
}
extern __inline __m512d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd(__m512d __A, int __R) {
__funline __m512d _mm512_rcp28_round_pd(__m512d __A, int __R) {
__m512d __W;
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
(__mmask8)-1, __R);
}
extern __inline __m512d __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_rcp28_round_pd(__m512d __W, __mmask8 __U, __m512d __A, int __R) {
__funline __m512d _mm512_mask_rcp28_round_pd(__m512d __W, __mmask8 __U,
__m512d __A, int __R) {
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
(__mmask8)__U, __R);
}
extern __inline __m512d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd(__mmask8 __U, __m512d __A, int __R) {
__funline __m512d _mm512_maskz_rcp28_round_pd(__mmask8 __U, __m512d __A,
int __R) {
return (__m512d)__builtin_ia32_rcp28pd_mask(
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps(__m512 __A, int __R) {
__funline __m512 _mm512_rcp28_round_ps(__m512 __A, int __R) {
__m512 __W;
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
(__mmask16)-1, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps(__m512 __W, __mmask16 __U, __m512 __A, int __R) {
__funline __m512 _mm512_mask_rcp28_round_ps(__m512 __W, __mmask16 __U, __m512 __A,
int __R) {
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
(__mmask16)__U, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps(__mmask16 __U, __m512 __A, int __R) {
__funline __m512 _mm512_maskz_rcp28_round_ps(__mmask16 __U, __m512 __A, int __R) {
return (__m512)__builtin_ia32_rcp28ps_mask(
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd(__m128d __A, __m128d __B, int __R) {
__funline __m128d _mm_rcp28_round_sd(__m128d __A, __m128d __B, int __R) {
return (__m128d)__builtin_ia32_rcp28sd_round((__v2df)__B, (__v2df)__A, __R);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss(__m128 __A, __m128 __B, int __R) {
__funline __m128 _mm_rcp28_round_ss(__m128 __A, __m128 __B, int __R) {
return (__m128)__builtin_ia32_rcp28ss_round((__v4sf)__B, (__v4sf)__A, __R);
}
extern __inline __m512d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd(__m512d __A, int __R) {
__funline __m512d _mm512_rsqrt28_round_pd(__m512d __A, int __R) {
__m512d __W;
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
(__mmask8)-1, __R);
}
extern __inline __m512d __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_rsqrt28_round_pd(__m512d __W, __mmask8 __U, __m512d __A, int __R) {
__funline __m512d _mm512_mask_rsqrt28_round_pd(__m512d __W, __mmask8 __U,
__m512d __A, int __R) {
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
(__mmask8)__U, __R);
}
extern __inline __m512d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd(__mmask8 __U, __m512d __A, int __R) {
__funline __m512d _mm512_maskz_rsqrt28_round_pd(__mmask8 __U, __m512d __A,
int __R) {
return (__m512d)__builtin_ia32_rsqrt28pd_mask(
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps(__m512 __A, int __R) {
__funline __m512 _mm512_rsqrt28_round_ps(__m512 __A, int __R) {
__m512 __W;
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
(__mmask16)-1, __R);
}
extern __inline __m512 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_rsqrt28_round_ps(__m512 __W, __mmask16 __U, __m512 __A, int __R) {
__funline __m512 _mm512_mask_rsqrt28_round_ps(__m512 __W, __mmask16 __U,
__m512 __A, int __R) {
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
(__mmask16)__U, __R);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps(__mmask16 __U, __m512 __A, int __R) {
__funline __m512 _mm512_maskz_rsqrt28_round_ps(__mmask16 __U, __m512 __A,
int __R) {
return (__m512)__builtin_ia32_rsqrt28ps_mask(
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd(__m128d __A, __m128d __B, int __R) {
__funline __m128d _mm_rsqrt28_round_sd(__m128d __A, __m128d __B, int __R) {
return (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)__B, (__v2df)__A, __R);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) {
__funline __m128 _mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) {
return (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)__B, (__v4sf)__A, __R);
}

File diff suppressed because it is too large Load diff

View file

@ -11,48 +11,36 @@
#define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
__funline __m512i _mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
return (__m512i)__builtin_ia32_vpmadd52luq512_mask((__v8di)__X, (__v8di)__Y,
(__v8di)__Z, (__mmask8)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
__funline __m512i _mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
return (__m512i)__builtin_ia32_vpmadd52huq512_mask((__v8di)__X, (__v8di)__Y,
(__v8di)__Z, (__mmask8)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X,
__m512i __Y) {
__funline __m512i _mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M,
__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_vpmadd52luq512_mask(
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X,
__m512i __Y) {
__funline __m512i _mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M,
__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_vpmadd52huq512_mask(
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y,
__m512i __Z) {
__funline __m512i _mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X,
__m512i __Y, __m512i __Z) {
return (__m512i)__builtin_ia32_vpmadd52luq512_maskz(
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y,
__m512i __Z) {
__funline __m512i _mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X,
__m512i __Y, __m512i __Z) {
return (__m512i)__builtin_ia32_vpmadd52huq512_maskz(
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
}

View file

@ -12,90 +12,70 @@
#define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
__funline __m128i _mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52luq128_mask((__v2di)__X, (__v2di)__Y,
(__v2di)__Z, (__mmask8)-1);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
__funline __m128i _mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52huq128_mask((__v2di)__X, (__v2di)__Y,
(__v2di)__Z, (__mmask8)-1);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
__funline __m256i _mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52luq256_mask((__v4di)__X, (__v4di)__Y,
(__v4di)__Z, (__mmask8)-1);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
__funline __m256i _mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52huq256_mask((__v4di)__X, (__v4di)__Y,
(__v4di)__Z, (__mmask8)-1);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
__funline __m128i _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X,
__m128i __Y) {
return (__m128i)__builtin_ia32_vpmadd52luq128_mask(
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
__funline __m128i _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X,
__m128i __Y) {
return (__m128i)__builtin_ia32_vpmadd52huq128_mask(
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y) {
__funline __m256i _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M,
__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_vpmadd52luq256_mask(
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y) {
__funline __m256i _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M,
__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_vpmadd52huq256_mask(
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
__funline __m128i _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
__m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52luq128_maskz(
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
__funline __m128i _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
__m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52huq128_maskz(
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y,
__m256i __Z) {
__funline __m256i _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X,
__m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52luq256_maskz(
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y,
__m256i __Z) {
__funline __m256i _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X,
__m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52huq256_maskz(
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
}

View file

@ -18,130 +18,99 @@ typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr,
__funline void _mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr,
int __scale, int __hint) {
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32gather_ps(__m512i __index, void const *__addr,
__funline void _mm512_prefetch_i32gather_ps(__m512i __index, void const *__addr,
int __scale, int __hint) {
__builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
__scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_pd(__m256i __index, __mmask8 __mask,
__funline void _mm512_mask_prefetch_i32gather_pd(__m256i __index, __mmask8 __mask,
void const *__addr, int __scale,
int __hint) {
__builtin_ia32_gatherpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_ps(__m512i __index, __mmask16 __mask,
__funline void _mm512_mask_prefetch_i32gather_ps(__m512i __index,
__mmask16 __mask,
void const *__addr, int __scale,
int __hint) {
__builtin_ia32_gatherpfdps(__mask, (__v16si)__index, __addr, __scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64gather_pd(__m512i __index, void const *__addr,
__funline void _mm512_prefetch_i64gather_pd(__m512i __index, void const *__addr,
int __scale, int __hint) {
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64gather_ps(__m512i __index, void const *__addr,
__funline void _mm512_prefetch_i64gather_ps(__m512i __index, void const *__addr,
int __scale, int __hint) {
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_pd(__m512i __index, __mmask8 __mask,
__funline void _mm512_mask_prefetch_i64gather_pd(__m512i __index, __mmask8 __mask,
void const *__addr, int __scale,
int __hint) {
__builtin_ia32_gatherpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_ps(__m512i __index, __mmask8 __mask,
__funline void _mm512_mask_prefetch_i64gather_ps(__m512i __index, __mmask8 __mask,
void const *__addr, int __scale,
int __hint) {
__builtin_ia32_gatherpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_pd(void *__addr, __m256i __index, int __scale,
int __hint) {
__funline void _mm512_prefetch_i32scatter_pd(void *__addr, __m256i __index,
int __scale, int __hint) {
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_ps(void *__addr, __m512i __index, int __scale,
int __hint) {
__funline void _mm512_prefetch_i32scatter_ps(void *__addr, __m512i __index,
int __scale, int __hint) {
__builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
__scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32scatter_pd(void *__addr, __mmask8 __mask,
__funline void _mm512_mask_prefetch_i32scatter_pd(void *__addr, __mmask8 __mask,
__m256i __index, int __scale,
int __hint) {
__builtin_ia32_scatterpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32scatter_ps(void *__addr, __mmask16 __mask,
__funline void _mm512_mask_prefetch_i32scatter_ps(void *__addr, __mmask16 __mask,
__m512i __index, int __scale,
int __hint) {
__builtin_ia32_scatterpfdps(__mask, (__v16si)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_pd(void *__addr, __m512i __index, int __scale,
int __hint) {
__funline void _mm512_prefetch_i64scatter_pd(void *__addr, __m512i __index,
int __scale, int __hint) {
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_ps(void *__addr, __m512i __index, int __scale,
int __hint) {
__funline void _mm512_prefetch_i64scatter_ps(void *__addr, __m512i __index,
int __scale, int __hint) {
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_pd(void *__addr, __mmask8 __mask,
__funline void _mm512_mask_prefetch_i64scatter_pd(void *__addr, __mmask8 __mask,
__m512i __index, int __scale,
int __hint) {
__builtin_ia32_scatterpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_ps(void *__addr, __mmask8 __mask,
__funline void _mm512_mask_prefetch_i64scatter_ps(void *__addr, __mmask8 __mask,
__m512i __index, int __scale,
int __hint) {
__builtin_ia32_scatterpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);

View file

@ -13,101 +13,77 @@
#endif /* __AVX512VBMI2__ */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) {
__funline __m512i _mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) {
return (__m512i)__builtin_ia32_vpshrd_v32hi((__v32hi)__A, (__v32hi)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) {
__funline __m512i _mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) {
return (__m512i)__builtin_ia32_vpshrd_v16si((__v16si)__A, (__v16si)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__funline __m512i _mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__m512i __D, int __E) {
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B, __m512i __C, int __D) {
__funline __m512i _mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B,
__m512i __C, int __D) {
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
(__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) {
__funline __m512i _mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) {
return (__m512i)__builtin_ia32_vpshrd_v8di((__v8di)__A, (__v8di)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
int __E) {
__funline __m512i _mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
__m512i __D, int __E) {
return (__m512i)__builtin_ia32_vpshrd_v8di_mask((__v8di)__C, (__v8di)__D, __E,
(__v8di)__A, (__mmask8)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C, int __D) {
__funline __m512i _mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
int __D) {
return (__m512i)__builtin_ia32_vpshrd_v8di_mask(
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
(__mmask8)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) {
__funline __m512i _mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) {
return (__m512i)__builtin_ia32_vpshld_v32hi((__v32hi)__A, (__v32hi)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) {
__funline __m512i _mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) {
return (__m512i)__builtin_ia32_vpshld_v16si((__v16si)__A, (__v16si)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__funline __m512i _mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__m512i __D, int __E) {
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B, __m512i __C, int __D) {
__funline __m512i _mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B,
__m512i __C, int __D) {
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
(__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) {
__funline __m512i _mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) {
return (__m512i)__builtin_ia32_vpshld_v8di((__v8di)__A, (__v8di)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
int __E) {
__funline __m512i _mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
__m512i __D, int __E) {
return (__m512i)__builtin_ia32_vpshld_v8di_mask((__v8di)__C, (__v8di)__D, __E,
(__v8di)__A, (__mmask8)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C, int __D) {
__funline __m512i _mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
int __D) {
return (__m512i)__builtin_ia32_vpshld_v8di_mask(
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
(__mmask8)__A);
@ -161,99 +137,79 @@ extern __inline __m512i
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
#endif
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpshrdv_v32hi((__v32hi)__A, (__v32hi)__B,
(__v32hi)__C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpshrdv_v16si((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask(
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpshrdv_v8di((__v8di)__A, (__v8di)__B,
(__v8di)__C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask((__v8di)__A, (__v8di)__C,
(__v8di)__D, (__mmask8)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz((__v8di)__B, (__v8di)__C,
(__v8di)__D, (__mmask8)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpshldv_v32hi((__v32hi)__A, (__v32hi)__B,
(__v32hi)__C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpshldv_v16si((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshldv_v16si_mask(
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpshldv_v8di((__v8di)__A, (__v8di)__B,
(__v8di)__C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshldv_v8di_mask((__v8di)__A, (__v8di)__C,
(__v8di)__D, (__mmask8)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz((__v8di)__B, (__v8di)__C,
(__v8di)__D, (__mmask8)__A);
}
@ -270,132 +226,106 @@ _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) {
#define __DISABLE_AVX512VBMI2BW__
#endif /* __AVX512VBMI2BW__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_epi8(__m512i __A, __mmask64 __B, __m512i __C) {
__funline __m512i _mm512_mask_compress_epi8(__m512i __A, __mmask64 __B,
__m512i __C) {
return (__m512i)__builtin_ia32_compressqi512_mask((__v64qi)__C, (__v64qi)__A,
(__mmask64)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) {
__funline __m512i _mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) {
return (__m512i)__builtin_ia32_compressqi512_mask(
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B, __m512i __C) {
__funline void _mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B,
__m512i __C) {
__builtin_ia32_compressstoreuqi512_mask((__v64qi *)__A, (__v64qi)__C,
(__mmask64)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_epi16(__m512i __A, __mmask32 __B, __m512i __C) {
__funline __m512i _mm512_mask_compress_epi16(__m512i __A, __mmask32 __B,
__m512i __C) {
return (__m512i)__builtin_ia32_compresshi512_mask((__v32hi)__C, (__v32hi)__A,
(__mmask32)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) {
__funline __m512i _mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) {
return (__m512i)__builtin_ia32_compresshi512_mask(
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B, __m512i __C) {
__funline void _mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B,
__m512i __C) {
__builtin_ia32_compressstoreuhi512_mask((__v32hi *)__A, (__v32hi)__C,
(__mmask32)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_epi8(__m512i __A, __mmask64 __B, __m512i __C) {
__funline __m512i _mm512_mask_expand_epi8(__m512i __A, __mmask64 __B,
__m512i __C) {
return (__m512i)__builtin_ia32_expandqi512_mask((__v64qi)__C, (__v64qi)__A,
(__mmask64)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) {
__funline __m512i _mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) {
return (__m512i)__builtin_ia32_expandqi512_maskz(
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B, const void *__C) {
__funline __m512i _mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B,
const void *__C) {
return (__m512i)__builtin_ia32_expandloadqi512_mask(
(const __v64qi *)__C, (__v64qi)__A, (__mmask64)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) {
__funline __m512i _mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) {
return (__m512i)__builtin_ia32_expandloadqi512_maskz(
(const __v64qi *)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_epi16(__m512i __A, __mmask32 __B, __m512i __C) {
__funline __m512i _mm512_mask_expand_epi16(__m512i __A, __mmask32 __B,
__m512i __C) {
return (__m512i)__builtin_ia32_expandhi512_mask((__v32hi)__C, (__v32hi)__A,
(__mmask32)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) {
__funline __m512i _mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) {
return (__m512i)__builtin_ia32_expandhi512_maskz(
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B, const void *__C) {
__funline __m512i _mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B,
const void *__C) {
return (__m512i)__builtin_ia32_expandloadhi512_mask(
(const __v32hi *)__C, (__v32hi)__A, (__mmask32)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) {
__funline __m512i _mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) {
return (__m512i)__builtin_ia32_expandloadhi512_maskz(
(const __v32hi *)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
__funline __m512i _mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
__m512i __D, int __E) {
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shrdi_epi16(__mmask32 __A, __m512i __B, __m512i __C, int __D) {
__funline __m512i _mm512_maskz_shrdi_epi16(__mmask32 __A, __m512i __B,
__m512i __C, int __D) {
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
(__mmask32)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shldi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
__funline __m512i _mm512_mask_shldi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
__m512i __D, int __E) {
return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shldi_epi16(__mmask32 __A, __m512i __B, __m512i __C, int __D) {
__funline __m512i _mm512_maskz_shldi_epi16(__mmask32 __A, __m512i __B,
__m512i __C, int __D) {
return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
(__mmask32)__A);
@ -418,30 +348,26 @@ extern __inline __m512i
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
#endif
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask(
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_shrdv_epi16(__mmask32 __A, __m512i __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_maskz_shrdv_epi16(__mmask32 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz(
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
__m512i __D) {
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask(
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_shldv_epi16(__mmask32 __A, __m512i __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_maskz_shldv_epi16(__mmask32 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz(
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
}

View file

@ -12,414 +12,322 @@
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMIVL__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi8(__m128i __A, __mmask16 __B, __m128i __C) {
__funline __m128i _mm_mask_compress_epi8(__m128i __A, __mmask16 __B,
__m128i __C) {
return (__m128i)__builtin_ia32_compressqi128_mask((__v16qi)__C, (__v16qi)__A,
(__mmask16)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi8(__mmask16 __A, __m128i __B) {
__funline __m128i _mm_maskz_compress_epi8(__mmask16 __A, __m128i __B) {
return (__m128i)__builtin_ia32_compressqi128_mask(
(__v16qi)__B, (__v16qi)_mm_setzero_si128(), (__mmask16)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi16(void *__A, __mmask16 __B, __m256i __C) {
__funline void _mm256_mask_compressstoreu_epi16(void *__A, __mmask16 __B,
__m256i __C) {
__builtin_ia32_compressstoreuhi256_mask((__v16hi *)__A, (__v16hi)__C,
(__mmask16)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi16(__m128i __A, __mmask8 __B, __m128i __C) {
__funline __m128i _mm_mask_compress_epi16(__m128i __A, __mmask8 __B,
__m128i __C) {
return (__m128i)__builtin_ia32_compresshi128_mask((__v8hi)__C, (__v8hi)__A,
(__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi16(__mmask8 __A, __m128i __B) {
__funline __m128i _mm_maskz_compress_epi16(__mmask8 __A, __m128i __B) {
return (__m128i)__builtin_ia32_compresshi128_mask(
(__v8hi)__B, (__v8hi)_mm_setzero_si128(), (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi16(__m256i __A, __mmask16 __B, __m256i __C) {
__funline __m256i _mm256_mask_compress_epi16(__m256i __A, __mmask16 __B,
__m256i __C) {
return (__m256i)__builtin_ia32_compresshi256_mask((__v16hi)__C, (__v16hi)__A,
(__mmask16)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi16(__mmask16 __A, __m256i __B) {
__funline __m256i _mm256_maskz_compress_epi16(__mmask16 __A, __m256i __B) {
return (__m256i)__builtin_ia32_compresshi256_mask(
(__v16hi)__B, (__v16hi)_mm256_setzero_si256(), (__mmask16)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi8(void *__A, __mmask16 __B, __m128i __C) {
__funline void _mm_mask_compressstoreu_epi8(void *__A, __mmask16 __B,
__m128i __C) {
__builtin_ia32_compressstoreuqi128_mask((__v16qi *)__A, (__v16qi)__C,
(__mmask16)__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi16(void *__A, __mmask8 __B, __m128i __C) {
__funline void _mm_mask_compressstoreu_epi16(void *__A, __mmask8 __B,
__m128i __C) {
__builtin_ia32_compressstoreuhi128_mask((__v8hi *)__A, (__v8hi)__C,
(__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi8(__m128i __A, __mmask16 __B, __m128i __C) {
__funline __m128i _mm_mask_expand_epi8(__m128i __A, __mmask16 __B, __m128i __C) {
return (__m128i)__builtin_ia32_expandqi128_mask((__v16qi)__C, (__v16qi)__A,
(__mmask16)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi8(__mmask16 __A, __m128i __B) {
__funline __m128i _mm_maskz_expand_epi8(__mmask16 __A, __m128i __B) {
return (__m128i)__builtin_ia32_expandqi128_maskz(
(__v16qi)__B, (__v16qi)_mm_setzero_si128(), (__mmask16)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi8(__m128i __A, __mmask16 __B, const void *__C) {
__funline __m128i _mm_mask_expandloadu_epi8(__m128i __A, __mmask16 __B,
const void *__C) {
return (__m128i)__builtin_ia32_expandloadqi128_mask(
(const __v16qi *)__C, (__v16qi)__A, (__mmask16)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi8(__mmask16 __A, const void *__B) {
__funline __m128i _mm_maskz_expandloadu_epi8(__mmask16 __A, const void *__B) {
return (__m128i)__builtin_ia32_expandloadqi128_maskz(
(const __v16qi *)__B, (__v16qi)_mm_setzero_si128(), (__mmask16)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi16(__m128i __A, __mmask8 __B, __m128i __C) {
__funline __m128i _mm_mask_expand_epi16(__m128i __A, __mmask8 __B, __m128i __C) {
return (__m128i)__builtin_ia32_expandhi128_mask((__v8hi)__C, (__v8hi)__A,
(__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi16(__mmask8 __A, __m128i __B) {
__funline __m128i _mm_maskz_expand_epi16(__mmask8 __A, __m128i __B) {
return (__m128i)__builtin_ia32_expandhi128_maskz(
(__v8hi)__B, (__v8hi)_mm_setzero_si128(), (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi16(__m128i __A, __mmask8 __B, const void *__C) {
__funline __m128i _mm_mask_expandloadu_epi16(__m128i __A, __mmask8 __B,
const void *__C) {
return (__m128i)__builtin_ia32_expandloadhi128_mask(
(const __v8hi *)__C, (__v8hi)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi16(__mmask8 __A, const void *__B) {
__funline __m128i _mm_maskz_expandloadu_epi16(__mmask8 __A, const void *__B) {
return (__m128i)__builtin_ia32_expandloadhi128_maskz(
(const __v8hi *)__B, (__v8hi)_mm_setzero_si128(), (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi16(__m256i __A, __mmask16 __B, __m256i __C) {
__funline __m256i _mm256_mask_expand_epi16(__m256i __A, __mmask16 __B,
__m256i __C) {
return (__m256i)__builtin_ia32_expandhi256_mask((__v16hi)__C, (__v16hi)__A,
(__mmask16)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi16(__mmask16 __A, __m256i __B) {
__funline __m256i _mm256_maskz_expand_epi16(__mmask16 __A, __m256i __B) {
return (__m256i)__builtin_ia32_expandhi256_maskz(
(__v16hi)__B, (__v16hi)_mm256_setzero_si256(), (__mmask16)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi16(__m256i __A, __mmask16 __B, const void *__C) {
__funline __m256i _mm256_mask_expandloadu_epi16(__m256i __A, __mmask16 __B,
const void *__C) {
return (__m256i)__builtin_ia32_expandloadhi256_mask(
(const __v16hi *)__C, (__v16hi)__A, (__mmask16)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi16(__mmask16 __A, const void *__B) {
__funline __m256i _mm256_maskz_expandloadu_epi16(__mmask16 __A, const void *__B) {
return (__m256i)__builtin_ia32_expandloadhi256_maskz(
(const __v16hi *)__B, (__v16hi)_mm256_setzero_si256(), (__mmask16)__A);
}
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi16(__m256i __A, __m256i __B, int __C) {
__funline __m256i _mm256_shrdi_epi16(__m256i __A, __m256i __B, int __C) {
return (__m256i)__builtin_ia32_vpshrd_v16hi((__v16hi)__A, (__v16hi)__B, __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi16(__m256i __A, __mmask16 __B, __m256i __C,
__funline __m256i _mm256_mask_shrdi_epi16(__m256i __A, __mmask16 __B, __m256i __C,
__m256i __D, int __E) {
return (__m256i)__builtin_ia32_vpshrd_v16hi_mask(
(__v16hi)__C, (__v16hi)__D, __E, (__v16hi)__A, (__mmask16)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi16(__mmask16 __A, __m256i __B, __m256i __C, int __D) {
__funline __m256i _mm256_maskz_shrdi_epi16(__mmask16 __A, __m256i __B,
__m256i __C, int __D) {
return (__m256i)__builtin_ia32_vpshrd_v16hi_mask(
(__v16hi)__B, (__v16hi)__C, __D, (__v16hi)_mm256_setzero_si256(),
(__mmask16)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
int __E) {
__funline __m256i _mm256_mask_shrdi_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D, int __E) {
return (__m256i)__builtin_ia32_vpshrd_v8si_mask((__v8si)__C, (__v8si)__D, __E,
(__v8si)__A, (__mmask8)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi32(__mmask8 __A, __m256i __B, __m256i __C, int __D) {
__funline __m256i _mm256_maskz_shrdi_epi32(__mmask8 __A, __m256i __B, __m256i __C,
int __D) {
return (__m256i)__builtin_ia32_vpshrd_v8si_mask(
(__v8si)__B, (__v8si)__C, __D, (__v8si)_mm256_setzero_si256(),
(__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi32(__m256i __A, __m256i __B, int __C) {
__funline __m256i _mm256_shrdi_epi32(__m256i __A, __m256i __B, int __C) {
return (__m256i)__builtin_ia32_vpshrd_v8si((__v8si)__A, (__v8si)__B, __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi64(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
int __E) {
__funline __m256i _mm256_mask_shrdi_epi64(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D, int __E) {
return (__m256i)__builtin_ia32_vpshrd_v4di_mask((__v4di)__C, (__v4di)__D, __E,
(__v4di)__A, (__mmask8)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi64(__mmask8 __A, __m256i __B, __m256i __C, int __D) {
__funline __m256i _mm256_maskz_shrdi_epi64(__mmask8 __A, __m256i __B, __m256i __C,
int __D) {
return (__m256i)__builtin_ia32_vpshrd_v4di_mask(
(__v4di)__B, (__v4di)__C, __D, (__v4di)_mm256_setzero_si256(),
(__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi64(__m256i __A, __m256i __B, int __C) {
__funline __m256i _mm256_shrdi_epi64(__m256i __A, __m256i __B, int __C) {
return (__m256i)__builtin_ia32_vpshrd_v4di((__v4di)__A, (__v4di)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi16(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
int __E) {
__funline __m128i _mm_mask_shrdi_epi16(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D, int __E) {
return (__m128i)__builtin_ia32_vpshrd_v8hi_mask((__v8hi)__C, (__v8hi)__D, __E,
(__v8hi)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi16(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
__funline __m128i _mm_maskz_shrdi_epi16(__mmask8 __A, __m128i __B, __m128i __C,
int __D) {
return (__m128i)__builtin_ia32_vpshrd_v8hi_mask((__v8hi)__B, (__v8hi)__C, __D,
(__v8hi)_mm_setzero_si128(),
(__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi16(__m128i __A, __m128i __B, int __C) {
__funline __m128i _mm_shrdi_epi16(__m128i __A, __m128i __B, int __C) {
return (__m128i)__builtin_ia32_vpshrd_v8hi((__v8hi)__A, (__v8hi)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
int __E) {
__funline __m128i _mm_mask_shrdi_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D, int __E) {
return (__m128i)__builtin_ia32_vpshrd_v4si_mask((__v4si)__C, (__v4si)__D, __E,
(__v4si)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi32(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
__funline __m128i _mm_maskz_shrdi_epi32(__mmask8 __A, __m128i __B, __m128i __C,
int __D) {
return (__m128i)__builtin_ia32_vpshrd_v4si_mask((__v4si)__B, (__v4si)__C, __D,
(__v4si)_mm_setzero_si128(),
(__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi32(__m128i __A, __m128i __B, int __C) {
__funline __m128i _mm_shrdi_epi32(__m128i __A, __m128i __B, int __C) {
return (__m128i)__builtin_ia32_vpshrd_v4si((__v4si)__A, (__v4si)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi64(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
int __E) {
__funline __m128i _mm_mask_shrdi_epi64(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D, int __E) {
return (__m128i)__builtin_ia32_vpshrd_v2di_mask((__v2di)__C, (__v2di)__D, __E,
(__v2di)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi64(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
__funline __m128i _mm_maskz_shrdi_epi64(__mmask8 __A, __m128i __B, __m128i __C,
int __D) {
return (__m128i)__builtin_ia32_vpshrd_v2di_mask((__v2di)__B, (__v2di)__C, __D,
(__v2di)_mm_setzero_si128(),
(__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi64(__m128i __A, __m128i __B, int __C) {
__funline __m128i _mm_shrdi_epi64(__m128i __A, __m128i __B, int __C) {
return (__m128i)__builtin_ia32_vpshrd_v2di((__v2di)__A, (__v2di)__B, __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi16(__m256i __A, __m256i __B, int __C) {
__funline __m256i _mm256_shldi_epi16(__m256i __A, __m256i __B, int __C) {
return (__m256i)__builtin_ia32_vpshld_v16hi((__v16hi)__A, (__v16hi)__B, __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi16(__m256i __A, __mmask16 __B, __m256i __C,
__funline __m256i _mm256_mask_shldi_epi16(__m256i __A, __mmask16 __B, __m256i __C,
__m256i __D, int __E) {
return (__m256i)__builtin_ia32_vpshld_v16hi_mask(
(__v16hi)__C, (__v16hi)__D, __E, (__v16hi)__A, (__mmask16)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi16(__mmask16 __A, __m256i __B, __m256i __C, int __D) {
__funline __m256i _mm256_maskz_shldi_epi16(__mmask16 __A, __m256i __B,
__m256i __C, int __D) {
return (__m256i)__builtin_ia32_vpshld_v16hi_mask(
(__v16hi)__B, (__v16hi)__C, __D, (__v16hi)_mm256_setzero_si256(),
(__mmask16)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
int __E) {
__funline __m256i _mm256_mask_shldi_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D, int __E) {
return (__m256i)__builtin_ia32_vpshld_v8si_mask((__v8si)__C, (__v8si)__D, __E,
(__v8si)__A, (__mmask8)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi32(__mmask8 __A, __m256i __B, __m256i __C, int __D) {
__funline __m256i _mm256_maskz_shldi_epi32(__mmask8 __A, __m256i __B, __m256i __C,
int __D) {
return (__m256i)__builtin_ia32_vpshld_v8si_mask(
(__v8si)__B, (__v8si)__C, __D, (__v8si)_mm256_setzero_si256(),
(__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi32(__m256i __A, __m256i __B, int __C) {
__funline __m256i _mm256_shldi_epi32(__m256i __A, __m256i __B, int __C) {
return (__m256i)__builtin_ia32_vpshld_v8si((__v8si)__A, (__v8si)__B, __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi64(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
int __E) {
__funline __m256i _mm256_mask_shldi_epi64(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D, int __E) {
return (__m256i)__builtin_ia32_vpshld_v4di_mask((__v4di)__C, (__v4di)__D, __E,
(__v4di)__A, (__mmask8)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi64(__mmask8 __A, __m256i __B, __m256i __C, int __D) {
__funline __m256i _mm256_maskz_shldi_epi64(__mmask8 __A, __m256i __B, __m256i __C,
int __D) {
return (__m256i)__builtin_ia32_vpshld_v4di_mask(
(__v4di)__B, (__v4di)__C, __D, (__v4di)_mm256_setzero_si256(),
(__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi64(__m256i __A, __m256i __B, int __C) {
__funline __m256i _mm256_shldi_epi64(__m256i __A, __m256i __B, int __C) {
return (__m256i)__builtin_ia32_vpshld_v4di((__v4di)__A, (__v4di)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi16(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
int __E) {
__funline __m128i _mm_mask_shldi_epi16(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D, int __E) {
return (__m128i)__builtin_ia32_vpshld_v8hi_mask((__v8hi)__C, (__v8hi)__D, __E,
(__v8hi)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi16(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
__funline __m128i _mm_maskz_shldi_epi16(__mmask8 __A, __m128i __B, __m128i __C,
int __D) {
return (__m128i)__builtin_ia32_vpshld_v8hi_mask((__v8hi)__B, (__v8hi)__C, __D,
(__v8hi)_mm_setzero_si128(),
(__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi16(__m128i __A, __m128i __B, int __C) {
__funline __m128i _mm_shldi_epi16(__m128i __A, __m128i __B, int __C) {
return (__m128i)__builtin_ia32_vpshld_v8hi((__v8hi)__A, (__v8hi)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
int __E) {
__funline __m128i _mm_mask_shldi_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D, int __E) {
return (__m128i)__builtin_ia32_vpshld_v4si_mask((__v4si)__C, (__v4si)__D, __E,
(__v4si)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi32(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
__funline __m128i _mm_maskz_shldi_epi32(__mmask8 __A, __m128i __B, __m128i __C,
int __D) {
return (__m128i)__builtin_ia32_vpshld_v4si_mask((__v4si)__B, (__v4si)__C, __D,
(__v4si)_mm_setzero_si128(),
(__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi32(__m128i __A, __m128i __B, int __C) {
__funline __m128i _mm_shldi_epi32(__m128i __A, __m128i __B, int __C) {
return (__m128i)__builtin_ia32_vpshld_v4si((__v4si)__A, (__v4si)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi64(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
int __E) {
__funline __m128i _mm_mask_shldi_epi64(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D, int __E) {
return (__m128i)__builtin_ia32_vpshld_v2di_mask((__v2di)__C, (__v2di)__D, __E,
(__v2di)__A, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi64(__mmask8 __A, __m128i __B, __m128i __C, int __D) {
__funline __m128i _mm_maskz_shldi_epi64(__mmask8 __A, __m128i __B, __m128i __C,
int __D) {
return (__m128i)__builtin_ia32_vpshld_v2di_mask((__v2di)__B, (__v2di)__C, __D,
(__v2di)_mm_setzero_si128(),
(__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi64(__m128i __A, __m128i __B, int __C) {
__funline __m128i _mm_shldi_epi64(__m128i __A, __m128i __B, int __C) {
return (__m128i)__builtin_ia32_vpshld_v2di((__v2di)__A, (__v2di)__B, __C);
}
#else
@ -545,254 +453,206 @@ extern __inline __m128i
(__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(A))
#endif
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpshrdv_v16hi((__v16hi)__A, (__v16hi)__B,
(__v16hi)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask(
(__v16hi)__A, (__v16hi)__C, (__v16hi)__D, (__mmask16)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_shrdv_epi16(__mmask16 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_shrdv_epi16(__mmask16 __A, __m256i __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz(
(__v16hi)__B, (__v16hi)__C, (__v16hi)__D, (__mmask16)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpshrdv_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshrdv_v8si_mask((__v8si)__A, (__v8si)__C,
(__v8si)__D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_shrdv_epi32(__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_shrdv_epi32(__mmask8 __A, __m256i __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz((__v8si)__B, (__v8si)__C,
(__v8si)__D, (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpshrdv_v4di((__v4di)__A, (__v4di)__B,
(__v4di)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshrdv_v4di_mask((__v4di)__A, (__v4di)__C,
(__v4di)__D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_shrdv_epi64(__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_shrdv_epi64(__mmask8 __A, __m256i __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz((__v4di)__B, (__v4di)__C,
(__v4di)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpshrdv_v8hi((__v8hi)__A, (__v8hi)__B,
(__v8hi)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask((__v8hi)__A, (__v8hi)__C,
(__v8hi)__D, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi16(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_shrdv_epi16(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz((__v8hi)__B, (__v8hi)__C,
(__v8hi)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpshrdv_v4si((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshrdv_v4si_mask((__v4si)__A, (__v4si)__C,
(__v4si)__D, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi32(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_shrdv_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz((__v4si)__B, (__v4si)__C,
(__v4si)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpshrdv_v2di((__v2di)__A, (__v2di)__B,
(__v2di)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshrdv_v2di_mask((__v2di)__A, (__v2di)__C,
(__v2di)__D, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi64(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_shrdv_epi64(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz((__v2di)__B, (__v2di)__C,
(__v2di)__D, (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpshldv_v16hi((__v16hi)__A, (__v16hi)__B,
(__v16hi)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshldv_v16hi_mask(
(__v16hi)__A, (__v16hi)__C, (__v16hi)__D, (__mmask16)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_shldv_epi16(__mmask16 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_shldv_epi16(__mmask16 __A, __m256i __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz(
(__v16hi)__B, (__v16hi)__C, (__v16hi)__D, (__mmask16)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpshldv_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshldv_v8si_mask((__v8si)__A, (__v8si)__C,
(__v8si)__D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_shldv_epi32(__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_shldv_epi32(__mmask8 __A, __m256i __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshldv_v8si_maskz((__v8si)__B, (__v8si)__C,
(__v8si)__D, (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpshldv_v4di((__v4di)__A, (__v4di)__B,
(__v4di)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshldv_v4di_mask((__v4di)__A, (__v4di)__C,
(__v4di)__D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_shldv_epi64(__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_shldv_epi64(__mmask8 __A, __m256i __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpshldv_v4di_maskz((__v4di)__B, (__v4di)__C,
(__v4di)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpshldv_v8hi((__v8hi)__A, (__v8hi)__B,
(__v8hi)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi16(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_shldv_epi16(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshldv_v8hi_mask((__v8hi)__A, (__v8hi)__C,
(__v8hi)__D, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi16(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_shldv_epi16(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz((__v8hi)__B, (__v8hi)__C,
(__v8hi)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpshldv_v4si((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_shldv_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshldv_v4si_mask((__v4si)__A, (__v4si)__C,
(__v4si)__D, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi32(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_shldv_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshldv_v4si_maskz((__v4si)__B, (__v4si)__C,
(__v4si)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpshldv_v2di((__v2di)__A, (__v2di)__B,
(__v2di)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi64(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_shldv_epi64(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshldv_v2di_mask((__v2di)__A, (__v2di)__C,
(__v2di)__D, (__mmask8)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi64(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_shldv_epi64(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpshldv_v2di_maskz((__v2di)__B, (__v2di)__C,
(__v2di)__D, (__mmask8)__A);
}
@ -809,51 +669,41 @@ extern __inline __m128i
#define __DISABLE_AVX512VBMI2VLBW__
#endif /* __AVX512VBMIVLBW__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi8(__m256i __A, __mmask32 __B, __m256i __C) {
__funline __m256i _mm256_mask_compress_epi8(__m256i __A, __mmask32 __B,
__m256i __C) {
return (__m256i)__builtin_ia32_compressqi256_mask((__v32qi)__C, (__v32qi)__A,
(__mmask32)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi8(__mmask32 __A, __m256i __B) {
__funline __m256i _mm256_maskz_compress_epi8(__mmask32 __A, __m256i __B) {
return (__m256i)__builtin_ia32_compressqi256_mask(
(__v32qi)__B, (__v32qi)_mm256_setzero_si256(), (__mmask32)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi8(void *__A, __mmask32 __B, __m256i __C) {
__funline void _mm256_mask_compressstoreu_epi8(void *__A, __mmask32 __B,
__m256i __C) {
__builtin_ia32_compressstoreuqi256_mask((__v32qi *)__A, (__v32qi)__C,
(__mmask32)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi8(__m256i __A, __mmask32 __B, __m256i __C) {
__funline __m256i _mm256_mask_expand_epi8(__m256i __A, __mmask32 __B,
__m256i __C) {
return (__m256i)__builtin_ia32_expandqi256_mask((__v32qi)__C, (__v32qi)__A,
(__mmask32)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi8(__mmask32 __A, __m256i __B) {
__funline __m256i _mm256_maskz_expand_epi8(__mmask32 __A, __m256i __B) {
return (__m256i)__builtin_ia32_expandqi256_maskz(
(__v32qi)__B, (__v32qi)_mm256_setzero_si256(), (__mmask32)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi8(__m256i __A, __mmask32 __B, const void *__C) {
__funline __m256i _mm256_mask_expandloadu_epi8(__m256i __A, __mmask32 __B,
const void *__C) {
return (__m256i)__builtin_ia32_expandloadqi256_mask(
(const __v32qi *)__C, (__v32qi)__A, (__mmask32)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi8(__mmask32 __A, const void *__B) {
__funline __m256i _mm256_maskz_expandloadu_epi8(__mmask32 __A, const void *__B) {
return (__m256i)__builtin_ia32_expandloadqi256_maskz(
(const __v32qi *)__B, (__v32qi)_mm256_setzero_si256(), (__mmask32)__A);
}

View file

@ -11,77 +11,62 @@
#define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X,
__m512i __Y) {
__funline __m512i _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M,
__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
(__v64qi)__X, (__v64qi)__Y, (__v64qi)__W, (__mmask64)__M);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) {
__funline __m512i _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X,
__m512i __Y) {
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_setzero_si512(),
(__mmask64)__M);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
__funline __m512i _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_undefined_epi32(),
(__mmask64)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
__funline __m512i _mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512_mask(
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_undefined_epi32(),
(__mmask64)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
__funline __m512i _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A,
__m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512_mask(
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_setzero_si512(),
(__mmask64)__M);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A,
__m512i __B) {
__funline __m512i _mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M,
__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512_mask(
(__v64qi)__B, (__v64qi)__A, (__v64qi)__W, (__mmask64)__M);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
__funline __m512i _mm512_permutex2var_epi8(__m512i __A, __m512i __I,
__m512i __B) {
return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
(__v64qi)__I
/* idx */,
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
__m512i __B) {
__funline __m512i _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U,
__m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
(__v64qi)__I
/* idx */,
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
__m512i __B) {
__funline __m512i _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I,
__mmask64 __U, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2varqi512_mask((__v64qi)__A,
(__v64qi)__I
/* idx */,
@ -89,10 +74,8 @@ extern __inline __m512i
(__mmask64)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
__m512i __B) {
__funline __m512i _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A,
__m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermt2varqi512_maskz(
(__v64qi)__I
/* idx */,

View file

@ -12,123 +12,98 @@
#define __DISABLE_AVX512VBMIVL__
#endif /* __AVX512VBMIVL__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
__m256i __Y) {
__funline __m256i _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M,
__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
(__v32qi)__X, (__v32qi)__Y, (__v32qi)__W, (__mmask32)__M);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) {
__funline __m256i _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X,
__m256i __Y) {
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_setzero_si256(),
(__mmask32)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
__funline __m256i _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_undefined_si256(),
(__mmask32)-1);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
__m128i __Y) {
__funline __m128i _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M,
__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
(__v16qi)__X, (__v16qi)__Y, (__v16qi)__W, (__mmask16)__M);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) {
__funline __m128i _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X,
__m128i __Y) {
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
__funline __m128i _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_undefined_si128(),
(__mmask16)-1);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
__funline __m256i _mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256_mask(
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_undefined_si256(),
(__mmask32)-1);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
__funline __m256i _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A,
__m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256_mask(
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_setzero_si256(),
(__mmask32)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A,
__m256i __B) {
__funline __m256i _mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M,
__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256_mask(
(__v32qi)__B, (__v32qi)__A, (__v32qi)__W, (__mmask32)__M);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_permutexvar_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_permvarqi128_mask(
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_undefined_si128(),
(__mmask16)-1);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
__funline __m128i _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A,
__m128i __B) {
return (__m128i)__builtin_ia32_permvarqi128_mask(
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A,
__m128i __B) {
__funline __m128i _mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M,
__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_permvarqi128_mask(
(__v16qi)__B, (__v16qi)__A, (__v16qi)__W, (__mmask16)__M);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
__funline __m256i _mm256_permutex2var_epi8(__m256i __A, __m256i __I,
__m256i __B) {
return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
(__v32qi)__I
/* idx */,
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
__m256i __B) {
__funline __m256i _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U,
__m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
(__v32qi)__I
/* idx */,
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
__m256i __B) {
__funline __m256i _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I,
__mmask32 __U, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2varqi256_mask((__v32qi)__A,
(__v32qi)__I
/* idx */,
@ -136,39 +111,31 @@ extern __inline __m256i
(__mmask32)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
__m256i __B) {
__funline __m256i _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A,
__m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermt2varqi256_maskz(
(__v32qi)__I
/* idx */,
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
__funline __m128i _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
(__v16qi)__I
/* idx */,
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
__m128i __B) {
__funline __m128i _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U,
__m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
(__v16qi)__I
/* idx */,
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
__m128i __B) {
__funline __m128i _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I,
__mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2varqi128_mask((__v16qi)__A,
(__v16qi)__I
/* idx */,
@ -176,10 +143,8 @@ extern __inline __m128i
(__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
__m128i __B) {
__funline __m128i _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A,
__m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermt2varqi128_maskz(
(__v16qi)__I
/* idx */,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -11,92 +11,70 @@
#define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_dpbusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpdpbusd_v16si((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_dpbusd_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_dpbusd_epi32(__m512i __A, __mmask16 __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask(
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbusd_epi32(__mmask16 __A, __m512i __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_maskz_dpbusd_epi32(__mmask16 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_dpbusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpdpbusds_v16si((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbusds_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_mask_dpbusds_epi32(__m512i __A, __mmask16 __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask(
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbusds_epi32(__mmask16 __A, __m512i __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_maskz_dpbusds_epi32(__mmask16 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpdpwssd_v16si((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm512_mask_dpwssd_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
__funline __m512i _mm512_mask_dpwssd_epi32(__m512i __A, __mmask16 __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask(
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpwssd_epi32(__mmask16 __A, __m512i __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_maskz_dpwssd_epi32(__mmask16 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C) {
return (__m512i)__builtin_ia32_vpdpwssds_v16si((__v16si)__A, (__v16si)__B,
(__v16si)__C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpwssds_epi32(__m512i __A, __mmask16 __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_mask_dpwssds_epi32(__m512i __A, __mmask16 __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask(
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpwssds_epi32(__mmask16 __A, __m512i __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_maskz_dpwssds_epi32(__mmask16 __A, __m512i __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz(
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
}

View file

@ -12,172 +12,138 @@
#define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbusd_epi32(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_dpbusd_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpdpbusd_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_dpbusd_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_dpbusd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask((__v8si)__A, (__v8si)__C,
(__v8si)__D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_dpbusd_epi32(__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_dpbusd_epi32(__mmask8 __A, __m256i __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz(
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbusd_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_dpbusd_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpdpbusd_v4si((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbusd_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_dpbusd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask((__v4si)__A, (__v4si)__C,
(__v4si)__D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_maskz_dpbusd_epi32(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_dpbusd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz(
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbusds_epi32(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_dpbusds_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpdpbusds_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_dpbusds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_dpbusds_epi32(__m256i __A, __mmask8 __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask(
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbusds_epi32(__mmask8 __A, __m256i __B, __m256i __C,
__m256i __D) {
__funline __m256i _mm256_maskz_dpbusds_epi32(__mmask8 __A, __m256i __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz(
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbusds_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_dpbusds_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpdpbusds_v4si((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_mask_dpbusds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_dpbusds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask(
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_maskz_dpbusds_epi32(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_dpbusds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz(
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpwssd_epi32(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_dpwssd_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpdpwssd_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_dpwssd_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_dpwssd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask((__v8si)__A, (__v8si)__C,
(__v8si)__D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_maskz_dpwssd_epi32(__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_maskz_dpwssd_epi32(__mmask8 __A, __m256i __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz(
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpwssd_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_dpwssd_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpdpwssd_v4si((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwssd_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_dpwssd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask((__v4si)__A, (__v4si)__C,
(__v4si)__D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_maskz_dpwssd_epi32(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_dpwssd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz(
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpwssds_epi32(__m256i __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_dpwssds_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpdpwssds_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm256_mask_dpwssds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
__funline __m256i _mm256_mask_dpwssds_epi32(__m256i __A, __mmask8 __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask(
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpwssds_epi32(__mmask8 __A, __m256i __B, __m256i __C,
__m256i __D) {
__funline __m256i _mm256_maskz_dpwssds_epi32(__mmask8 __A, __m256i __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz(
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpwssds_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_dpwssds_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpdpwssds_v4si((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_mask_dpwssds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_dpwssds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask(
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_maskz_dpwssds_epi32(__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_maskz_dpwssds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz(
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}

View file

@ -12,42 +12,32 @@
#define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi32(__m512i __A) {
__funline __m512i _mm512_popcnt_epi32(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcountd_v16si((__v16si)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U, __m512i __B) {
__funline __m512i _mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U,
__m512i __B) {
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
(__v16si)__A, (__v16si)__B, (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
__funline __m512i _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi64(__m512i __A) {
__funline __m512i _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcountq_v8di((__v8di)__A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U, __m512i __B) {
__funline __m512i _mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U,
__m512i __B) {
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask((__v8di)__A, (__v8di)__B,
(__mmask8)__U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
__funline __m512i _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
}

View file

@ -12,82 +12,60 @@
#define __DISABLE_AVX512VPOPCNTDQVL__
#endif /* __AVX512VPOPCNTDQVL__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi32(__m128i __A) {
__funline __m128i _mm_popcnt_epi32(__m128i __A) {
return (__m128i)__builtin_ia32_vpopcountd_v4si((__v4si)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) {
__funline __m128i _mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask((__v4si)__A, (__v4si)__B,
(__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) {
__funline __m128i _mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) {
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask(
(__v4si)__A, (__v4si)_mm_setzero_si128(), (__mmask16)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi32(__m256i __A) {
__funline __m256i _mm256_popcnt_epi32(__m256i __A) {
return (__m256i)__builtin_ia32_vpopcountd_v8si((__v8si)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U, __m256i __B) {
__funline __m256i _mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U,
__m256i __B) {
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask((__v8si)__A, (__v8si)__B,
(__mmask16)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) {
__funline __m256i _mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) {
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask(
(__v8si)__A, (__v8si)_mm256_setzero_si256(), (__mmask16)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi64(__m128i __A) {
__funline __m128i _mm_popcnt_epi64(__m128i __A) {
return (__m128i)__builtin_ia32_vpopcountq_v2di((__v2di)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) {
__funline __m128i _mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) {
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask((__v2di)__A, (__v2di)__B,
(__mmask8)__U);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
__funline __m128i _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask(
(__v2di)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi64(__m256i __A) {
__funline __m256i _mm256_popcnt_epi64(__m256i __A) {
return (__m256i)__builtin_ia32_vpopcountq_v4di((__v4di)__A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U, __m256i __B) {
__funline __m256i _mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U,
__m256i __B) {
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask((__v4di)__A, (__v4di)__B,
(__mmask8)__U);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
__funline __m256i _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask(
(__v4di)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
}

File diff suppressed because it is too large Load diff

View file

@ -11,47 +11,37 @@
#define __DISABLE_BMI2__
#endif /* __BMI2__ */
/*
 * BMI2 bit-manipulation intrinsics: BZHI, PDEP, PEXT, MULX.
 * NOTE(review): the tail of _mulx_u64 and the close of _mulx_u32 were cut
 * by the diff; reconstructed from GCC's bmi2intrin.h — confirm.
 */

/* Zero the bits of __X at positions >= __Y.  */
__funline unsigned int _bzhi_u32(unsigned int __X, unsigned int __Y) {
  return __builtin_ia32_bzhi_si(__X, __Y);
}

/* Deposit the low bits of __X into the positions set in the mask __Y.  */
__funline unsigned int _pdep_u32(unsigned int __X, unsigned int __Y) {
  return __builtin_ia32_pdep_si(__X, __Y);
}

/* Extract the bits of __X selected by the mask __Y into the low bits.  */
__funline unsigned int _pext_u32(unsigned int __X, unsigned int __Y) {
  return __builtin_ia32_pext_si(__X, __Y);
}

#ifdef __x86_64__

__funline unsigned long long _bzhi_u64(unsigned long long __X,
                                       unsigned long long __Y) {
  return __builtin_ia32_bzhi_di(__X, __Y);
}

__funline unsigned long long _pdep_u64(unsigned long long __X,
                                       unsigned long long __Y) {
  return __builtin_ia32_pdep_di(__X, __Y);
}

__funline unsigned long long _pext_u64(unsigned long long __X,
                                       unsigned long long __Y) {
  return __builtin_ia32_pext_di(__X, __Y);
}

/* Unsigned 64×64→128 multiply: high half stored to *__P, low returned.  */
__funline unsigned long long _mulx_u64(unsigned long long __X,
                                       unsigned long long __Y,
                                       unsigned long long *__P) {
  unsigned __int128 __res = (unsigned __int128)__X * __Y;
  *__P = (unsigned long long)(__res >> 64);
  return (unsigned long long)__res;
}

#else /* !__x86_64__ */

/* Unsigned 32×32→64 multiply: high half stored to *__P, low returned.  */
__funline unsigned int _mulx_u32(unsigned int __X, unsigned int __Y,
                                 unsigned int *__P) {
  unsigned long long __res = (unsigned long long)__X * __Y;
  *__P = (unsigned int)(__res >> 32);
  return (unsigned int)__res;
}

#endif /* !__x86_64__ */

View file

@ -12,22 +12,16 @@
#endif /* __SHSTK__ */
#ifdef __x86_64__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp(void) {
__funline unsigned long long _get_ssp(void) {
return __builtin_ia32_rdsspq();
}
#else
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp(void) {
__funline unsigned int _get_ssp(void) {
return __builtin_ia32_rdsspd();
}
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_inc_ssp(unsigned int __B) {
__funline void _inc_ssp(unsigned int __B) {
#ifdef __x86_64__
__builtin_ia32_incsspq((unsigned long long)__B);
#else
@ -35,55 +29,39 @@ extern __inline void
#endif
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_saveprevssp(void) {
__funline void _saveprevssp(void) {
__builtin_ia32_saveprevssp();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rstorssp(void *__B) {
__funline void _rstorssp(void *__B) {
__builtin_ia32_rstorssp(__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssd(unsigned int __B, void *__C) {
__funline void _wrssd(unsigned int __B, void *__C) {
__builtin_ia32_wrssd(__B, __C);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssq(unsigned long long __B, void *__C) {
__funline void _wrssq(unsigned long long __B, void *__C) {
__builtin_ia32_wrssq(__B, __C);
}
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussd(unsigned int __B, void *__C) {
__funline void _wrussd(unsigned int __B, void *__C) {
__builtin_ia32_wrussd(__B, __C);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussq(unsigned long long __B, void *__C) {
__funline void _wrussq(unsigned long long __B, void *__C) {
__builtin_ia32_wrussq(__B, __C);
}
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_setssbsy(void) {
__funline void _setssbsy(void) {
__builtin_ia32_setssbsy();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_clrssbsy(void *__B) {
__funline void _clrssbsy(void *__B) {
__builtin_ia32_clrssbsy(__B);
}

View file

@ -10,9 +10,7 @@
#pragma GCC target("cldemote")
#define __DISABLE_CLDEMOTE__
#endif /* __CLDEMOTE__ */
/* Hint that the cache line containing __A may be demoted toward memory.  */
__funline void _cldemote(void *__A) {
  __builtin_ia32_cldemote(__A);
}
#ifdef __DISABLE_CLDEMOTE__

View file

@ -11,9 +11,7 @@
#define __DISABLE_CLFLUSHOPT__
#endif /* __CLFLUSHOPT__ */
/* Flush the cache line containing __A (optimized, weakly ordered form).  */
__funline void _mm_clflushopt(void *__A) {
  __builtin_ia32_clflushopt(__A);
}

View file

@ -11,9 +11,7 @@
#define __DISABLE_CLWB__
#endif /* __CLWB__ */
/* Write back the cache line containing __A without invalidating it.  */
__funline void _mm_clwb(void *__A) {
  __builtin_ia32_clwb(__A);
}

View file

@ -8,9 +8,7 @@
#define __DISABLE_CLZERO__
#endif /* __CLZERO__ */
/* Zero the cache line containing __I.  */
__funline void _mm_clzero(void* __I) {
  __builtin_ia32_clzero(__I);
}

File diff suppressed because it is too large Load diff

View file

@ -12,44 +12,32 @@
#define __DISABLE_F16C__
#endif /* __F16C__ */
extern __inline float
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtsh_ss(unsigned short __S) {
__funline float _cvtsh_ss(unsigned short __S) {
__v8hi __H = __extension__(__v8hi){(short)__S, 0, 0, 0, 0, 0, 0, 0};
__v4sf __A = __builtin_ia32_vcvtph2ps(__H);
return __builtin_ia32_vec_ext_v4sf(__A, 0);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_ps(__m128i __A) {
__funline __m128 _mm_cvtph_ps(__m128i __A) {
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__A);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_ps(__m128i __A) {
__funline __m256 _mm256_cvtph_ps(__m128i __A) {
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__A);
}
#ifdef __OPTIMIZE__
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtss_sh(float __F, const int __I) {
__funline unsigned short _cvtss_sh(float __F, const int __I) {
__v4sf __A = __extension__(__v4sf){__F, 0, 0, 0};
__v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I);
return (unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_ph(__m128 __A, const int __I) {
__funline __m128i _mm_cvtps_ph(__m128 __A, const int __I) {
return (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__A, __I);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_ph(__m256 __A, const int __I) {
__funline __m128i _mm256_cvtps_ph(__m256 __A, const int __I) {
return (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__A, __I);
}
#else

View file

@ -13,229 +13,165 @@
#define __DISABLE_FMA4__
#endif /* __FMA4__ */
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
__funline __m128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
/* 256b Floating point multiply/add type instructions. */
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
__funline __m256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
-(__v8sf)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
-(__v4df)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
-(__v8sf)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
-(__v4df)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
-(__v8sf)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
-(__v4df)__C);
}

View file

@ -11,224 +11,160 @@
#define __DISABLE_FMA__
#endif /* __FMA__ */
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
__funline __m128d _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
__funline __m256d _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
-(__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
__funline __m128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
__funline __m256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
-(__v8sf)__C);
}

View file

@ -11,28 +11,20 @@
#define __DISABLE_FXSR__
#endif /* __FXSR__ */
/* Save the processor's FP/SSE state into the buffer at __P (FXSAVE). */
__funline void _fxsave(void *__P) {
  __builtin_ia32_fxsave(__P);
}

/* Restore FP/SSE state previously saved at __P (FXRSTOR). */
__funline void _fxrstor(void *__P) {
  __builtin_ia32_fxrstor(__P);
}

#ifdef __x86_64__
/* 64-bit-mode variants (FXSAVE64/FXRSTOR64). */
__funline void _fxsave64(void *__P) {
  __builtin_ia32_fxsave64(__P);
}

__funline void _fxrstor64(void *__P) {
  __builtin_ia32_fxrstor64(__P);
}
#endif

View file

@ -11,23 +11,19 @@
#define __DISABLE_GFNI__
#endif /* __GFNI__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi((__v16qi)__A, (__v16qi)__B);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B, const int __C) {
__funline __m128i _mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B,
const int __C) {
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)__A,
(__v16qi)__B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B, const int __C) {
__funline __m128i _mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B,
const int __C) {
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)__A,
(__v16qi)__B, __C);
}
@ -51,23 +47,19 @@ extern __inline __m128i
#define __DISABLE_GFNIAVX__
#endif /* __GFNIAVX__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) {
__funline __m256i _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi((__v32qi)__A, (__v32qi)__B);
}
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B, const int __C) {
__funline __m256i _mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B,
const int __C) {
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)__A,
(__v32qi)__B, __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B, const int __C) {
__funline __m256i _mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B,
const int __C) {
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)__A,
(__v32qi)__B, __C);
}
@ -91,49 +83,42 @@ extern __inline __m256i
#define __DISABLE_GFNIAVX512VL__
#endif /* __GFNIAVX512VL__ */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C, __m128i __D) {
__funline __m128i _mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
(__v16qi)__C, (__v16qi)__D, (__v16qi)__A, __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B,
__m128i __C) {
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
(__v16qi)__B, (__v16qi)__C, (__v16qi)_mm_setzero_si128(), __A);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B, __m128i __C,
__m128i __D, const int __E) {
__funline __m128i _mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B,
__m128i __C, __m128i __D,
const int __E) {
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B, __m128i __C,
__funline __m128i _mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B,
__m128i __C,
const int __D) {
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B, __m128i __C,
__m128i __D, const int __E) {
__funline __m128i _mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B,
__m128i __C, __m128i __D,
const int __E) {
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B, __m128i __C,
const int __D) {
__funline __m128i _mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B,
__m128i __C, const int __D) {
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
}
@ -167,50 +152,43 @@ extern __inline __m128i
#define __DISABLE_GFNIAVX512VLBW__
#endif /* __GFNIAVX512VLBW__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B, __m256i __C,
__m256i __D) {
__funline __m256i _mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B,
__m256i __C, __m256i __D) {
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
(__v32qi)__C, (__v32qi)__D, (__v32qi)__A, __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B, __m256i __C) {
__funline __m256i _mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B,
__m256i __C) {
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
(__v32qi)__B, (__v32qi)__C, (__v32qi)_mm256_setzero_si256(), __A);
}
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A, __mmask32 __B,
__funline __m256i _mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A,
__mmask32 __B,
__m256i __C, __m256i __D,
const int __E) {
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A, __m256i __B,
__m256i __C, const int __D) {
__funline __m256i _mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A,
__m256i __B, __m256i __C,
const int __D) {
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B, __m256i __C,
__m256i __D, const int __E) {
__funline __m256i _mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B,
__m256i __C, __m256i __D,
const int __E) {
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B, __m256i __C,
__funline __m256i _mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B,
__m256i __C,
const int __D) {
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
@ -245,69 +223,58 @@ extern __inline __m256i
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B, __m512i __C,
__m512i __D) {
__funline __m512i _mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B,
__m512i __C, __m512i __D) {
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
(__v64qi)__C, (__v64qi)__D, (__v64qi)__A, __B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B, __m512i __C) {
__funline __m512i _mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B,
__m512i __C) {
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
(__v64qi)__B, (__v64qi)__C, (__v64qi)_mm512_setzero_si512(), __A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) {
__funline __m512i _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi((__v64qi)__A, (__v64qi)__B);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A, __mmask64 __B,
__funline __m512i _mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A,
__mmask64 __B,
__m512i __C, __m512i __D,
const int __E) {
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A, __m512i __B,
__m512i __C, const int __D) {
__funline __m512i _mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A,
__m512i __B, __m512i __C,
const int __D) {
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B, const int __C) {
__funline __m512i _mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B,
const int __C) {
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)__A,
(__v64qi)__B, __C);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B, __m512i __C,
__m512i __D, const int __E) {
__funline __m512i _mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B,
__m512i __C, __m512i __D,
const int __E) {
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B, __m512i __C,
__funline __m512i _mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B,
__m512i __C,
const int __D) {
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B, const int __C) {
__funline __m512i _mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B,
const int __C) {
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)__A,
(__v64qi)__B, __C);
}

View file

@ -2,21 +2,15 @@
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
#endif
/* Bit scan forward: index of the lowest set bit of __X. */
__funline int __bsfd(int __X) {
  return __builtin_ctz(__X);
}

/* Bit scan reverse: index of the highest set bit of __X. */
__funline int __bsrd(int __X) {
  return __builtin_ia32_bsrsi(__X);
}

/* Reverse the byte order of a 32-bit value. */
__funline int __bswapd(int __X) {
  return __builtin_bswap32(__X);
}
@ -28,21 +22,15 @@ extern __inline int
#define __DISABLE_SSE4_2__
#endif /* __SSE4_2__ */
/* Accumulate one byte into CRC-32C value __C (SSE4.2 CRC32). */
__funline unsigned int __crc32b(unsigned int __C, unsigned char __V) {
  return __builtin_ia32_crc32qi(__C, __V);
}

/* Accumulate a 16-bit word into CRC-32C value __C. */
__funline unsigned int __crc32w(unsigned int __C, unsigned short __V) {
  return __builtin_ia32_crc32hi(__C, __V);
}

/* Accumulate a 32-bit word into CRC-32C value __C. */
__funline unsigned int __crc32d(unsigned int __C, unsigned int __V) {
  return __builtin_ia32_crc32si(__C, __V);
}
@ -53,99 +41,71 @@ extern __inline unsigned int
#endif /* __iamcu__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntd(unsigned int __X) {
__funline int __popcntd(unsigned int __X) {
return __builtin_popcount(__X);
}
#ifndef __iamcu__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdpmc(int __S) {
__funline unsigned long long __rdpmc(int __S) {
return __builtin_ia32_rdpmc(__S);
}
#endif /* __iamcu__ */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdtsc(void) {
__funline unsigned long long __rdtsc(void) {
return __builtin_ia32_rdtsc();
}
#ifndef __iamcu__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdtscp(unsigned int *__A) {
__funline unsigned long long __rdtscp(unsigned int *__A) {
return __builtin_ia32_rdtscp(__A);
}
#endif /* __iamcu__ */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolb(unsigned char __X, int __C) {
__funline unsigned char __rolb(unsigned char __X, int __C) {
return __builtin_ia32_rolqi(__X, __C);
}
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolw(unsigned short __X, int __C) {
__funline unsigned short __rolw(unsigned short __X, int __C) {
return __builtin_ia32_rolhi(__X, __C);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rold(unsigned int __X, int __C) {
__funline unsigned int __rold(unsigned int __X, int __C) {
__C &= 31;
return (__X << __C) | (__X >> (-__C & 31));
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorb(unsigned char __X, int __C) {
__funline unsigned char __rorb(unsigned char __X, int __C) {
return __builtin_ia32_rorqi(__X, __C);
}
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorw(unsigned short __X, int __C) {
__funline unsigned short __rorw(unsigned short __X, int __C) {
return __builtin_ia32_rorhi(__X, __C);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rord(unsigned int __X, int __C) {
__funline unsigned int __rord(unsigned int __X, int __C) {
__C &= 31;
return (__X >> __C) | (__X << (-__C & 31));
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__pause(void) {
__funline void __pause(void) {
__builtin_ia32_pause();
}
#ifdef __x86_64__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfq(long long __X) {
__funline int __bsfq(long long __X) {
return __builtin_ctzll(__X);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrq(long long __X) {
__funline int __bsrq(long long __X) {
return __builtin_ia32_bsrdi(__X);
}
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapq(long long __X) {
__funline long long __bswapq(long long __X) {
return __builtin_bswap64(__X);
}
@ -155,9 +115,8 @@ extern __inline long long
#define __DISABLE_SSE4_2__
#endif /* __SSE4_2__ */
/* Accumulate a 64-bit word into CRC-32C value __C (SSE4.2 CRC32). */
__funline unsigned long long __crc32q(unsigned long long __C,
                                      unsigned long long __V) {
  return __builtin_ia32_crc32di(__C, __V);
}
@ -166,35 +125,25 @@ extern __inline unsigned long long
#pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntq(unsigned long long __X) {
__funline long long __popcntq(unsigned long long __X) {
return __builtin_popcountll(__X);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolq(unsigned long long __X, int __C) {
__funline unsigned long long __rolq(unsigned long long __X, int __C) {
__C &= 63;
return (__X << __C) | (__X >> (-__C & 63));
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorq(unsigned long long __X, int __C) {
__funline unsigned long long __rorq(unsigned long long __X, int __C) {
__C &= 63;
return (__X >> __C) | (__X << (-__C & 63));
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags(void) {
__funline unsigned long long __readeflags(void) {
return __builtin_ia32_readeflags_u64();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags(unsigned long long __X) {
__funline void __writeeflags(unsigned long long __X) {
__builtin_ia32_writeeflags_u64(__X);
}
@ -202,15 +151,11 @@ extern __inline void
#define _popcnt64(a) __popcntq(a)
#else
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags(void) {
__funline unsigned int __readeflags(void) {
return __builtin_ia32_readeflags_u32();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags(unsigned int __X) {
__funline void __writeeflags(unsigned int __X) {
__builtin_ia32_writeeflags_u32(__X);
}

View file

@ -65,9 +65,7 @@
#include "third_party/intel/pkuintrin.internal.h"
/* clang-format on */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wbinvd(void) {
__funline void _wbinvd(void) {
__builtin_ia32_wbinvd();
}
@ -76,15 +74,11 @@ extern __inline void
#pragma GCC target("rdrnd")
#define __DISABLE_RDRND__
#endif /* __RDRND__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdrand16_step(unsigned short *__P) {
__funline int _rdrand16_step(unsigned short *__P) {
return __builtin_ia32_rdrand16_step(__P);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdrand32_step(unsigned int *__P) {
__funline int _rdrand32_step(unsigned int *__P) {
return __builtin_ia32_rdrand32_step(__P);
}
#ifdef __DISABLE_RDRND__
@ -97,9 +91,7 @@ extern __inline int
#pragma GCC target("rdpid")
#define __DISABLE_RDPID__
#endif /* __RDPID__ */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdpid_u32(void) {
__funline unsigned int _rdpid_u32(void) {
return __builtin_ia32_rdpid();
}
#ifdef __DISABLE_RDPID__
@ -114,51 +106,35 @@ extern __inline unsigned int
#pragma GCC target("fsgsbase")
#define __DISABLE_FSGSBASE__
#endif /* __FSGSBASE__ */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_readfsbase_u32(void) {
__funline unsigned int _readfsbase_u32(void) {
return __builtin_ia32_rdfsbase32();
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_readfsbase_u64(void) {
__funline unsigned long long _readfsbase_u64(void) {
return __builtin_ia32_rdfsbase64();
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_readgsbase_u32(void) {
__funline unsigned int _readgsbase_u32(void) {
return __builtin_ia32_rdgsbase32();
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_readgsbase_u64(void) {
__funline unsigned long long _readgsbase_u64(void) {
return __builtin_ia32_rdgsbase64();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_writefsbase_u32(unsigned int __B) {
__funline void _writefsbase_u32(unsigned int __B) {
__builtin_ia32_wrfsbase32(__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_writefsbase_u64(unsigned long long __B) {
__funline void _writefsbase_u64(unsigned long long __B) {
__builtin_ia32_wrfsbase64(__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_writegsbase_u32(unsigned int __B) {
__funline void _writegsbase_u32(unsigned int __B) {
__builtin_ia32_wrgsbase32(__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_writegsbase_u64(unsigned long long __B) {
__funline void _writegsbase_u64(unsigned long long __B) {
__builtin_ia32_wrgsbase64(__B);
}
#ifdef __DISABLE_FSGSBASE__
@ -171,9 +147,7 @@ extern __inline void
#pragma GCC target("rdrnd")
#define __DISABLE_RDRND__
#endif /* __RDRND__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdrand64_step(unsigned long long *__P) {
__funline int _rdrand64_step(unsigned long long *__P) {
return __builtin_ia32_rdrand64_step(__P);
}
#ifdef __DISABLE_RDRND__
@ -190,16 +164,12 @@ extern __inline int
#endif
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_ptwrite64(unsigned long long __B) {
__funline void _ptwrite64(unsigned long long __B) {
__builtin_ia32_ptwrite64(__B);
}
#endif /* __x86_64__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_ptwrite32(unsigned __B) {
__funline void _ptwrite32(unsigned __B) {
__builtin_ia32_ptwrite32(__B);
}
#ifdef __DISABLE_PTWRITE__

View file

@ -11,28 +11,22 @@
#define __DISABLE_LWP__
#endif /* __LWP__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__llwpcb(void *__pcbAddress) {
__funline void __llwpcb(void *__pcbAddress) {
__builtin_ia32_llwpcb(__pcbAddress);
}
extern __inline void *__attribute__((__gnu_inline__, __always_inline__,
__artificial__)) __slwpcb(void) {
__funline void *__slwpcb(void) {
return __builtin_ia32_slwpcb();
}
#ifdef __OPTIMIZE__
extern __inline void __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
__lwpval32(unsigned int __data2, unsigned int __data1, unsigned int __flags) {
__funline void __lwpval32(unsigned int __data2, unsigned int __data1,
unsigned int __flags) {
__builtin_ia32_lwpval32(__data2, __data1, __flags);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval64(unsigned long long __data2, unsigned int __data1,
__funline void __lwpval64(unsigned long long __data2, unsigned int __data1,
unsigned int __flags) {
__builtin_ia32_lwpval64(__data2, __data1, __flags);
}
@ -49,17 +43,14 @@ extern __inline void
#endif
#ifdef __OPTIMIZE__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
__lwpins32(unsigned int __data2, unsigned int __data1, unsigned int __flags) {
__funline unsigned char __lwpins32(unsigned int __data2, unsigned int __data1,
unsigned int __flags) {
return __builtin_ia32_lwpins32(__data2, __data1, __flags);
}
#ifdef __x86_64__
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins64(unsigned long long __data2, unsigned int __data1,
unsigned int __flags) {
__funline unsigned char __lwpins64(unsigned long long __data2,
unsigned int __data1, unsigned int __flags) {
return __builtin_ia32_lwpins64(__data2, __data1, __flags);
}
#endif

View file

@ -11,34 +11,24 @@
#define __DISABLE_LZCNT__
#endif /* __LZCNT__ */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt16(unsigned short __X) {
__funline unsigned short __lzcnt16(unsigned short __X) {
return __builtin_ia32_lzcnt_u16(__X);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt32(unsigned int __X) {
__funline unsigned int __lzcnt32(unsigned int __X) {
return __builtin_ia32_lzcnt_u32(__X);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u32(unsigned int __X) {
__funline unsigned int _lzcnt_u32(unsigned int __X) {
return __builtin_ia32_lzcnt_u32(__X);
}
#ifdef __x86_64__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt64(unsigned long long __X) {
__funline unsigned long long __lzcnt64(unsigned long long __X) {
return __builtin_ia32_lzcnt_u64(__X);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u64(unsigned long long __X) {
__funline unsigned long long _lzcnt_u64(unsigned long long __X) {
return __builtin_ia32_lzcnt_u64(__X);
}
#endif

View file

@ -14,141 +14,95 @@
#define __DISABLE_3dNOW__
#endif /* __3dNOW__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms(void) {
__funline void _m_femms(void) {
__builtin_ia32_femms();
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgusb(__m64 __A, __m64 __B) {
__funline __m64 _m_pavgusb(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pavgusb((__v8qi)__A, (__v8qi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2id(__m64 __A) {
__funline __m64 _m_pf2id(__m64 __A) {
return (__m64)__builtin_ia32_pf2id((__v2sf)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfacc(__m64 __A, __m64 __B) {
__funline __m64 _m_pfacc(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfacc((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfadd(__m64 __A, __m64 __B) {
__funline __m64 _m_pfadd(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfadd((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpeq(__m64 __A, __m64 __B) {
__funline __m64 _m_pfcmpeq(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpge(__m64 __A, __m64 __B) {
__funline __m64 _m_pfcmpge(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpgt(__m64 __A, __m64 __B) {
__funline __m64 _m_pfcmpgt(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmax(__m64 __A, __m64 __B) {
__funline __m64 _m_pfmax(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfmax((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmin(__m64 __A, __m64 __B) {
__funline __m64 _m_pfmin(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfmin((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmul(__m64 __A, __m64 __B) {
__funline __m64 _m_pfmul(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfmul((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcp(__m64 __A) {
__funline __m64 _m_pfrcp(__m64 __A) {
return (__m64)__builtin_ia32_pfrcp((__v2sf)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit1(__m64 __A, __m64 __B) {
__funline __m64 _m_pfrcpit1(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit2(__m64 __A, __m64 __B) {
__funline __m64 _m_pfrcpit2(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqrt(__m64 __A) {
__funline __m64 _m_pfrsqrt(__m64 __A) {
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqit1(__m64 __A, __m64 __B) {
__funline __m64 _m_pfrsqit1(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsub(__m64 __A, __m64 __B) {
__funline __m64 _m_pfsub(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfsub((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsubr(__m64 __A, __m64 __B) {
__funline __m64 _m_pfsubr(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfsubr((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fd(__m64 __A) {
__funline __m64 _m_pi2fd(__m64 __A) {
return (__m64)__builtin_ia32_pi2fd((__v2si)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhrw(__m64 __A, __m64 __B) {
__funline __m64 _m_pmulhrw(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__A, (__v4hi)__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch(void *__P) {
__funline void _m_prefetch(void *__P) {
__builtin_prefetch(__P, 0, 3 /* _MM_HINT_T0 */);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_float(float __A) {
__funline __m64 _m_from_float(float __A) {
return __extension__(__m64)(__v2sf){__A, 0.0f};
}
extern __inline float
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_float(__m64 __A) {
__funline float _m_to_float(__m64 __A) {
union {
__v2sf v;
float a[2];
@ -172,33 +126,23 @@ extern __inline float
#define __DISABLE_3dNOW_A__
#endif /* __3dNOW_A__ */
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2iw(__m64 __A) {
__funline __m64 _m_pf2iw(__m64 __A) {
return (__m64)__builtin_ia32_pf2iw((__v2sf)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfnacc(__m64 __A, __m64 __B) {
__funline __m64 _m_pfnacc(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfnacc((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfpnacc(__m64 __A, __m64 __B) {
__funline __m64 _m_pfpnacc(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__A, (__v2sf)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fw(__m64 __A) {
__funline __m64 _m_pi2fw(__m64 __A) {
return (__m64)__builtin_ia32_pi2fw((__v2si)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pswapd(__m64 __A) {
__funline __m64 _m_pswapd(__m64 __A) {
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__A);
}

View file

@ -23,231 +23,157 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef float __v2sf __attribute__((__vector_size__(8)));
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty(void) {
__funline void _mm_empty(void) {
__builtin_ia32_emms();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty(void) {
__funline void _m_empty(void) {
_mm_empty();
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64(int __i) {
__funline __m64 _mm_cvtsi32_si64(int __i) {
return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int(int __i) {
__funline __m64 _m_from_int(int __i) {
return _mm_cvtsi32_si64(__i);
}
#ifdef __x86_64__
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64(long long __i) {
__funline __m64 _m_from_int64(long long __i) {
return (__m64)__i;
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64(long long __i) {
__funline __m64 _mm_cvtsi64_m64(long long __i) {
return (__m64)__i;
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64(long long __i) {
__funline __m64 _mm_cvtsi64x_si64(long long __i) {
return (__m64)__i;
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x(long long __i) {
__funline __m64 _mm_set_pi64x(long long __i) {
return (__m64)__i;
}
#endif
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32(__m64 __i) {
__funline int _mm_cvtsi64_si32(__m64 __i) {
return __builtin_ia32_vec_ext_v2si((__v2si)__i, 0);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int(__m64 __i) {
__funline int _m_to_int(__m64 __i) {
return _mm_cvtsi64_si32(__i);
}
#ifdef __x86_64__
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64(__m64 __i) {
__funline long long _m_to_int64(__m64 __i) {
return (long long)__i;
}
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64(__m64 __i) {
__funline long long _mm_cvtm64_si64(__m64 __i) {
return (long long)__i;
}
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x(__m64 __i) {
__funline long long _mm_cvtsi64_si64x(__m64 __i) {
return (long long)__i;
}
#endif
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_packs_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_packsswb(__m64 __m1, __m64 __m2) {
return _mm_packs_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_packs_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_packssdw(__m64 __m1, __m64 __m2) {
return _mm_packs_pi32(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_packs_pu16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_packuswb(__m64 __m1, __m64 __m2) {
return _mm_packs_pu16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_punpckhbw(__m64 __m1, __m64 __m2) {
return _mm_unpackhi_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_punpckhwd(__m64 __m1, __m64 __m2) {
return _mm_unpackhi_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq(__m64 __m1, __m64 __m2) {
__funline __m64 _m_punpckhdq(__m64 __m1, __m64 __m2) {
return _mm_unpackhi_pi32(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklbw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_punpcklbw(__m64 __m1, __m64 __m2) {
return _mm_unpacklo_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklwd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_punpcklwd(__m64 __m1, __m64 __m2) {
return _mm_unpacklo_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckldq(__m64 __m1, __m64 __m2) {
__funline __m64 _m_punpckldq(__m64 __m1, __m64 __m2) {
return _mm_unpacklo_pi32(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_add_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddb(__m64 __m1, __m64 __m2) {
return _mm_add_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_add_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddw(__m64 __m1, __m64 __m2) {
return _mm_add_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_add_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddd(__m64 __m1, __m64 __m2) {
return _mm_add_pi32(__m1, __m2);
}
@ -257,9 +183,7 @@ extern __inline __m64
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_add_si64(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddq((__v1di)__m1, (__v1di)__m2);
}
#ifdef __DISABLE_SSE2__
@ -267,87 +191,59 @@ extern __inline __m64
#pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_adds_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddsb(__m64 __m1, __m64 __m2) {
return _mm_adds_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_adds_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddsw(__m64 __m1, __m64 __m2) {
return _mm_adds_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_adds_pu8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddusb(__m64 __m1, __m64 __m2) {
return _mm_adds_pu8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_adds_pu16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_paddusw(__m64 __m1, __m64 __m2) {
return _mm_adds_pu16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_sub_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubb(__m64 __m1, __m64 __m2) {
return _mm_sub_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_sub_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubw(__m64 __m1, __m64 __m2) {
return _mm_sub_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_sub_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubd(__m64 __m1, __m64 __m2) {
return _mm_sub_pi32(__m1, __m2);
}
@ -357,9 +253,7 @@ extern __inline __m64
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_sub_si64(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubq((__v1di)__m1, (__v1di)__m2);
}
#ifdef __DISABLE_SSE2__
@ -367,462 +261,310 @@ extern __inline __m64
#pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_subs_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubsb(__m64 __m1, __m64 __m2) {
return _mm_subs_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_subs_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubsw(__m64 __m1, __m64 __m2) {
return _mm_subs_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_subs_pu8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubusb(__m64 __m1, __m64 __m2) {
return _mm_subs_pu8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_subs_pu16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_psubusw(__m64 __m1, __m64 __m2) {
return _mm_subs_pu16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_madd_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaddwd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pmaddwd(__m64 __m1, __m64 __m2) {
return _mm_madd_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pmulhw(__m64 __m1, __m64 __m2) {
return _mm_mulhi_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_mullo_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmullw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pmullw(__m64 __m1, __m64 __m2) {
return _mm_mullo_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi16(__m64 __m, __m64 __count) {
__funline __m64 _mm_sll_pi16(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psllw((__v4hi)__m, (__v4hi)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllw(__m64 __m, __m64 __count) {
__funline __m64 _m_psllw(__m64 __m, __m64 __count) {
return _mm_sll_pi16(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16(__m64 __m, int __count) {
__funline __m64 _mm_slli_pi16(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllwi(__m64 __m, int __count) {
__funline __m64 _m_psllwi(__m64 __m, int __count) {
return _mm_slli_pi16(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi32(__m64 __m, __m64 __count) {
__funline __m64 _mm_sll_pi32(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_pslld((__v2si)__m, (__v2si)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslld(__m64 __m, __m64 __count) {
__funline __m64 _m_pslld(__m64 __m, __m64 __count) {
return _mm_sll_pi32(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi32(__m64 __m, int __count) {
__funline __m64 _mm_slli_pi32(__m64 __m, int __count) {
return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslldi(__m64 __m, int __count) {
__funline __m64 _m_pslldi(__m64 __m, int __count) {
return _mm_slli_pi32(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_si64(__m64 __m, __m64 __count) {
__funline __m64 _mm_sll_si64(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psllq((__v1di)__m, (__v1di)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllq(__m64 __m, __m64 __count) {
__funline __m64 _m_psllq(__m64 __m, __m64 __count) {
return _mm_sll_si64(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64(__m64 __m, int __count) {
__funline __m64 _mm_slli_si64(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllqi(__m64 __m, int __count) {
__funline __m64 _m_psllqi(__m64 __m, int __count) {
return _mm_slli_si64(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi16(__m64 __m, __m64 __count) {
__funline __m64 _mm_sra_pi16(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psraw((__v4hi)__m, (__v4hi)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psraw(__m64 __m, __m64 __count) {
__funline __m64 _m_psraw(__m64 __m, __m64 __count) {
return _mm_sra_pi16(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi16(__m64 __m, int __count) {
__funline __m64 _mm_srai_pi16(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrawi(__m64 __m, int __count) {
__funline __m64 _m_psrawi(__m64 __m, int __count) {
return _mm_srai_pi16(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32(__m64 __m, __m64 __count) {
__funline __m64 _mm_sra_pi32(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psrad((__v2si)__m, (__v2si)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad(__m64 __m, __m64 __count) {
__funline __m64 _m_psrad(__m64 __m, __m64 __count) {
return _mm_sra_pi32(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32(__m64 __m, int __count) {
__funline __m64 _mm_srai_pi32(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi(__m64 __m, int __count) {
__funline __m64 _m_psradi(__m64 __m, int __count) {
return _mm_srai_pi32(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16(__m64 __m, __m64 __count) {
__funline __m64 _mm_srl_pi16(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psrlw((__v4hi)__m, (__v4hi)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw(__m64 __m, __m64 __count) {
__funline __m64 _m_psrlw(__m64 __m, __m64 __count) {
return _mm_srl_pi16(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16(__m64 __m, int __count) {
__funline __m64 _mm_srli_pi16(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi(__m64 __m, int __count) {
__funline __m64 _m_psrlwi(__m64 __m, int __count) {
return _mm_srli_pi16(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32(__m64 __m, __m64 __count) {
__funline __m64 _mm_srl_pi32(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psrld((__v2si)__m, (__v2si)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld(__m64 __m, __m64 __count) {
__funline __m64 _m_psrld(__m64 __m, __m64 __count) {
return _mm_srl_pi32(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32(__m64 __m, int __count) {
__funline __m64 _mm_srli_pi32(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi(__m64 __m, int __count) {
__funline __m64 _m_psrldi(__m64 __m, int __count) {
return _mm_srli_pi32(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64(__m64 __m, __m64 __count) {
__funline __m64 _mm_srl_si64(__m64 __m, __m64 __count) {
return (__m64)__builtin_ia32_psrlq((__v1di)__m, (__v1di)__count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq(__m64 __m, __m64 __count) {
__funline __m64 _m_psrlq(__m64 __m, __m64 __count) {
return _mm_srl_si64(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64(__m64 __m, int __count) {
__funline __m64 _mm_srli_si64(__m64 __m, int __count) {
return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi(__m64 __m, int __count) {
__funline __m64 _m_psrlqi(__m64 __m, int __count) {
return _mm_srli_si64(__m, __count);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_and_si64(__m64 __m1, __m64 __m2) {
return __builtin_ia32_pand(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pand(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pand(__m64 __m1, __m64 __m2) {
return _mm_and_si64(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si64(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_andnot_si64(__m64 __m1, __m64 __m2) {
return __builtin_ia32_pandn(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pandn(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pandn(__m64 __m1, __m64 __m2) {
return _mm_andnot_si64(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_or_si64(__m64 __m1, __m64 __m2) {
return __builtin_ia32_por(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_por(__m64 __m1, __m64 __m2) {
__funline __m64 _m_por(__m64 __m1, __m64 __m2) {
return _mm_or_si64(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_xor_si64(__m64 __m1, __m64 __m2) {
return __builtin_ia32_pxor(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pxor(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pxor(__m64 __m1, __m64 __m2) {
return _mm_xor_si64(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pcmpeqb(__m64 __m1, __m64 __m2) {
return _mm_cmpeq_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtb(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pcmpgtb(__m64 __m1, __m64 __m2) {
return _mm_cmpgt_pi8(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pcmpeqw(__m64 __m1, __m64 __m2) {
return _mm_cmpeq_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtw(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pcmpgtw(__m64 __m1, __m64 __m2) {
return _mm_cmpgt_pi16(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pcmpeqd(__m64 __m1, __m64 __m2) {
return _mm_cmpeq_pi32(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
__funline __m64 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtd(__m64 __m1, __m64 __m2) {
__funline __m64 _m_pcmpgtd(__m64 __m1, __m64 __m2) {
return _mm_cmpgt_pi32(__m1, __m2);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64(void) {
__funline __m64 _mm_setzero_si64(void) {
return (__m64)0LL;
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32(int __i1, int __i0) {
__funline __m64 _mm_set_pi32(int __i1, int __i0) {
return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
__funline __m64 _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
return (__m64)__builtin_ia32_vec_init_v4hi(__w0, __w1, __w2, __w3);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
__funline __m64 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
char __b2, char __b1, char __b0) {
return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5,
__b6, __b7);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi32(int __i0, int __i1) {
__funline __m64 _mm_setr_pi32(int __i0, int __i1) {
return _mm_set_pi32(__i1, __i0);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
__funline __m64 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4,
char __b5, char __b6, char __b7) {
__funline __m64 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3,
char __b4, char __b5, char __b6, char __b7) {
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi32(int __i) {
__funline __m64 _mm_set1_pi32(int __i) {
return _mm_set_pi32(__i, __i);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi16(short __w) {
__funline __m64 _mm_set1_pi16(short __w) {
return _mm_set_pi16(__w, __w, __w, __w);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi8(char __b) {
__funline __m64 _mm_set1_pi8(char __b) {
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}
#ifdef __DISABLE_MMX__

View file

@ -11,15 +11,11 @@
#define __DISABLE_MOVDIRI__
#endif /* __MOVDIRI__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_directstoreu_u32(void *__P, unsigned int __A) {
__funline void _directstoreu_u32(void *__P, unsigned int __A) {
__builtin_ia32_directstoreu_u32((unsigned int *)__P, __A);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_directstoreu_u64(void *__P, unsigned long long __A) {
__funline void _directstoreu_u64(void *__P, unsigned long long __A) {
__builtin_ia32_directstoreu_u64((unsigned long long *)__P, __A);
}
#endif
@ -35,9 +31,7 @@ extern __inline void
#define __DISABLE_MOVDIR64B__
#endif /* __MOVDIR64B__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_movdir64b(void *__P, const void *__Q) {
__funline void _movdir64b(void *__P, const void *__Q) {
__builtin_ia32_movdir64b(__P, __Q);
}

View file

@ -8,15 +8,11 @@
#define __DISABLE_MWAITX__
#endif /* __MWAITX__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitorx(void const* __P, unsigned int __E, unsigned int __H) {
__funline void _mm_monitorx(void const* __P, unsigned int __E, unsigned int __H) {
__builtin_ia32_monitorx(__P, __E, __H);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwaitx(unsigned int __E, unsigned int __H, unsigned int __C) {
__funline void _mm_mwaitx(unsigned int __E, unsigned int __H, unsigned int __C) {
__builtin_ia32_mwaitx(__E, __H, __C);
}

View file

@ -24,9 +24,7 @@
: "a"(leaf), "b"(b), "c"(c), "d"(d) \
: "cc")
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pconfig_u32(const unsigned int __L, size_t __D[]) {
__funline unsigned int _pconfig_u32(const unsigned int __L, size_t __D[]) {
enum __pconfig_type {
__PCONFIG_KEY_PROGRAM = 0x01,
};

View file

@ -11,15 +11,11 @@
#define __DISABLE_PKU__
#endif /* __PKU__ */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdpkru_u32(void) {
__funline unsigned int _rdpkru_u32(void) {
return __builtin_ia32_rdpkru();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrpkru(unsigned int __key) {
__funline void _wrpkru(unsigned int __key) {
__builtin_ia32_wrpkru(__key);
}

View file

@ -17,81 +17,55 @@
_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps(__m128 __X, __m128 __Y) {
__funline __m128 _mm_addsub_ps(__m128 __X, __m128 __Y) {
return (__m128)__builtin_ia32_addsubps((__v4sf)__X, (__v4sf)__Y);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps(__m128 __X, __m128 __Y) {
__funline __m128 _mm_hadd_ps(__m128 __X, __m128 __Y) {
return (__m128)__builtin_ia32_haddps((__v4sf)__X, (__v4sf)__Y);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps(__m128 __X, __m128 __Y) {
__funline __m128 _mm_hsub_ps(__m128 __X, __m128 __Y) {
return (__m128)__builtin_ia32_hsubps((__v4sf)__X, (__v4sf)__Y);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps(__m128 __X) {
__funline __m128 _mm_movehdup_ps(__m128 __X) {
return (__m128)__builtin_ia32_movshdup((__v4sf)__X);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps(__m128 __X) {
__funline __m128 _mm_moveldup_ps(__m128 __X) {
return (__m128)__builtin_ia32_movsldup((__v4sf)__X);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd(__m128d __X, __m128d __Y) {
__funline __m128d _mm_addsub_pd(__m128d __X, __m128d __Y) {
return (__m128d)__builtin_ia32_addsubpd((__v2df)__X, (__v2df)__Y);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd(__m128d __X, __m128d __Y) {
__funline __m128d _mm_hadd_pd(__m128d __X, __m128d __Y) {
return (__m128d)__builtin_ia32_haddpd((__v2df)__X, (__v2df)__Y);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd(__m128d __X, __m128d __Y) {
__funline __m128d _mm_hsub_pd(__m128d __X, __m128d __Y) {
return (__m128d)__builtin_ia32_hsubpd((__v2df)__X, (__v2df)__Y);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd(double const *__P) {
__funline __m128d _mm_loaddup_pd(double const *__P) {
return _mm_load1_pd(__P);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd(__m128d __X) {
__funline __m128d _mm_movedup_pd(__m128d __X) {
return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0));
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128(__m128i const *__P) {
__funline __m128i _mm_lddqu_si128(__m128i const *__P) {
return (__m128i)__builtin_ia32_lddqu((char const *)__P);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor(void const *__P, unsigned int __E, unsigned int __H) {
__funline void _mm_monitor(void const *__P, unsigned int __E, unsigned int __H) {
__builtin_ia32_monitor(__P, __E, __H);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait(unsigned int __E, unsigned int __H) {
__funline void _mm_mwait(unsigned int __E, unsigned int __H) {
__builtin_ia32_mwait(__E, __H);
}

View file

@ -8,16 +8,12 @@
#define __DISABLE_POPCNT__
#endif /* __POPCNT__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u32(unsigned int __X) {
__funline int _mm_popcnt_u32(unsigned int __X) {
return __builtin_popcount(__X);
}
#ifdef __x86_64__
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u64(unsigned long long __X) {
__funline long long _mm_popcnt_u64(unsigned long long __X) {
return __builtin_popcountll(__X);
}
#endif

View file

@ -6,9 +6,7 @@
#ifndef _PRFCHWINTRIN_H_INCLUDED
#define _PRFCHWINTRIN_H_INCLUDED
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchw(void *__P) {
__funline void _m_prefetchw(void *__P) {
__builtin_prefetch(__P, 1, 3 /* _MM_HINT_T0 */);
}

View file

@ -11,22 +11,16 @@
#define __DISABLE_RDSEED__
#endif /* __RDSEED__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdseed16_step(unsigned short *__p) {
__funline int _rdseed16_step(unsigned short *__p) {
return __builtin_ia32_rdseed_hi_step(__p);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdseed32_step(unsigned int *__p) {
__funline int _rdseed32_step(unsigned int *__p) {
return __builtin_ia32_rdseed_si_step(__p);
}
#ifdef __x86_64__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdseed64_step(unsigned long long *__p) {
__funline int _rdseed64_step(unsigned long long *__p) {
return __builtin_ia32_rdseed_di_step(__p);
}
#endif

View file

@ -20,22 +20,16 @@
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xbegin(void) {
__funline unsigned int _xbegin(void) {
return __builtin_ia32_xbegin();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xend(void) {
__funline void _xend(void) {
__builtin_ia32_xend();
}
#ifdef __OPTIMIZE__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xabort(const unsigned int __imm) {
__funline void _xabort(const unsigned int __imm) {
__builtin_ia32_xabort(__imm);
}
#else

View file

@ -80,9 +80,7 @@
: "a"(leaf), "b"(b), "c"(c), "d"(d) \
: "cc")
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_encls_u32(const unsigned int __L, size_t __D[]) {
__funline unsigned int _encls_u32(const unsigned int __L, size_t __D[]) {
enum __encls_type {
__SGX_ECREATE = 0x00,
__SGX_EADD = 0x01,
@ -145,9 +143,7 @@ extern __inline unsigned int
return __R;
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_enclu_u32(const unsigned int __L, size_t __D[]) {
__funline unsigned int _enclu_u32(const unsigned int __L, size_t __D[]) {
enum __enclu_type {
__SGX_EREPORT = 0x00,
__SGX_EGETKEY = 0x01,
@ -186,9 +182,7 @@ extern __inline unsigned int
return __R;
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_enclv_u32(const unsigned int __L, size_t __D[]) {
__funline unsigned int _enclv_u32(const unsigned int __L, size_t __D[]) {
enum __enclv_type {
__SGX_EDECVIRTCHILD = 0x00,
__SGX_EINCVIRTCHILD = 0x01,

View file

@ -11,28 +11,20 @@
#define __DISABLE_SHA__
#endif /* __SHA__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha1msg1_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha1msg1_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_sha1msg1((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha1msg2_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha1msg2_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_sha1msg2((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha1nexte_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha1nexte_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_sha1nexte((__v4si)__A, (__v4si)__B);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha1rnds4_epu32(__m128i __A, __m128i __B, const int __I) {
__funline __m128i _mm_sha1rnds4_epu32(__m128i __A, __m128i __B, const int __I) {
return (__m128i)__builtin_ia32_sha1rnds4((__v4si)__A, (__v4si)__B, __I);
}
#else
@ -41,21 +33,15 @@ extern __inline __m128i
(int)I))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha256msg1_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha256msg1_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_sha256msg1((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha256msg2_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha256msg2_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_sha256msg2((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha256rnds2_epu32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_sha256rnds2_epu32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}

View file

@ -25,21 +25,15 @@
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128(__m128i __M, __m128i __V) {
__funline int _mm_testz_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128(__m128i __M, __m128i __V) {
__funline int _mm_testc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128(__m128i __M, __m128i __V) {
__funline int _mm_testnzc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}
@ -50,15 +44,11 @@ extern __inline int
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_pd(__m128d __V, const int __M) {
__funline __m128d _mm_round_pd(__m128d __V, const int __M) {
return (__m128d)__builtin_ia32_roundpd((__v2df)__V, __M);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_sd(__m128d __D, __m128d __V, const int __M) {
__funline __m128d _mm_round_sd(__m128d __D, __m128d __V, const int __M) {
return (__m128d)__builtin_ia32_roundsd((__v2df)__D, (__v2df)__V, __M);
}
#else
@ -71,15 +61,11 @@ extern __inline __m128d
#endif
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ps(__m128 __V, const int __M) {
__funline __m128 _mm_round_ps(__m128 __V, const int __M) {
return (__m128)__builtin_ia32_roundps((__v4sf)__V, __M);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ss(__m128 __D, __m128 __V, const int __M) {
__funline __m128 _mm_round_ss(__m128 __D, __m128 __V, const int __M) {
return (__m128)__builtin_ia32_roundss((__v4sf)__D, (__v4sf)__V, __M);
}
#else
@ -104,9 +90,7 @@ extern __inline __m128
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16(__m128i __X, __m128i __Y, const int __M) {
__funline __m128i _mm_blend_epi16(__m128i __X, __m128i __Y, const int __M) {
return (__m128i)__builtin_ia32_pblendw128((__v8hi)__X, (__v8hi)__Y, __M);
}
#else
@ -115,17 +99,13 @@ extern __inline __m128i
(__v8hi)(__m128i)(Y), (int)(M)))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8(__m128i __X, __m128i __Y, __m128i __M) {
__funline __m128i _mm_blendv_epi8(__m128i __X, __m128i __Y, __m128i __M) {
return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__X, (__v16qi)__Y,
(__v16qi)__M);
}
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps(__m128 __X, __m128 __Y, const int __M) {
__funline __m128 _mm_blend_ps(__m128 __X, __m128 __Y, const int __M) {
return (__m128)__builtin_ia32_blendps((__v4sf)__X, (__v4sf)__Y, __M);
}
#else
@ -134,16 +114,12 @@ extern __inline __m128
(int)(M)))
#endif
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_ps(__m128 __X, __m128 __Y, __m128 __M) {
__funline __m128 _mm_blendv_ps(__m128 __X, __m128 __Y, __m128 __M) {
return (__m128)__builtin_ia32_blendvps((__v4sf)__X, (__v4sf)__Y, (__v4sf)__M);
}
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd(__m128d __X, __m128d __Y, const int __M) {
__funline __m128d _mm_blend_pd(__m128d __X, __m128d __Y, const int __M) {
return (__m128d)__builtin_ia32_blendpd((__v2df)__X, (__v2df)__Y, __M);
}
#else
@ -152,23 +128,17 @@ extern __inline __m128d
(int)(M)))
#endif
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd(__m128d __X, __m128d __Y, __m128d __M) {
__funline __m128d _mm_blendv_pd(__m128d __X, __m128d __Y, __m128d __M) {
return (__m128d)__builtin_ia32_blendvpd((__v2df)__X, (__v2df)__Y,
(__v2df)__M);
}
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_ps(__m128 __X, __m128 __Y, const int __M) {
__funline __m128 _mm_dp_ps(__m128 __X, __m128 __Y, const int __M) {
return (__m128)__builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, __M);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_pd(__m128d __X, __m128d __Y, const int __M) {
__funline __m128d _mm_dp_pd(__m128d __X, __m128d __Y, const int __M) {
return (__m128d)__builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, __M);
}
#else
@ -181,76 +151,52 @@ extern __inline __m128d
(int)(M)))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
__funline __m128i _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
return (__m128i)((__v2di)__X == (__v2di)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8(__m128i __X, __m128i __Y) {
__funline __m128i _mm_min_epi8(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pminsb128((__v16qi)__X, (__v16qi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi8(__m128i __X, __m128i __Y) {
__funline __m128i _mm_max_epi8(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmaxsb128((__v16qi)__X, (__v16qi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_min_epu16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pminuw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_max_epu16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmaxuw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_min_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pminsd128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_max_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmaxsd128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_min_epu32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pminud128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_max_epu32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmaxud128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_mullo_epi32(__m128i __X, __m128i __Y) {
return (__m128i)((__v4su)__X * (__v4su)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_mul_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__X, (__v4si)__Y);
}
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_ps(__m128 __D, __m128 __S, const int __N) {
__funline __m128 _mm_insert_ps(__m128 __D, __m128 __S, const int __N) {
return (__m128)__builtin_ia32_insertps128((__v4sf)__D, (__v4sf)__S, __N);
}
#else
@ -262,9 +208,7 @@ extern __inline __m128
#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
#ifdef __OPTIMIZE__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps(__m128 __X, const int __N) {
__funline int _mm_extract_ps(__m128 __X, const int __N) {
union {
int i;
float f;
@ -291,22 +235,16 @@ extern __inline int
_mm_insert_ps(_mm_setzero_ps(), (X), _MM_MK_INSERTPS_NDX((N), 0, 0x0e))
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8(__m128i __D, int __S, const int __N) {
__funline __m128i _mm_insert_epi8(__m128i __D, int __S, const int __N) {
return (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)__D, __S, __N);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32(__m128i __D, int __S, const int __N) {
__funline __m128i _mm_insert_epi32(__m128i __D, int __S, const int __N) {
return (__m128i)__builtin_ia32_vec_set_v4si((__v4si)__D, __S, __N);
}
#ifdef __x86_64__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64(__m128i __D, long long __S, const int __N) {
__funline __m128i _mm_insert_epi64(__m128i __D, long long __S, const int __N) {
return (__m128i)__builtin_ia32_vec_set_v2di((__v2di)__D, __S, __N);
}
#endif
@ -327,22 +265,16 @@ extern __inline __m128i
#endif
#ifdef __OPTIMIZE__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8(__m128i __X, const int __N) {
__funline int _mm_extract_epi8(__m128i __X, const int __N) {
return (unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)__X, __N);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32(__m128i __X, const int __N) {
__funline int _mm_extract_epi32(__m128i __X, const int __N) {
return __builtin_ia32_vec_ext_v4si((__v4si)__X, __N);
}
#ifdef __x86_64__
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64(__m128i __X, const int __N) {
__funline long long _mm_extract_epi64(__m128i __X, const int __N) {
return __builtin_ia32_vec_ext_v2di((__v2di)__X, __N);
}
#endif
@ -359,94 +291,64 @@ extern __inline long long
#endif
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_minpos_epu16(__m128i __X) {
__funline __m128i _mm_minpos_epu16(__m128i __X) {
return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi32(__m128i __X) {
__funline __m128i _mm_cvtepi8_epi32(__m128i __X) {
return (__m128i)__builtin_ia32_pmovsxbd128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi32(__m128i __X) {
__funline __m128i _mm_cvtepi16_epi32(__m128i __X) {
return (__m128i)__builtin_ia32_pmovsxwd128((__v8hi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64(__m128i __X) {
__funline __m128i _mm_cvtepi8_epi64(__m128i __X) {
return (__m128i)__builtin_ia32_pmovsxbq128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi64(__m128i __X) {
__funline __m128i _mm_cvtepi32_epi64(__m128i __X) {
return (__m128i)__builtin_ia32_pmovsxdq128((__v4si)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64(__m128i __X) {
__funline __m128i _mm_cvtepi16_epi64(__m128i __X) {
return (__m128i)__builtin_ia32_pmovsxwq128((__v8hi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi16(__m128i __X) {
__funline __m128i _mm_cvtepi8_epi16(__m128i __X) {
return (__m128i)__builtin_ia32_pmovsxbw128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi32(__m128i __X) {
__funline __m128i _mm_cvtepu8_epi32(__m128i __X) {
return (__m128i)__builtin_ia32_pmovzxbd128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi32(__m128i __X) {
__funline __m128i _mm_cvtepu16_epi32(__m128i __X) {
return (__m128i)__builtin_ia32_pmovzxwd128((__v8hi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi64(__m128i __X) {
__funline __m128i _mm_cvtepu8_epi64(__m128i __X) {
return (__m128i)__builtin_ia32_pmovzxbq128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_epi64(__m128i __X) {
__funline __m128i _mm_cvtepu32_epi64(__m128i __X) {
return (__m128i)__builtin_ia32_pmovzxdq128((__v4si)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi64(__m128i __X) {
__funline __m128i _mm_cvtepu16_epi64(__m128i __X) {
return (__m128i)__builtin_ia32_pmovzxwq128((__v8hi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi16(__m128i __X) {
__funline __m128i _mm_cvtepu8_epi16(__m128i __X) {
return (__m128i)__builtin_ia32_pmovzxbw128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_packus_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_packusdw128((__v4si)__X, (__v4si)__Y);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mpsadbw_epu8(__m128i __X, __m128i __Y, const int __M) {
__funline __m128i _mm_mpsadbw_epu8(__m128i __X, __m128i __Y, const int __M) {
return (__m128i)__builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, __M);
}
#else
@ -455,9 +357,7 @@ extern __inline __m128i
(__v16qi)(__m128i)(Y), (int)(M)))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_load_si128(__m128i *__X) {
__funline __m128i _mm_stream_load_si128(__m128i *__X) {
return (__m128i)__builtin_ia32_movntdqa((__v2di *)__X);
}
@ -489,28 +389,22 @@ extern __inline __m128i
#define _SIDD_UNIT_MASK 0x40
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrm(__m128i __X, __m128i __Y, const int __M) {
__funline __m128i _mm_cmpistrm(__m128i __X, __m128i __Y, const int __M) {
return (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistri(__m128i __X, __m128i __Y, const int __M) {
__funline int _mm_cmpistri(__m128i __X, __m128i __Y, const int __M) {
return __builtin_ia32_pcmpistri128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrm(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline __m128i _mm_cmpestrm(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)__X, __LX, (__v16qi)__Y,
__LY, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestri(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline int _mm_cmpestri(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return __builtin_ia32_pcmpestri128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
__M);
}
@ -533,67 +427,52 @@ extern __inline int
#endif
#ifdef __OPTIMIZE__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistra(__m128i __X, __m128i __Y, const int __M) {
__funline int _mm_cmpistra(__m128i __X, __m128i __Y, const int __M) {
return __builtin_ia32_pcmpistria128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrc(__m128i __X, __m128i __Y, const int __M) {
__funline int _mm_cmpistrc(__m128i __X, __m128i __Y, const int __M) {
return __builtin_ia32_pcmpistric128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistro(__m128i __X, __m128i __Y, const int __M) {
__funline int _mm_cmpistro(__m128i __X, __m128i __Y, const int __M) {
return __builtin_ia32_pcmpistrio128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrs(__m128i __X, __m128i __Y, const int __M) {
__funline int _mm_cmpistrs(__m128i __X, __m128i __Y, const int __M) {
return __builtin_ia32_pcmpistris128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrz(__m128i __X, __m128i __Y, const int __M) {
__funline int _mm_cmpistrz(__m128i __X, __m128i __Y, const int __M) {
return __builtin_ia32_pcmpistriz128((__v16qi)__X, (__v16qi)__Y, __M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestra(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline int _mm_cmpestra(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return __builtin_ia32_pcmpestria128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
__M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrc(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline int _mm_cmpestrc(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return __builtin_ia32_pcmpestric128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
__M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestro(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline int _mm_cmpestro(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return __builtin_ia32_pcmpestrio128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
__M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrs(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline int _mm_cmpestrs(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return __builtin_ia32_pcmpestris128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
__M);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrz(__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) {
__funline int _mm_cmpestrz(__m128i __X, int __LX, __m128i __Y, int __LY,
const int __M) {
return __builtin_ia32_pcmpestriz128((__v16qi)__X, __LX, (__v16qi)__Y, __LY,
__M);
}
@ -636,9 +515,7 @@ extern __inline int
(int)(M)))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
__funline __m128i _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
return (__m128i)((__v2di)__X > (__v2di)__Y);
}
@ -667,28 +544,21 @@ extern __inline __m128i
#endif /* __SSE4_1__ */
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u8(unsigned int __C, unsigned char __V) {
__funline unsigned int _mm_crc32_u8(unsigned int __C, unsigned char __V) {
return __builtin_ia32_crc32qi(__C, __V);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u16(unsigned int __C, unsigned short __V) {
__funline unsigned int _mm_crc32_u16(unsigned int __C, unsigned short __V) {
return __builtin_ia32_crc32hi(__C, __V);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u32(unsigned int __C, unsigned int __V) {
__funline unsigned int _mm_crc32_u32(unsigned int __C, unsigned int __V) {
return __builtin_ia32_crc32si(__C, __V);
}
#ifdef __x86_64__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u64(unsigned long long __C, unsigned long long __V) {
__funline unsigned long long _mm_crc32_u64(unsigned long long __C,
unsigned long long __V) {
return __builtin_ia32_crc32di(__C, __V);
}
#endif

View file

@ -12,9 +12,7 @@
#endif /* __TBM__ */
#ifdef __OPTIMIZE__
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextri_u32(unsigned int __X, const unsigned int __I) {
__funline unsigned int __bextri_u32(unsigned int __X, const unsigned int __I) {
return __builtin_ia32_bextri_u32(__X, __I);
}
#else
@ -23,65 +21,46 @@ extern __inline unsigned int
(unsigned int)(I)))
#endif /*__OPTIMIZE__ */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcfill_u32(unsigned int __X) {
__funline unsigned int __blcfill_u32(unsigned int __X) {
return __X & (__X + 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blci_u32(unsigned int __X) {
__funline unsigned int __blci_u32(unsigned int __X) {
return __X | ~(__X + 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcic_u32(unsigned int __X) {
__funline unsigned int __blcic_u32(unsigned int __X) {
return ~__X & (__X + 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcmsk_u32(unsigned int __X) {
__funline unsigned int __blcmsk_u32(unsigned int __X) {
return __X ^ (__X + 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcs_u32(unsigned int __X) {
__funline unsigned int __blcs_u32(unsigned int __X) {
return __X | (__X + 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsfill_u32(unsigned int __X) {
__funline unsigned int __blsfill_u32(unsigned int __X) {
return __X | (__X - 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsic_u32(unsigned int __X) {
__funline unsigned int __blsic_u32(unsigned int __X) {
return ~__X | (__X - 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__t1mskc_u32(unsigned int __X) {
__funline unsigned int __t1mskc_u32(unsigned int __X) {
return ~__X | (__X + 1);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzmsk_u32(unsigned int __X) {
__funline unsigned int __tzmsk_u32(unsigned int __X) {
return ~__X & (__X - 1);
}
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextri_u64(unsigned long long __X, const unsigned int __I) {
__funline unsigned long long __bextri_u64(unsigned long long __X,
const unsigned int __I) {
return __builtin_ia32_bextri_u64(__X, __I);
}
#else
@ -90,57 +69,39 @@ extern __inline unsigned long long
(unsigned long long)(I)))
#endif /*__OPTIMIZE__ */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcfill_u64(unsigned long long __X) {
__funline unsigned long long __blcfill_u64(unsigned long long __X) {
return __X & (__X + 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blci_u64(unsigned long long __X) {
__funline unsigned long long __blci_u64(unsigned long long __X) {
return __X | ~(__X + 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcic_u64(unsigned long long __X) {
__funline unsigned long long __blcic_u64(unsigned long long __X) {
return ~__X & (__X + 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcmsk_u64(unsigned long long __X) {
__funline unsigned long long __blcmsk_u64(unsigned long long __X) {
return __X ^ (__X + 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcs_u64(unsigned long long __X) {
__funline unsigned long long __blcs_u64(unsigned long long __X) {
return __X | (__X + 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsfill_u64(unsigned long long __X) {
__funline unsigned long long __blsfill_u64(unsigned long long __X) {
return __X | (__X - 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsic_u64(unsigned long long __X) {
__funline unsigned long long __blsic_u64(unsigned long long __X) {
return ~__X | (__X - 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__t1mskc_u64(unsigned long long __X) {
__funline unsigned long long __t1mskc_u64(unsigned long long __X) {
return ~__X | (__X + 1);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzmsk_u64(unsigned long long __X) {
__funline unsigned long long __tzmsk_u64(unsigned long long __X) {
return ~__X & (__X - 1);
}

View file

@ -9,160 +9,108 @@
#define __DISABLE_SSSE3__
#endif /* __SSSE3__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_hadd_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_hadd_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_phaddd128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_hadds_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_hadd_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_phaddw((__v4hi)__X, (__v4hi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32(__m64 __X, __m64 __Y) {
__funline __m64 _mm_hadd_pi32(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_phaddd((__v2si)__X, (__v2si)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_hadds_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_phaddsw((__v4hi)__X, (__v4hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_hsub_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_hsub_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_phsubd128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_hsubs_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_hsub_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_phsubw((__v4hi)__X, (__v4hi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32(__m64 __X, __m64 __Y) {
__funline __m64 _mm_hsub_pi32(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_phsubd((__v2si)__X, (__v2si)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_hsubs_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_phsubsw((__v4hi)__X, (__v4hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_maddubs_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__X, (__v16qi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_maddubs_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__X, (__v8qi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_mulhrs_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_mulhrs_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__X, (__v4hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8(__m128i __X, __m128i __Y) {
__funline __m128i _mm_shuffle_epi8(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__X, (__v16qi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8(__m64 __X, __m64 __Y) {
__funline __m64 _mm_shuffle_pi8(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_pshufb((__v8qi)__X, (__v8qi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8(__m128i __X, __m128i __Y) {
__funline __m128i _mm_sign_epi8(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psignb128((__v16qi)__X, (__v16qi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16(__m128i __X, __m128i __Y) {
__funline __m128i _mm_sign_epi16(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psignw128((__v8hi)__X, (__v8hi)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32(__m128i __X, __m128i __Y) {
__funline __m128i _mm_sign_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psignd128((__v4si)__X, (__v4si)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8(__m64 __X, __m64 __Y) {
__funline __m64 _mm_sign_pi8(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_psignb((__v8qi)__X, (__v8qi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16(__m64 __X, __m64 __Y) {
__funline __m64 _mm_sign_pi16(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_psignw((__v4hi)__X, (__v4hi)__Y);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32(__m64 __X, __m64 __Y) {
__funline __m64 _mm_sign_pi32(__m64 __X, __m64 __Y) {
return (__m64)__builtin_ia32_psignd((__v2si)__X, (__v2si)__Y);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) {
__funline __m128i _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) {
return (__m128i)__builtin_ia32_palignr128((__v2di)__X, (__v2di)__Y, __N * 8);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) {
__funline __m64 _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) {
return (__m64)__builtin_ia32_palignr((__v1di)__X, (__v1di)__Y, __N * 8);
}
#else
@ -174,39 +122,27 @@ extern __inline __m64
(int)(N)*8))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8(__m128i __X) {
__funline __m128i _mm_abs_epi8(__m128i __X) {
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16(__m128i __X) {
__funline __m128i _mm_abs_epi16(__m128i __X) {
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__X);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32(__m128i __X) {
__funline __m128i _mm_abs_epi32(__m128i __X) {
return (__m128i)__builtin_ia32_pabsd128((__v4si)__X);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8(__m64 __X) {
__funline __m64 _mm_abs_pi8(__m64 __X) {
return (__m64)__builtin_ia32_pabsb((__v8qi)__X);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16(__m64 __X) {
__funline __m64 _mm_abs_pi16(__m64 __X) {
return (__m64)__builtin_ia32_pabsw((__v4hi)__X);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32(__m64 __X) {
__funline __m64 _mm_abs_pi32(__m64 __X) {
return (__m64)__builtin_ia32_pabsd((__v2si)__X);
}

View file

@ -9,27 +9,19 @@
#define __DISABLE_VAES__
#endif /* __VAES__ */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_aesdec_epi128(__m256i __A, __m256i __B) {
__funline __m256i _mm256_aesdec_epi128(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vaesdec_v32qi((__v32qi)__A, (__v32qi)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B) {
__funline __m256i _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vaesdeclast_v32qi((__v32qi)__A, (__v32qi)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_aesenc_epi128(__m256i __A, __m256i __B) {
__funline __m256i _mm256_aesenc_epi128(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vaesenc_v32qi((__v32qi)__A, (__v32qi)__B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_aesenclast_epi128(__m256i __A, __m256i __B) {
__funline __m256i _mm256_aesenclast_epi128(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_vaesenclast_v32qi((__v32qi)__A, (__v32qi)__B);
}
@ -44,27 +36,19 @@ extern __inline __m256i
#define __DISABLE_VAESF__
#endif /* __VAES__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_aesdec_epi128(__m512i __A, __m512i __B) {
__funline __m512i _mm512_aesdec_epi128(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vaesdec_v64qi((__v64qi)__A, (__v64qi)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B) {
__funline __m512i _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vaesdeclast_v64qi((__v64qi)__A, (__v64qi)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_aesenc_epi128(__m512i __A, __m512i __B) {
__funline __m512i _mm512_aesenc_epi128(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vaesenc_v64qi((__v64qi)__A, (__v64qi)__B);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_aesenclast_epi128(__m512i __A, __m512i __B) {
__funline __m512i _mm512_aesenclast_epi128(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_vaesenclast_v64qi((__v64qi)__A, (__v64qi)__B);
}

View file

@ -12,9 +12,8 @@
#endif /* __VPCLMULQDQF__ */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_clmulepi64_epi128(__m512i __A, __m512i __B, const int __C) {
__funline __m512i _mm512_clmulepi64_epi128(__m512i __A, __m512i __B,
const int __C) {
return (__m512i)__builtin_ia32_vpclmulqdq_v8di((__v8di)__A, (__v8di)__B, __C);
}
#else
@ -35,9 +34,8 @@ extern __inline __m512i
#endif /* __VPCLMULQDQ__ */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_clmulepi64_epi128(__m256i __A, __m256i __B, const int __C) {
__funline __m256i _mm256_clmulepi64_epi128(__m256i __A, __m256i __B,
const int __C) {
return (__m256i)__builtin_ia32_vpclmulqdq_v4di((__v4di)__A, (__v4di)__B, __C);
}
#else

View file

@ -11,21 +11,15 @@
#define __DISABLE_WAITPKG__
#endif /* __WAITPKG__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_umonitor(void *__A) {
__funline void _umonitor(void *__A) {
__builtin_ia32_umonitor(__A);
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_umwait(unsigned int __A, unsigned long long __B) {
__funline unsigned char _umwait(unsigned int __A, unsigned long long __B) {
return __builtin_ia32_umwait(__A, __B);
}
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tpause(unsigned int __A, unsigned long long __B) {
__funline unsigned char _tpause(unsigned int __A, unsigned long long __B) {
return __builtin_ia32_tpause(__A, __B);
}

View file

@ -11,9 +11,7 @@
#define __DISABLE_WBNOINVD__
#endif /* __WBNOINVD__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wbnoinvd(void) {
__funline void _wbnoinvd(void) {
__builtin_ia32_wbnoinvd();
}

View file

@ -9,40 +9,28 @@
#define __DISABLE_AES__
#endif /* __AES__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec_si128(__m128i __X, __m128i __Y) {
__funline __m128i _mm_aesdec_si128(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_aesdec128((__v2di)__X, (__v2di)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdeclast_si128(__m128i __X, __m128i __Y) {
__funline __m128i _mm_aesdeclast_si128(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__X, (__v2di)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc_si128(__m128i __X, __m128i __Y) {
__funline __m128i _mm_aesenc_si128(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_aesenc128((__v2di)__X, (__v2di)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenclast_si128(__m128i __X, __m128i __Y) {
__funline __m128i _mm_aesenclast_si128(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_aesenclast128((__v2di)__X, (__v2di)__Y);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesimc_si128(__m128i __X) {
__funline __m128i _mm_aesimc_si128(__m128i __X) {
return (__m128i)__builtin_ia32_aesimc128((__v2di)__X);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aeskeygenassist_si128(__m128i __X, const int __C) {
__funline __m128i _mm_aeskeygenassist_si128(__m128i __X, const int __C) {
return (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)__X, __C);
}
#else
@ -62,9 +50,7 @@ extern __inline __m128i
#endif /* __PCLMUL__ */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I) {
__funline __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I) {
return (__m128i)__builtin_ia32_pclmulqdq128((__v2di)__X, (__v2di)__Y, __I);
}
#else

File diff suppressed because it is too large Load diff

View file

@ -13,192 +13,134 @@
#define __DISABLE_XOP__
#endif /* __XOP__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B,
(__v8hi)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B,
(__v8hi)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B,
(__v2di)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B,
(__v2di)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B,
(__v2di)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B,
(__v2di)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B,
(__v4si)__C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B,
(__v4si)__C);
}
/* Packed Integer Horizontal Add and Subtract */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddw_epi8(__m128i __A) {
__funline __m128i _mm_haddw_epi8(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epi8(__m128i __A) {
__funline __m128i _mm_haddd_epi8(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epi8(__m128i __A) {
__funline __m128i _mm_haddq_epi8(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epi16(__m128i __A) {
__funline __m128i _mm_haddd_epi16(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epi16(__m128i __A) {
__funline __m128i _mm_haddq_epi16(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epi32(__m128i __A) {
__funline __m128i _mm_haddq_epi32(__m128i __A) {
return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddw_epu8(__m128i __A) {
__funline __m128i _mm_haddw_epu8(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epu8(__m128i __A) {
__funline __m128i _mm_haddd_epu8(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epu8(__m128i __A) {
__funline __m128i _mm_haddq_epu8(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddd_epu16(__m128i __A) {
__funline __m128i _mm_haddd_epu16(__m128i __A) {
return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epu16(__m128i __A) {
__funline __m128i _mm_haddq_epu16(__m128i __A) {
return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_haddq_epu32(__m128i __A) {
__funline __m128i _mm_haddq_epu32(__m128i __A) {
return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubw_epi8(__m128i __A) {
__funline __m128i _mm_hsubw_epi8(__m128i __A) {
return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubd_epi16(__m128i __A) {
__funline __m128i _mm_hsubd_epi16(__m128i __A) {
return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubq_epi32(__m128i __A) {
__funline __m128i _mm_hsubq_epi32(__m128i __A) {
return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
}
/* Vector conditional move and permute */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) {
__funline __m128i _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) {
return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B,
(__v16qi)__C);
}
@ -206,52 +148,36 @@ extern __inline __m128i
/* Packed Integer Rotates and Shifts
Rotates - Non-Immediate form */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_rot_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_rot_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_rot_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rot_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_rot_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
}
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi8(__m128i __A, const int __B) {
__funline __m128i _mm_roti_epi8(__m128i __A, const int __B) {
return (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi16(__m128i __A, const int __B) {
__funline __m128i _mm_roti_epi16(__m128i __A, const int __B) {
return (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi32(__m128i __A, const int __B) {
__funline __m128i _mm_roti_epi32(__m128i __A, const int __B) {
return (__m128i)__builtin_ia32_vprotdi((__v4si)__A, __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_roti_epi64(__m128i __A, const int __B) {
__funline __m128i _mm_roti_epi64(__m128i __A, const int __B) {
return (__m128i)__builtin_ia32_vprotqi((__v2di)__A, __B);
}
#else
@ -265,501 +191,341 @@ extern __inline __m128i
((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (int)(N)))
#endif
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_shl_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_shl_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_shl_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shl_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_shl_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_sha_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltub((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomleub((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtub((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgeub((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomequb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomnequb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalseub((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epu8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrueub((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltuw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomleuw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtuw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgeuw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomequw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomnequw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalseuw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epu16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrueuw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltud((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomleud((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtud((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgeud((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomequd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomnequd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalseud((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epu32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrueud((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltuq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomleuq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtuq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgeuq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomequq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomnequq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalseuq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epu64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epu64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrueuq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomleb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgeb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomeqb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomneqb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalseb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi8(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrueb((__v16qi)__A, (__v16qi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomlew((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgew((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomeqw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomneqw((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalsew((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi16(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtruew((__v8hi)__A, (__v8hi)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomled((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomged((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomeqd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomneqd((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalsed((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi32(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrued((__v4si)__A, (__v4si)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comlt_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comlt_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomltq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comle_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comle_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomleq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comgt_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comgt_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgtq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comge_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comge_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomgeq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comeq_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comeq_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomeqq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comneq_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comneq_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomneqq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comfalse_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comfalse_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomfalseq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comtrue_epi64(__m128i __A, __m128i __B) {
__funline __m128i _mm_comtrue_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_vpcomtrueq((__v2di)__A, (__v2di)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_ps(__m128 __A) {
__funline __m128 _mm_frcz_ps(__m128 __A) {
return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_pd(__m128d __A) {
__funline __m128d _mm_frcz_pd(__m128d __A) {
return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_ss(__m128 __A, __m128 __B) {
__funline __m128 _mm_frcz_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movss(
(__v4sf)__A, (__v4sf)__builtin_ia32_vfrczss((__v4sf)__B));
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_sd(__m128d __A, __m128d __B) {
__funline __m128d _mm_frcz_sd(__m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_movsd(
(__v2df)__A, (__v2df)__builtin_ia32_vfrczsd((__v2df)__B));
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_frcz_ps(__m256 __A) {
__funline __m256 _mm256_frcz_ps(__m256 __A) {
return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_frcz_pd(__m256d __A) {
__funline __m256d _mm256_frcz_pd(__m256d __A) {
return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
}
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_pd(__m128d __X, __m128d __Y, __m128i __C, const int __I) {
__funline __m128d _mm_permute2_pd(__m128d __X, __m128d __Y, __m128i __C,
const int __I) {
return (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y,
(__v2di)__C, __I);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_pd(__m256d __X, __m256d __Y, __m256i __C, const int __I) {
__funline __m256d _mm256_permute2_pd(__m256d __X, __m256d __Y, __m256i __C,
const int __I) {
return (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y,
(__v4di)__C, __I);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_ps(__m128 __X, __m128 __Y, __m128i __C, const int __I) {
__funline __m128 _mm_permute2_ps(__m128 __X, __m128 __Y, __m128i __C,
const int __I) {
return (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y,
(__v4si)__C, __I);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_ps(__m256 __X, __m256 __Y, __m256i __C, const int __I) {
__funline __m256 _mm256_peeeeeeermute2_ps(__m256 __X, __m256 __Y, __m256i __C,
const int __I) {
return (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y,
(__v8si)__C, __I);
}

View file

@ -11,16 +11,12 @@
#define __DISABLE_XSAVEC__
#endif /* __XSAVEC__ */
/* Saves processor extended state components selected by mask __M into the
   buffer __P using the compacted XSAVEC format. Stale duplicated
   `extern __inline` signature lines (diff residue) were removed. */
__funline void _xsavec(void *__P, long long __M) {
  __builtin_ia32_xsavec(__P, __M);
}
#ifdef __x86_64__
/* 64-bit form (XSAVEC64). */
__funline void _xsavec64(void *__P, long long __M) {
  __builtin_ia32_xsavec64(__P, __M);
}
#endif

View file

@ -11,40 +11,28 @@
#define __DISABLE_XSAVE__
#endif /* __XSAVE__ */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsave(void *__P, long long __M) {
__funline void _xsave(void *__P, long long __M) {
__builtin_ia32_xsave(__P, __M);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xrstor(void *__P, long long __M) {
__funline void _xrstor(void *__P, long long __M) {
__builtin_ia32_xrstor(__P, __M);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsetbv(unsigned int __A, long long __V) {
__funline void _xsetbv(unsigned int __A, long long __V) {
__builtin_ia32_xsetbv(__A, __V);
}
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xgetbv(unsigned int __A) {
__funline long long _xgetbv(unsigned int __A) {
return __builtin_ia32_xgetbv(__A);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsave64(void *__P, long long __M) {
__funline void _xsave64(void *__P, long long __M) {
__builtin_ia32_xsave64(__P, __M);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xrstor64(void *__P, long long __M) {
__funline void _xrstor64(void *__P, long long __M) {
__builtin_ia32_xrstor64(__P, __M);
}
#endif

View file

@ -11,16 +11,12 @@
#define __DISABLE_XSAVEOPT__
#endif /* __XSAVEOPT__ */
/* Saves extended states selected by __M into __P, skipping components the
   processor knows are unmodified (XSAVEOPT). Stale duplicated
   `extern __inline` signature lines (diff residue) were removed. */
__funline void _xsaveopt(void *__P, long long __M) {
  __builtin_ia32_xsaveopt(__P, __M);
}
#ifdef __x86_64__
/* 64-bit form (XSAVEOPT64). */
__funline void _xsaveopt64(void *__P, long long __M) {
  __builtin_ia32_xsaveopt64(__P, __M);
}
#endif

View file

@ -11,28 +11,20 @@
#define __DISABLE_XSAVES__
#endif /* __XSAVES__ */
/*
 * XSAVES/XRSTORS intrinsics: supervisor-mode save/restore of extended
 * states selected by mask __M to/from buffer __P. Stale duplicated
 * `extern __inline` signature lines (diff residue from the __funline
 * refactor) were removed.
 */
__funline void _xsaves(void *__P, long long __M) {
  __builtin_ia32_xsaves(__P, __M);
}
__funline void _xrstors(void *__P, long long __M) {
  __builtin_ia32_xrstors(__P, __M);
}
#ifdef __x86_64__
__funline void _xrstors64(void *__P, long long __M) {
  __builtin_ia32_xrstors64(__P, __M);
}
__funline void _xsaves64(void *__P, long long __M) {
  __builtin_ia32_xsaves64(__P, __M);
}
#endif

View file

@ -11,9 +11,7 @@
#define __DISABLE_RTM__
#endif /* __RTM__ */
/* Returns nonzero when executing inside an RTM transaction (XTEST).
   Stale duplicated `extern __inline` signature lines (diff residue)
   were removed. */
__funline int _xtest(void) {
  return __builtin_ia32_xtest();
}

View file

@ -26,6 +26,7 @@
(cosmo
'("__msabi"
"__funline"
"function"
"offsetof"
"microarchitecture"