/* cosmopolitan/third_party/intel/xmmintrin.internal.h */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _XMMINTRIN_H_INCLUDED
#define _XMMINTRIN_H_INCLUDED
#include "third_party/intel/mm_malloc.internal.h"
#include "third_party/intel/mmintrin.internal.h"
enum _mm_hint {
_MM_HINT_ET0 = 7,
_MM_HINT_ET1 = 6,
_MM_HINT_T0 = 3,
_MM_HINT_T1 = 2,
_MM_HINT_T2 = 1,
_MM_HINT_NTA = 0
};
#ifdef __OPTIMIZE__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_prefetch(const void *__P, enum _mm_hint __I) {
__builtin_prefetch(__P, (__I & 0x4) >> 2, __I & 0x3);
}
#else
#define _mm_prefetch(P, I) __builtin_prefetch((P), ((I & 0x4) >> 2), (I & 0x3))
#endif
#ifndef __SSE__
#pragma GCC push_options
#pragma GCC target("sse")
#define __DISABLE_SSE__
#endif
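/*
 * __m128 is the 16-byte SSE vector of four floats; __m128_u is the
 * byte-aligned variant used by the unaligned load/store intrinsics
 * (_mm_loadu_ps, _mm_storeu_ps); __v4sf is the internal vector type
 * the builtins operate on.
 */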
typedef float __m128 __attribute__((__vector_size__(16), __may_alias__));
typedef float __m128_u
__attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));
typedef float __v4sf __attribute__((__vector_size__(16)));
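/*
 * _MM_SHUFFLE packs four 2-bit element selectors into the immediate
 * expected by the shuffle intrinsics, e.g. _MM_SHUFFLE(0, 1, 2, 3)
 * reverses element order as used by _mm_loadr_ps() and _mm_storer_ps().
 */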
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
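/*
 * MXCSR bit masks: exception status flags, exception masks, rounding
 * control, and flush-to-zero mode, manipulated through _mm_getcsr(),
 * _mm_setcsr(), and the _MM_GET_ / _MM_SET_ helpers below.
 */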
#define _MM_EXCEPT_MASK 0x003f
#define _MM_EXCEPT_INVALID 0x0001
#define _MM_EXCEPT_DENORM 0x0002
#define _MM_EXCEPT_DIV_ZERO 0x0004
#define _MM_EXCEPT_OVERFLOW 0x0008
#define _MM_EXCEPT_UNDERFLOW 0x0010
#define _MM_EXCEPT_INEXACT 0x0020
#define _MM_MASK_MASK 0x1f80
#define _MM_MASK_INVALID 0x0080
#define _MM_MASK_DENORM 0x0100
#define _MM_MASK_DIV_ZERO 0x0200
#define _MM_MASK_OVERFLOW 0x0400
#define _MM_MASK_UNDERFLOW 0x0800
#define _MM_MASK_INEXACT 0x1000
#define _MM_ROUND_MASK 0x6000
#define _MM_ROUND_NEAREST 0x0000
#define _MM_ROUND_DOWN 0x2000
#define _MM_ROUND_UP 0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
#define _MM_FLUSH_ZERO_MASK 0x8000
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ps(void) {
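  /* __Y is deliberately left undefined; the self-initialization merely
     keeps most compilers from warning about the uninitialized read. */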
__m128 __Y = __Y;
return __Y;
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ps(void) {
return __extension__(__m128){0.0f, 0.0f, 0.0f, 0.0f};
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_addss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_subss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_mulss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_divss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ss(__m128 __A) {
return (__m128)__builtin_ia32_sqrtss((__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ss(__m128 __A) {
return (__m128)__builtin_ia32_rcpss((__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ss(__m128 __A) {
return (__m128)__builtin_ia32_rsqrtss((__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_minss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_maxss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ps(__m128 __A, __m128 __B) {
return (__m128)((__v4sf)__A + (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ps(__m128 __A, __m128 __B) {
return (__m128)((__v4sf)__A - (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ps(__m128 __A, __m128 __B) {
return (__m128)((__v4sf)__A * (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ps(__m128 __A, __m128 __B) {
return (__m128)((__v4sf)__A / (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ps(__m128 __A) {
return (__m128)__builtin_ia32_sqrtps((__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ps(__m128 __A) {
return (__m128)__builtin_ia32_rcpps((__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ps(__m128 __A) {
return (__m128)__builtin_ia32_rsqrtps((__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_minps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_maxps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_ps(__m128 __A, __m128 __B) {
return __builtin_ia32_andps(__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_ps(__m128 __A, __m128 __B) {
return __builtin_ia32_andnps(__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_ps(__m128 __A, __m128 __B) {
return __builtin_ia32_orps(__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_ps(__m128 __A, __m128 __B) {
return __builtin_ia32_xorps(__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpeqss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpltss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpless((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movss(
(__v4sf)__A, (__v4sf)__builtin_ia32_cmpltss((__v4sf)__B, (__v4sf)__A));
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movss(
(__v4sf)__A, (__v4sf)__builtin_ia32_cmpless((__v4sf)__B, (__v4sf)__A));
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpneqss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpnltss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpnless((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movss(
(__v4sf)__A, (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__B, (__v4sf)__A));
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movss(
(__v4sf)__A, (__v4sf)__builtin_ia32_cmpnless((__v4sf)__B, (__v4sf)__A));
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpordss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_ss(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpunordss((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpeqps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpltps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpleps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpgtps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpgeps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpneqps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpnltps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpnleps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpngtps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpngeps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpordps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_cmpunordps((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_comieq((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_comilt((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_comile((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_comigt((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_comige((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_comineq((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_ucomieq((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_ucomilt((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_ucomile((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_ucomigt((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_ucomige((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_ss(__m128 __A, __m128 __B) {
return __builtin_ia32_ucomineq((__v4sf)__A, (__v4sf)__B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si32(__m128 __A) {
return __builtin_ia32_cvtss2si((__v4sf)__A);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_ss2si(__m128 __A) {
return _mm_cvtss_si32(__A);
}
#ifdef __x86_64__
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64(__m128 __A) {
return __builtin_ia32_cvtss2si64((__v4sf)__A);
}
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64x(__m128 __A) {
return __builtin_ia32_cvtss2si64((__v4sf)__A);
}
#endif
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi32(__m128 __A) {
return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_ps2pi(__m128 __A) {
return _mm_cvtps_pi32(__A);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si32(__m128 __A) {
return __builtin_ia32_cvttss2si((__v4sf)__A);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_ss2si(__m128 __A) {
return _mm_cvttss_si32(__A);
}
#ifdef __x86_64__
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si64(__m128 __A) {
return __builtin_ia32_cvttss2si64((__v4sf)__A);
}
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si64x(__m128 __A) {
return __builtin_ia32_cvttss2si64((__v4sf)__A);
}
#endif
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_pi32(__m128 __A) {
return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_ps2pi(__m128 __A) {
return _mm_cvttps_pi32(__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_ss(__m128 __A, int __B) {
return (__m128)__builtin_ia32_cvtsi2ss((__v4sf)__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_si2ss(__m128 __A, int __B) {
return _mm_cvtsi32_ss(__A, __B);
}
#ifdef __x86_64__
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_ss(__m128 __A, long long __B) {
return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_ss(__m128 __A, long long __B) {
return (__m128)__builtin_ia32_cvtsi642ss((__v4sf)__A, __B);
}
#endif
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_ps(__m128 __A, __m64 __B) {
return (__m128)__builtin_ia32_cvtpi2ps((__v4sf)__A, (__v2si)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_pi2ps(__m128 __A, __m64 __B) {
return _mm_cvtpi32_ps(__A, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi16_ps(__m64 __A) {
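  /* Sign-extend the four 16-bit lanes to 32 bits using a pcmpgtw-derived
     sign mask, convert each pair with cvtpi2ps, then merge the halves
     with movlhps. */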
__v4hi __sign;
__v2si __hisi, __losi;
__v4sf __zero, __ra, __rb;
__sign = __builtin_ia32_pcmpgtw((__v4hi)0LL, (__v4hi)__A);
__losi = (__v2si)__builtin_ia32_punpcklwd((__v4hi)__A, __sign);
__hisi = (__v2si)__builtin_ia32_punpckhwd((__v4hi)__A, __sign);
__zero = (__v4sf)_mm_setzero_ps();
__ra = __builtin_ia32_cvtpi2ps(__zero, __losi);
__rb = __builtin_ia32_cvtpi2ps(__ra, __hisi);
return (__m128)__builtin_ia32_movlhps(__ra, __rb);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpu16_ps(__m64 __A) {
__v2si __hisi, __losi;
__v4sf __zero, __ra, __rb;
__losi = (__v2si)__builtin_ia32_punpcklwd((__v4hi)__A, (__v4hi)0LL);
__hisi = (__v2si)__builtin_ia32_punpckhwd((__v4hi)__A, (__v4hi)0LL);
__zero = (__v4sf)_mm_setzero_ps();
__ra = __builtin_ia32_cvtpi2ps(__zero, __losi);
__rb = __builtin_ia32_cvtpi2ps(__ra, __hisi);
return (__m128)__builtin_ia32_movlhps(__ra, __rb);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi8_ps(__m64 __A) {
__v8qi __sign;
__sign = __builtin_ia32_pcmpgtb((__v8qi)0LL, (__v8qi)__A);
__A = (__m64)__builtin_ia32_punpcklbw((__v8qi)__A, __sign);
return _mm_cvtpi16_ps(__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpu8_ps(__m64 __A) {
__A = (__m64)__builtin_ia32_punpcklbw((__v8qi)__A, (__v8qi)0LL);
return _mm_cvtpu16_ps(__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32x2_ps(__m64 __A, __m64 __B) {
__v4sf __zero = (__v4sf)_mm_setzero_ps();
__v4sf __sfa = __builtin_ia32_cvtpi2ps(__zero, (__v2si)__A);
__v4sf __sfb = __builtin_ia32_cvtpi2ps(__sfa, (__v2si)__B);
return (__m128)__builtin_ia32_movlhps(__sfa, __sfb);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi16(__m128 __A) {
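  /* Convert the low and high float pairs to 32-bit integers separately,
     then pack them back to 16 bits with signed saturation. */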
__v4sf __hisf = (__v4sf)__A;
__v4sf __losf = __builtin_ia32_movhlps(__hisf, __hisf);
__v2si __hisi = __builtin_ia32_cvtps2pi(__hisf);
__v2si __losi = __builtin_ia32_cvtps2pi(__losf);
return (__m64)__builtin_ia32_packssdw(__hisi, __losi);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi8(__m128 __A) {
__v4hi __tmp = (__v4hi)_mm_cvtps_pi16(__A);
return (__m64)__builtin_ia32_packsswb(__tmp, (__v4hi)0LL);
}
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_ps(__m128 __A, __m128 __B, int const __mask) {
return (__m128)__builtin_ia32_shufps((__v4sf)__A, (__v4sf)__B, __mask);
}
#else
#define _mm_shuffle_ps(A, B, MASK) \
((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
(int)(MASK)))
#endif
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_unpckhps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_unpcklps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pi(__m128 __A, __m64 const *__P) {
return (__m128)__builtin_ia32_loadhps((__v4sf)__A, (const __v2sf *)__P);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pi(__m64 *__P, __m128 __A) {
__builtin_ia32_storehps((__v2sf *)__P, (__v4sf)__A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehl_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movhlps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movelh_ps(__m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_movlhps((__v4sf)__A, (__v4sf)__B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pi(__m128 __A, __m64 const *__P) {
return (__m128)__builtin_ia32_loadlps((__v4sf)__A, (const __v2sf *)__P);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pi(__m64 *__P, __m128 __A) {
__builtin_ia32_storelps((__v2sf *)__P, (__v4sf)__A);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_ps(__m128 __A) {
return __builtin_ia32_movmskps((__v4sf)__A);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_getcsr(void) {
return __builtin_ia32_stmxcsr();
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_EXCEPTION_STATE(void) {
return _mm_getcsr() & _MM_EXCEPT_MASK;
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_EXCEPTION_MASK(void) {
return _mm_getcsr() & _MM_MASK_MASK;
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_ROUNDING_MODE(void) {
return _mm_getcsr() & _MM_ROUND_MASK;
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_FLUSH_ZERO_MODE(void) {
return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setcsr(unsigned int __I) {
__builtin_ia32_ldmxcsr(__I);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_EXCEPTION_STATE(unsigned int __mask) {
_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_EXCEPTION_MASK(unsigned int __mask) {
_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_ROUNDING_MODE(unsigned int __mode) {
_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_FLUSH_ZERO_MODE(unsigned int __mode) {
_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ss(float __F) {
return __extension__(__m128)(__v4sf){__F, 0.0f, 0.0f, 0.0f};
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ps(float __F) {
return __extension__(__m128)(__v4sf){__F, __F, __F, __F};
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ps1(float __F) {
return _mm_set1_ps(__F);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ss(float const *__P) {
return _mm_set_ss(*__P);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_ps(float const *__P) {
return _mm_set1_ps(*__P);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ps1(float const *__P) {
return _mm_load1_ps(__P);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ps(float const *__P) {
return *(__m128 *)__P;
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ps(float const *__P) {
return *(__m128_u *)__P;
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadr_ps(float const *__P) {
__v4sf __tmp = *(__v4sf *)__P;
return (__m128)__builtin_ia32_shufps(__tmp, __tmp, _MM_SHUFFLE(0, 1, 2, 3));
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ps(const float __Z, const float __Y, const float __X, const float __W) {
return __extension__(__m128)(__v4sf){__W, __X, __Y, __Z};
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ps(float __Z, float __Y, float __X, float __W) {
return __extension__(__m128)(__v4sf){__Z, __Y, __X, __W};
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ss(float *__P, __m128 __A) {
*__P = ((__v4sf)__A)[0];
}
extern __inline float
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_f32(__m128 __A) {
return ((__v4sf)__A)[0];
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ps(float *__P, __m128 __A) {
*(__m128 *)__P = __A;
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ps(float *__P, __m128 __A) {
*(__m128_u *)__P = __A;
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_ps(float *__P, __m128 __A) {
__v4sf __va = (__v4sf)__A;
__v4sf __tmp = __builtin_ia32_shufps(__va, __va, _MM_SHUFFLE(0, 0, 0, 0));
_mm_storeu_ps(__P, __tmp);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ps1(float *__P, __m128 __A) {
_mm_store1_ps(__P, __A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storer_ps(float *__P, __m128 __A) {
__v4sf __va = (__v4sf)__A;
__v4sf __tmp = __builtin_ia32_shufps(__va, __va, _MM_SHUFFLE(0, 1, 2, 3));
_mm_store_ps(__P, __tmp);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_ss(__m128 __A, __m128 __B) {
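  /* __builtin_shuffle with indices {4, 1, 2, 3} takes element 0 from __B
     and elements 1-3 from __A, i.e. MOVSS semantics. */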
return (__m128)__builtin_shuffle(
(__v4sf)__A, (__v4sf)__B,
__extension__(__attribute__((__vector_size__(16))) int){4, 1, 2, 3});
}
#ifdef __OPTIMIZE__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_pi16(__m64 const __A, int const __N) {
return (unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)__A, __N);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pextrw(__m64 const __A, int const __N) {
return _mm_extract_pi16(__A, __N);
}
#else
#define _mm_extract_pi16(A, N) \
((int)(unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)(__m64)(A), \
(int)(N)))
#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
#endif
#ifdef __OPTIMIZE__
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_pi16(__m64 const __A, int const __D, int const __N) {
return (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)__A, __D, __N);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pinsrw(__m64 const __A, int const __D, int const __N) {
return _mm_insert_pi16(__A, __D, __N);
}
#else
#define _mm_insert_pi16(A, D, N) \
((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)(__m64)(A), (int)(D), (int)(N)))
#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
#endif
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pi16(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pmaxsw((__v4hi)__A, (__v4hi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaxsw(__m64 __A, __m64 __B) {
return _mm_max_pi16(__A, __B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pu8(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pmaxub((__v8qi)__A, (__v8qi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaxub(__m64 __A, __m64 __B) {
return _mm_max_pu8(__A, __B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pi16(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pminsw((__v4hi)__A, (__v4hi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pminsw(__m64 __A, __m64 __B) {
return _mm_min_pi16(__A, __B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pu8(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pminub((__v8qi)__A, (__v8qi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pminub(__m64 __A, __m64 __B) {
return _mm_min_pu8(__A, __B);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8(__m64 __A) {
return __builtin_ia32_pmovmskb((__v8qi)__A);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmovmskb(__m64 __A) {
return _mm_movemask_pi8(__A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pu16(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pmulhuw((__v4hi)__A, (__v4hi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhuw(__m64 __A, __m64 __B) {
return _mm_mulhi_pu16(__A, __B);
}
#ifdef __OPTIMIZE__
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16(__m64 __A, int const __N) {
return (__m64)__builtin_ia32_pshufw((__v4hi)__A, __N);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pshufw(__m64 __A, int const __N) {
return _mm_shuffle_pi16(__A, __N);
}
#else
#define _mm_shuffle_pi16(A, N) \
((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(A), (int)(N)))
#define _m_pshufw(A, N) _mm_shuffle_pi16(A, N)
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmove_si64(__m64 __A, __m64 __N, char *__P) {
#ifdef __MMX_WITH_SSE__
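  /* Emulate the MMX maskmovq with SSE2 maskmovdqu: zero-extend the data
     and mask to 128 bits, and when __P is misaligned shift them left and
     lower __P so the 16-byte access stays within memory that must be
     mapped whenever the original 8 destination bytes are. */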
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
__v2di __A128 = __extension__(__v2di){((__v1di)__A)[0], 0};
__v2di __N128 = __extension__(__v2di){((__v1di)__N)[0], 0};
__SIZE_TYPE__ offset = ((__SIZE_TYPE__)__P) & 0xf;
if (offset) {
if (offset > 8) offset = 8;
__P = (char *)(((__SIZE_TYPE__)__P) - offset);
switch (offset) {
case 1:
__A128 = __builtin_ia32_pslldqi128(__A128, 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 8);
break;
case 2:
__A128 = __builtin_ia32_pslldqi128(__A128, 2 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 2 * 8);
break;
case 3:
__A128 = __builtin_ia32_pslldqi128(__A128, 3 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 3 * 8);
break;
case 4:
__A128 = __builtin_ia32_pslldqi128(__A128, 4 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 4 * 8);
break;
case 5:
__A128 = __builtin_ia32_pslldqi128(__A128, 5 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 5 * 8);
break;
case 6:
__A128 = __builtin_ia32_pslldqi128(__A128, 6 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 6 * 8);
break;
case 7:
__A128 = __builtin_ia32_pslldqi128(__A128, 7 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 7 * 8);
break;
case 8:
__A128 = __builtin_ia32_pslldqi128(__A128, 8 * 8);
__N128 = __builtin_ia32_pslldqi128(__N128, 8 * 8);
break;
default:
break;
}
}
__builtin_ia32_maskmovdqu((__v16qi)__A128, (__v16qi)__N128, __P);
#else
__builtin_ia32_maskmovq((__v8qi)__A, (__v8qi)__N, __P);
#endif
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_maskmovq(__m64 __A, __m64 __N, char *__P) {
_mm_maskmove_si64(__A, __N, __P);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_pu8(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pavgb((__v8qi)__A, (__v8qi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgb(__m64 __A, __m64 __B) {
return _mm_avg_pu8(__A, __B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_pu16(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_pavgw((__v4hi)__A, (__v4hi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgw(__m64 __A, __m64 __B) {
return _mm_avg_pu16(__A, __B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_pu8(__m64 __A, __m64 __B) {
return (__m64)__builtin_ia32_psadbw((__v8qi)__A, (__v8qi)__B);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psadbw(__m64 __A, __m64 __B) {
return _mm_sad_pu8(__A, __B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pi(__m64 *__P, __m64 __A) {
__builtin_ia32_movntq((unsigned long long *)__P, (unsigned long long)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ps(float *__P, __m128 __A) {
__builtin_ia32_movntps(__P, (__v4sf)__A);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sfence(void) {
__builtin_ia32_sfence();
}
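/*
 * Transpose the 4x4 matrix held in row0..row3 in place using the
 * unpacklo/unpackhi and movlhps/movhlps primitives.
 */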
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
__v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
__v4sf __t0 = __builtin_ia32_unpcklps(__r0, __r1); \
__v4sf __t1 = __builtin_ia32_unpcklps(__r2, __r3); \
__v4sf __t2 = __builtin_ia32_unpckhps(__r0, __r1); \
__v4sf __t3 = __builtin_ia32_unpckhps(__r2, __r3); \
(row0) = __builtin_ia32_movlhps(__t0, __t1); \
(row1) = __builtin_ia32_movhlps(__t1, __t0); \
(row2) = __builtin_ia32_movlhps(__t2, __t3); \
(row3) = __builtin_ia32_movhlps(__t3, __t2); \
} while (0)
#include "third_party/intel/emmintrin.internal.h"
#ifdef __DISABLE_SSE__
#undef __DISABLE_SSE__
#pragma GCC pop_options
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_pause(void) {
__builtin_ia32_pause();
}
#endif
#endif