/* Mirror of https://github.com/jart/cosmopolitan.git
   (synced 2025-01-31 11:37:35 +00:00; 834 lines, 25 KiB, C). */
#ifndef _MMINTRIN_H_INCLUDED
|
|
#define _MMINTRIN_H_INCLUDED
|
|
#ifdef __x86_64__
|
|
|
|
#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
|
|
#pragma GCC push_options
|
|
#ifdef __x86_64__
|
|
#pragma GCC target("sse,mmx")
|
|
#else
|
|
#pragma GCC target("mmx")
|
|
#endif
|
|
#define __DISABLE_MMX__
|
|
#endif /* __MMX__ */
|
|
|
|
/* Public 8-byte vector type; declared with __may_alias__. */
typedef int __m64 __attribute__((__vector_size__(8), __may_alias__));

/* Same as __m64 but declared with an alignment of 1. */
typedef int __m64_u
    __attribute__((__vector_size__(8), __may_alias__, __aligned__(1)));

/* Internal 8-byte vector views, one per element type, used when calling
   the builtins below. */
typedef int __v2si __attribute__((__vector_size__(8)));       /* int */
typedef short __v4hi __attribute__((__vector_size__(8)));     /* short */
typedef char __v8qi __attribute__((__vector_size__(8)));      /* char */
typedef long long __v1di __attribute__((__vector_size__(8))); /* long long */
typedef float __v2sf __attribute__((__vector_size__(8)));     /* float */

/* Invokes __builtin_ia32_emms; takes no arguments and returns nothing. */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_empty(void) {
  __builtin_ia32_emms();
}

/* Alternate name for _mm_empty; simply calls it. */
extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _m_empty(void) {
  _mm_empty();
}

extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cvtsi32_si64(int __i) {
|
|
return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_from_int(int __i) {
|
|
return _mm_cvtsi32_si64(__i);
|
|
}
|
|
|
|
#ifdef __x86_64__
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_from_int64(long long __i) {
|
|
return (__m64)__i;
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cvtsi64_m64(long long __i) {
|
|
return (__m64)__i;
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cvtsi64x_si64(long long __i) {
|
|
return (__m64)__i;
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set_pi64x(long long __i) {
|
|
return (__m64)__i;
|
|
}
|
|
#endif
|
|
|
|
extern __inline int
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cvtsi64_si32(__m64 __i) {
|
|
return __builtin_ia32_vec_ext_v2si((__v2si)__i, 0);
|
|
}
|
|
|
|
extern __inline int
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_to_int(__m64 __i) {
|
|
return _mm_cvtsi64_si32(__i);
|
|
}
|
|
|
|
#ifdef __x86_64__
|
|
|
|
extern __inline long long
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_to_int64(__m64 __i) {
|
|
return (long long)__i;
|
|
}
|
|
|
|
extern __inline long long
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cvtm64_si64(__m64 __i) {
|
|
return (long long)__i;
|
|
}
|
|
|
|
extern __inline long long
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cvtsi64_si64x(__m64 __i) {
|
|
return (long long)__i;
|
|
}
|
|
#endif
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_packs_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_packsswb(__m64 __m1, __m64 __m2) {
|
|
return _mm_packs_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_packs_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_packssdw(__m64 __m1, __m64 __m2) {
|
|
return _mm_packs_pi32(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_packs_pu16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_packuswb(__m64 __m1, __m64 __m2) {
|
|
return _mm_packs_pu16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_punpckhbw(__m64 __m1, __m64 __m2) {
|
|
return _mm_unpackhi_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_punpckhwd(__m64 __m1, __m64 __m2) {
|
|
return _mm_unpackhi_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_punpckhdq(__m64 __m1, __m64 __m2) {
|
|
return _mm_unpackhi_pi32(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_punpcklbw(__m64 __m1, __m64 __m2) {
|
|
return _mm_unpacklo_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_punpcklwd(__m64 __m1, __m64 __m2) {
|
|
return _mm_unpacklo_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_punpckldq(__m64 __m1, __m64 __m2) {
|
|
return _mm_unpacklo_pi32(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_add_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddb(__m64 __m1, __m64 __m2) {
|
|
return _mm_add_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_add_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddw(__m64 __m1, __m64 __m2) {
|
|
return _mm_add_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_add_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddd(__m64 __m1, __m64 __m2) {
|
|
return _mm_add_pi32(__m1, __m2);
|
|
}
|
|
|
|
#ifndef __SSE2__
|
|
#pragma GCC push_options
|
|
#pragma GCC target("sse2,mmx")
|
|
#define __DISABLE_SSE2__
|
|
#endif /* __SSE2__ */
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_add_si64(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddq((__v1di)__m1, (__v1di)__m2);
|
|
}
|
|
#ifdef __DISABLE_SSE2__
|
|
#undef __DISABLE_SSE2__
|
|
#pragma GCC pop_options
|
|
#endif /* __DISABLE_SSE2__ */
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_adds_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddsb(__m64 __m1, __m64 __m2) {
|
|
return _mm_adds_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_adds_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddsw(__m64 __m1, __m64 __m2) {
|
|
return _mm_adds_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_adds_pu8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddusb(__m64 __m1, __m64 __m2) {
|
|
return _mm_adds_pu8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_adds_pu16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_paddusw(__m64 __m1, __m64 __m2) {
|
|
return _mm_adds_pu16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sub_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubb(__m64 __m1, __m64 __m2) {
|
|
return _mm_sub_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sub_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubw(__m64 __m1, __m64 __m2) {
|
|
return _mm_sub_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sub_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubd(__m64 __m1, __m64 __m2) {
|
|
return _mm_sub_pi32(__m1, __m2);
|
|
}
|
|
|
|
#ifndef __SSE2__
|
|
#pragma GCC push_options
|
|
#pragma GCC target("sse2,mmx")
|
|
#define __DISABLE_SSE2__
|
|
#endif /* __SSE2__ */
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sub_si64(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubq((__v1di)__m1, (__v1di)__m2);
|
|
}
|
|
#ifdef __DISABLE_SSE2__
|
|
#undef __DISABLE_SSE2__
|
|
#pragma GCC pop_options
|
|
#endif /* __DISABLE_SSE2__ */
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_subs_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubsb(__m64 __m1, __m64 __m2) {
|
|
return _mm_subs_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_subs_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubsw(__m64 __m1, __m64 __m2) {
|
|
return _mm_subs_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_subs_pu8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubusb(__m64 __m1, __m64 __m2) {
|
|
return _mm_subs_pu8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_subs_pu16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psubusw(__m64 __m1, __m64 __m2) {
|
|
return _mm_subs_pu16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_madd_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pmaddwd(__m64 __m1, __m64 __m2) {
|
|
return _mm_madd_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pmulhw(__m64 __m1, __m64 __m2) {
|
|
return _mm_mulhi_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_mullo_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pmullw(__m64 __m1, __m64 __m2) {
|
|
return _mm_mullo_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sll_pi16(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psllw((__v4hi)__m, (__v4hi)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psllw(__m64 __m, __m64 __count) {
|
|
return _mm_sll_pi16(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_slli_pi16(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psllwi(__m64 __m, int __count) {
|
|
return _mm_slli_pi16(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sll_pi32(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_pslld((__v2si)__m, (__v2si)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pslld(__m64 __m, __m64 __count) {
|
|
return _mm_sll_pi32(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_slli_pi32(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pslldi(__m64 __m, int __count) {
|
|
return _mm_slli_pi32(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sll_si64(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psllq((__v1di)__m, (__v1di)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psllq(__m64 __m, __m64 __count) {
|
|
return _mm_sll_si64(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_slli_si64(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psllqi(__m64 __m, int __count) {
|
|
return _mm_slli_si64(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sra_pi16(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psraw((__v4hi)__m, (__v4hi)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psraw(__m64 __m, __m64 __count) {
|
|
return _mm_sra_pi16(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srai_pi16(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrawi(__m64 __m, int __count) {
|
|
return _mm_srai_pi16(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_sra_pi32(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psrad((__v2si)__m, (__v2si)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrad(__m64 __m, __m64 __count) {
|
|
return _mm_sra_pi32(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srai_pi32(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psradi(__m64 __m, int __count) {
|
|
return _mm_srai_pi32(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srl_pi16(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psrlw((__v4hi)__m, (__v4hi)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrlw(__m64 __m, __m64 __count) {
|
|
return _mm_srl_pi16(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srli_pi16(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrlwi(__m64 __m, int __count) {
|
|
return _mm_srli_pi16(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srl_pi32(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psrld((__v2si)__m, (__v2si)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrld(__m64 __m, __m64 __count) {
|
|
return _mm_srl_pi32(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srli_pi32(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrldi(__m64 __m, int __count) {
|
|
return _mm_srli_pi32(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srl_si64(__m64 __m, __m64 __count) {
|
|
return (__m64)__builtin_ia32_psrlq((__v1di)__m, (__v1di)__count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrlq(__m64 __m, __m64 __count) {
|
|
return _mm_srl_si64(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_srli_si64(__m64 __m, int __count) {
|
|
return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_psrlqi(__m64 __m, int __count) {
|
|
return _mm_srli_si64(__m, __count);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_and_si64(__m64 __m1, __m64 __m2) {
|
|
return __builtin_ia32_pand(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pand(__m64 __m1, __m64 __m2) {
|
|
return _mm_and_si64(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_andnot_si64(__m64 __m1, __m64 __m2) {
|
|
return __builtin_ia32_pandn(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pandn(__m64 __m1, __m64 __m2) {
|
|
return _mm_andnot_si64(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_or_si64(__m64 __m1, __m64 __m2) {
|
|
return __builtin_ia32_por(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_por(__m64 __m1, __m64 __m2) {
|
|
return _mm_or_si64(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_xor_si64(__m64 __m1, __m64 __m2) {
|
|
return __builtin_ia32_pxor(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pxor(__m64 __m1, __m64 __m2) {
|
|
return _mm_xor_si64(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pcmpeqb(__m64 __m1, __m64 __m2) {
|
|
return _mm_cmpeq_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pcmpgtb(__m64 __m1, __m64 __m2) {
|
|
return _mm_cmpgt_pi8(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pcmpeqw(__m64 __m1, __m64 __m2) {
|
|
return _mm_cmpeq_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pcmpgtw(__m64 __m1, __m64 __m2) {
|
|
return _mm_cmpgt_pi16(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pcmpeqd(__m64 __m1, __m64 __m2) {
|
|
return _mm_cmpeq_pi32(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
|
|
return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_m_pcmpgtd(__m64 __m1, __m64 __m2) {
|
|
return _mm_cmpgt_pi32(__m1, __m2);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_setzero_si64(void) {
|
|
return (__m64)0LL;
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set_pi32(int __i1, int __i0) {
|
|
return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
|
|
return (__m64)__builtin_ia32_vec_init_v4hi(__w0, __w1, __w2, __w3);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
|
|
char __b2, char __b1, char __b0) {
|
|
return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5,
|
|
__b6, __b7);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_setr_pi32(int __i0, int __i1) {
|
|
return _mm_set_pi32(__i1, __i0);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
|
|
return _mm_set_pi16(__w3, __w2, __w1, __w0);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4,
|
|
char __b5, char __b6, char __b7) {
|
|
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set1_pi32(int __i) {
|
|
return _mm_set_pi32(__i, __i);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set1_pi16(short __w) {
|
|
return _mm_set_pi16(__w, __w, __w, __w);
|
|
}
|
|
|
|
extern __inline __m64
|
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
|
_mm_set1_pi8(char __b) {
|
|
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
|
|
}
|
|
/* If __DISABLE_MMX__ is defined, clear it and pop the saved options. */
#if defined(__DISABLE_MMX__)
#undef __DISABLE_MMX__
#pragma GCC pop_options
#endif /* __DISABLE_MMX__ */

#endif /* __x86_64__ */
#endif /* _MMINTRIN_H_INCLUDED */