mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-08-08 10:50:28 +00:00
added emmintrin and pmmintrin
This commit is contained in:
parent
7c5c2e2b66
commit
ca5a08cafc
2 changed files with 451 additions and 3 deletions
|
@ -240,5 +240,425 @@ struct thatispacked mayalias __usi128ma {
|
|||
|
||||
/* Spin-wait hint: emits the x86 `rep nop` instruction. Spelled __asm__ so the
   macro also compiles in strict ISO modes (e.g. -std=c11), where the plain
   `asm` keyword is not available. */
#define _mm_pause() __asm__("rep nop")
|
||||
|
||||
/* Builds a __m128d with DBL_0 (converted to double) in element 0 and 0.0 in
   element 1. */
#define _mm_set_sd(DBL_0) ((__m128d){(double)(DBL_0), 0.0})
|
||||
|
||||
/* Builds a __m128d with both elements set to DBL_0 converted to double.
   DBL_0 appears twice in the expansion, so an argument with side effects is
   evaluated twice. */
#define _mm_set1_pd(DBL_0) ((__m128d){(double)(DBL_0), (double)(DBL_0)})
|
||||
|
||||
#define _mm_set_pd1(DBL_0) (_mm_set1_pd((double)(DBL_0)))
|
||||
|
||||
/* Builds a __m128d from two doubles given high element first: DBL_1 lands in
   element 0 and DBL_0 in element 1. */
#define _mm_set_pd(DBL_0, DBL_1) ((__m128d){(double)(DBL_1), (double)(DBL_0)})
|
||||
|
||||
/* Builds a __m128d in argument order: DBL_0 in element 0, DBL_1 in
   element 1 (the reverse of _mm_set_pd). */
#define _mm_setr_pd(DBL_0, DBL_1) ((__m128d){(double)(DBL_0), (double)(DBL_1)})
|
||||
|
||||
/* Yields a __m128d whose contents are unspecified. __Y is initialized from
   itself so no particular value is materialized; the trailing `__Y;` is the
   value of the statement expression. (A `return` in this position would
   return from the function expanding the macro instead of yielding a
   value.) */
#define _mm_undefined_pd() \
  ({                       \
    __m128d __Y = __Y;     \
    __Y;                   \
  })
|
||||
|
||||
/* All-zero __m128d: element 0 is given explicitly, the remaining element of
   the compound literal is zero-filled. */
#define _mm_setzero_pd() ((__m128d){0.0})
|
||||
|
||||
/* Combines two __m128d values with __builtin_shuffle and the index vector
   {2, 1}: result element 0 is element 0 of M128D_1 (index 2 selects from the
   second operand) and result element 1 is element 1 of M128D_0.
   The operands are the macro's own parameters, each parenthesized. */
#define _mm_move_sd(M128D_0, M128D_1)                               \
  ((__m128d)__builtin_shuffle((__v2df)(M128D_0), (__v2df)(M128D_1), \
                              (__v2di){2, 1}))
|
||||
|
||||
#define _mm_load_pd(DBL_CONSTPTR_0) \
|
||||
(*(__m128d *)(double const *)(DBL_CONSTPTR_0))
|
||||
|
||||
#define _mm_loadu_pd(DBL_CONSTPTR_0) \
|
||||
(*(__m128d_u *)(double const *)(DBL_CONSTPTR_0))
|
||||
|
||||
#define _mm_load1_pd(DBL_CONSTPTR_0) \
|
||||
(_mm_set1_pd(*(double const *)(DBL_CONSTPTR_0)))
|
||||
|
||||
#define _mm_load_sd(DBL_CONSTPTR_0) \
|
||||
(_mm_set_sd(*(double const *)(DBL_CONSTPTR_0)))
|
||||
|
||||
#define _mm_load_pd1(DBL_CONSTPTR_0) \
|
||||
(_mm_load1_pd((double const *)(DBL_CONSTPTR_0)))
|
||||
|
||||
/* Loads a __m128d through DBL_CONSTPTR_0 and returns it passed through
   __builtin_ia32_shufpd with itself as both sources and selector 1. */
#define _mm_loadr_pd(DBL_CONSTPTR_0)                               \
  ({                                                               \
    __m128d __pair = *(__m128d *)(double const *)(DBL_CONSTPTR_0); \
    __builtin_ia32_shufpd(__pair, __pair, 1);                      \
  })
|
||||
|
||||
#define _mm_store_pd(DBLPTR_0, M128D_0) \
|
||||
(*(__m128d *)(double *)(DBLPTR_0) = (M128D_0))
|
||||
|
||||
#define _mm_storeu_pd(DBLPTR_0, M128D_0) \
|
||||
(*(__m128d_u *)(double *)(DBLPTR_0) = (M128D_0))
|
||||
|
||||
#define _mm_store_sd(DBLPTR_0, M128D_0) \
|
||||
(*(double *)(DBLPTR_0) = ((__v2df)(M128D_0))[0])
|
||||
|
||||
/* Extracts element 0 of M128D_0 (viewed as __v2df) as a scalar. */
#define _mm_cvtsd_f64(M128D_0) (((__v2df)(M128D_0))[0])
|
||||
|
||||
#define _mm_storel_pd(DBLPTR_0, M128D_0) \
|
||||
(_mm_store_sd((double *)(DBLPTR_0), (M128D_0)))
|
||||
|
||||
#define _mm_storeh_pd(DBLPTR_0, M128D_0) \
|
||||
(*(double *)(DBLPTR_0) = ((__v2df)(M128D_0))[1])
|
||||
|
||||
#define _mm_store1_pd(DBLPTR_0, M128D_0) \
|
||||
(_mm_store_pd((double *)(DBLPTR_0), \
|
||||
__builtin_ia32_shufpd((M128D_0), (M128D_0), 0)))
|
||||
|
||||
#define _mm_store_pd1(DBLPTR_0, M128D_0) \
|
||||
(_mm_store1_pd((double *)(DBLPTR_0), (M128D_0)))
|
||||
|
||||
#define _mm_storer_pd(DBLPTR_0, M128D_0) \
|
||||
(_mm_store_pd((double *)(DBLPTR_0), \
|
||||
__builtin_ia32_shufpd((M128D_0), (M128D_0), 1)))
|
||||
|
||||
#define _mm_comieq_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_comisdeq((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_comilt_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_comisdlt((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_comile_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_comisdle((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_comigt_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_comisdgt((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_comige_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_comisdge((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_comineq_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_comisdneq((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_ucomieq_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_ucomisdeq((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_ucomilt_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_ucomisdlt((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_ucomile_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_ucomisdle((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_ucomigt_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_ucomisdgt((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_ucomige_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_ucomisdge((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_ucomineq_sd(M128D_0, M128D_1) \
|
||||
(__builtin_ia32_ucomisdneq((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
/* Forwards M128I_0, viewed as __v4si, to __builtin_ia32_cvtdq2pd and returns
   the result as __m128d. The operand is the macro parameter itself, so the
   expansion does not depend on a caller-side variable named __A. */
#define _mm_cvtepi32_pd(M128I_0) \
  ((__m128d)__builtin_ia32_cvtdq2pd((__v4si)(M128I_0)))
|
||||
|
||||
#define _mm_cvtpd_epi32(M128D_0) \
|
||||
((__m128i)__builtin_ia32_cvtpd2dq((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvtpd_pi32(M128D_0) \
|
||||
((__m64)__builtin_ia32_cvtpd2pi((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvtpd_ps(M128D_0) \
|
||||
((__m128)__builtin_ia32_cvtpd2ps((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvttpd_epi32(M128D_0) \
|
||||
((__m128i)__builtin_ia32_cvttpd2dq((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvttpd_pi32(M128D_0) \
|
||||
((__m64)__builtin_ia32_cvttpd2pi((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvtpi32_pd(M64_0) \
|
||||
((__m128d)__builtin_ia32_cvtpi2pd((__v2si)(M64_0)))
|
||||
|
||||
#define _mm_cvtps_pd(M128_0) \
|
||||
((__m128d)__builtin_ia32_cvtps2pd((__v4sf)(M128_0)))
|
||||
|
||||
#define _mm_cvtsd_si32(M128D_0) (__builtin_ia32_cvtsd2si((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvtsd_si64(M128D_0) (__builtin_ia32_cvtsd2si64((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvtsd_si64x(M128D_0) (__builtin_ia32_cvtsd2si64((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvttsd_si32(M128D_0) (__builtin_ia32_cvttsd2si((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvttsd_si64(M128D_0) (__builtin_ia32_cvttsd2si64((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvttsd_si64x(M128D_0) \
|
||||
(__builtin_ia32_cvttsd2si64((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_cvtsd_ss(M128_0, M128D_1) \
|
||||
((__m128)__builtin_ia32_cvtsd2ss((__v4sf)(M128_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_cvtsi32_sd(M128D_0, INT_1) \
|
||||
((__m128d)__builtin_ia32_cvtsi2sd((__v2df)(M128D_0), (INT_1)))
|
||||
|
||||
#define _mm_cvtsi64_sd(M128D_0, LL_1) \
|
||||
((__m128d)__builtin_ia32_cvtsi642sd((__v2df)(M128D_0), (LL_1)))
|
||||
|
||||
#define _mm_cvtsi64x_sd(M128D_0, LL_1) \
|
||||
((__m128d)__builtin_ia32_cvtsi642sd((__v2df)(M128D_0), (LL_1)))
|
||||
|
||||
#define _mm_cvtss_sd(M128D_0, M128_1) \
|
||||
((__m128d)__builtin_ia32_cvtss2sd((__v2df)(M128D_0), (__v4sf)(M128_1)))
|
||||
|
||||
#define _mm_shuffle_pd(M128D_0, M128D_1, MSK) \
|
||||
((__m128d)__builtin_ia32_shufpd((__v2df)(M128D_0), (__v2df)(M128D_1), (MSK)))
|
||||
|
||||
#define _mm_unpackhi_pd(M128D_0, M128D_1) \
|
||||
((__m128d)__builtin_ia32_unpckhpd((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_unpacklo_pd(M128D_0, M128D_1) \
|
||||
((__m128d)__builtin_ia32_unpcklpd((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_loadh_pd(M128D_0, DBL_CONSTPTR_1) \
|
||||
((__m128d)__builtin_ia32_loadhpd((__v2df)(M128D_0), \
|
||||
(double const *)(DBL_CONSTPTR_1)))
|
||||
|
||||
#define _mm_loadl_pd(M128D_0, DBL_CONSTPTR_1) \
|
||||
((__m128d)__builtin_ia32_loadlpd((__v2df)(M128D_0), \
|
||||
(double const *)(DBL_CONSTPTR_1)))
|
||||
|
||||
#define _mm_movemask_pd(M128D_0) (__builtin_ia32_movmskpd((__v2df)(M128D_0)))
|
||||
|
||||
#define _mm_stream_pd(DBLPTR_0, M128D_1) \
|
||||
(__builtin_ia32_movntpd((double *)(DBLPTR_0), (__v2df)(M128D_1)))
|
||||
|
||||
/* Plain vector type cast from the argument to __m128; no builtin is called. */
#define _mm_castpd_ps(M128D_0) ((__m128)(M128D_0))
|
||||
|
||||
/* Plain vector type cast from the argument to __m128i; no builtin is called. */
#define _mm_castpd_si128(M128D_0) ((__m128i)(M128D_0))
|
||||
|
||||
/* Plain vector type cast from the argument to __m128d; no builtin is called. */
#define _mm_castps_pd(M128_0) ((__m128d)(M128_0))
|
||||
|
||||
/* Extracts element 0 of M128I_0 viewed as __v2di. _mm_cvtsi128_si64x has the
   same expansion. */
#define _mm_cvtsi128_si64(M128I_0) (((__v2di)(M128I_0))[0])
|
||||
|
||||
#define _mm_cvtsi128_si64x(M128I_0) (((__v2di)(M128I_0))[0])
|
||||
|
||||
/* Forwards to _mm_set_epi64x with the arguments in the same order (LL_1
   first, then LL_0). No conversion is applied to the arguments here.
   NOTE(review): callers in this file pass __m64 values; whether
   _mm_set_epi64x accepts them unconverted is not visible here - confirm. */
#define _mm_set_epi64(LL_1, LL_0) (_mm_set_epi64x((LL_1), (LL_0)))
|
||||
|
||||
/* Passes M64_0 as both arguments of _mm_set_epi64. M64_0 is expanded twice,
   so an argument with side effects is evaluated twice. */
#define _mm_set1_epi64(M64_0) (_mm_set_epi64((M64_0), (M64_0)))
|
||||
|
||||
/* Reversed-argument form of _mm_set_epi64: the two arguments are swapped
   before being forwarded. */
#define _mm_setr_epi64(M64_0, M64_1) (_mm_set_epi64((M64_1), (M64_0)))
|
||||
|
||||
/* Reads a value through PTR after casting it to `__m128i_u const *`. */
#define _mm_loadu_si128(PTR) (*(__m128i_u const *)(PTR))
|
||||
|
||||
/* Reads a __m64_u from PTR and hands it to _mm_set_epi64 as the second
   argument, with a zero __m64 as the first argument. */
#define _mm_loadl_epi64(PTR) \
|
||||
(_mm_set_epi64((__m64)0LL, *(__m64_u *)(__m128i_u const *)(PTR)))
|
||||
|
||||
#define _mm_loadu_si64(PTR) (_mm_loadl_epi64((__m128i_u *)(PTR)))
|
||||
|
||||
/* Assigns M128I_1 to the object M128I_PTR_0 points at; the expression's
   value is the assigned value. */
#define _mm_store_si128(M128I_PTR_0, M128I_1) ((M128I_PTR_0)[0] = (M128I_1))
|
||||
|
||||
/* Assigns M128I_1 through PTR after casting it to `__m128i_u *`. */
#define _mm_storeu_si128(PTR, M128I_1) (*(__m128i_u *)(PTR) = (M128I_1))
|
||||
|
||||
/* Stores element 0 of M128I_1 (viewed as __v2di) to PTR through a __m64_u
   lvalue; only that one 64-bit element is written. The value operand is the
   macro parameter M128I_1. */
#define _mm_storel_epi64(PTR, M128I_1) \
  (*(__m64_u *)(__m128i_u *)(PTR) = (__m64)((__v2di)(M128I_1))[0])
|
||||
|
||||
#define _mm_storeu_si64(PTR, M128I_1) \
|
||||
(_mm_storel_epi64((__m128i_u *)(PTR), M128I_1))
|
||||
|
||||
/* Takes element 0 of M128I_1 (viewed as __v2di) and casts it to __m64. */
#define _mm_movepi64_pi64(M128I_1) ((__m64)((__v2di)(M128I_1))[0])
|
||||
|
||||
/* Calls _mm_set_epi64 with a zero __m64 as the first argument and M64_0 as
   the second. */
#define _mm_movpi64_epi64(M64_0) (_mm_set_epi64((__m64)0LL, (M64_0)))
|
||||
|
||||
#define _mm_move_epi64(M128I_0) \
|
||||
((__m128i)__builtin_ia32_movq128((__v2di)(M128I_0)))
|
||||
|
||||
/* Yields a __m128i whose contents are unspecified. __Y is initialized from
   itself so no particular value is materialized; the trailing `__Y;` makes
   the statement expression evaluate to the vector (a statement expression
   that ends in a declaration has type void). */
#define _mm_undefined_si128() \
  ({                          \
    __m128i __Y = __Y;        \
    __Y;                      \
  })
|
||||
|
||||
#define _mm_cvtepi32_ps(M128I_0) \
|
||||
((__m128)__builtin_ia32_cvtdq2ps((__v4si)(M128I_0)))
|
||||
|
||||
#define _mm_cvtps_epi32(M128_0) \
|
||||
((__m128i)__builtin_ia32_cvtps2dq((__v4sf)(M128_0)))
|
||||
|
||||
#define _mm_cvttps_epi32(M128_0) \
|
||||
((__m128i)__builtin_ia32_cvttps2dq((__v4sf)(M128_0)))
|
||||
|
||||
#define _mm_packs_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_packsswb128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_packs_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_packssdw128((__v4si)(M128I_0), (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_packus_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_packuswb128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_unpackhi_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_punpckhbw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_unpackhi_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_punpckhwd128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_unpackhi_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_punpckhdq128((__v4si)(M128I_0), (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_unpacklo_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_punpcklbw128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_unpacklo_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_punpcklwd128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_unpacklo_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_punpckldq128((__v4si)(M128I_0), (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_add_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)((__v16qu)(M128I_0) + (__v16qu)(M128I_1)))
|
||||
|
||||
#define _mm_add_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)((__v8hu)(M128I_0) + (__v8hu)(M128I_1)))
|
||||
|
||||
#define _mm_adds_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_paddsb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_adds_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_paddsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_adds_epu8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_paddusb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_adds_epu16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_paddusw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_sub_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)((__v16qu)(M128I_0) - (__v16qu)(M128I_1)))
|
||||
|
||||
#define _mm_sub_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)((__v8hu)(M128I_0) - (__v8hu)(M128I_1)))
|
||||
|
||||
#define _mm_sub_epi64(M128I_0, M128I_1) \
|
||||
((__m128i)((__v2du)(M128I_0) - (__v2du)(M128I_1)))
|
||||
|
||||
#define _mm_mulhi_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmulhw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_mullo_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)((__v8hu)(M128I_0) * (__v8hu)(M128I_1)))
|
||||
|
||||
#define _mm_mul_su32(M64_0, M64_1) \
|
||||
((__m64)__builtin_ia32_pmuludq((__v2si)(M64_0), (__v2si)(M64_1)))
|
||||
|
||||
#define _mm_slli_epi16(M128I_0, INT_1) \
|
||||
((__m128i)__builtin_ia32_psllwi128((__v8hi)(M128I_0), (INT_1)))
|
||||
|
||||
#define _mm_srai_epi16(M128I_0, INT_1) \
|
||||
((__m128i)__builtin_ia32_psrawi128((__v8hi)(M128I_0), (INT_1)))
|
||||
|
||||
#define _mm_srai_epi32(M128I_0, INT_1) \
|
||||
((__m128i)__builtin_ia32_psradi128((__v4si)(M128I_0), (INT_1)))
|
||||
|
||||
/* Forwards to __builtin_ia32_psrldqi128 with the count N multiplied by 8.
   NOTE(review): presumably N is a byte count and the builtin expects bits -
   confirm against the compiler's builtin documentation. */
#define _mm_bsrli_si128(M128I_0, N) \
|
||||
((__m128i)__builtin_ia32_psrldqi128((M128I_0), (N)*8))
|
||||
|
||||
/* Forwards to __builtin_ia32_pslldqi128 with the count N multiplied by 8.
   NOTE(review): presumably N is a byte count and the builtin expects bits -
   confirm against the compiler's builtin documentation. */
#define _mm_bslli_si128(M128I_0, N) \
|
||||
((__m128i)__builtin_ia32_pslldqi128((M128I_0), (N)*8))
|
||||
|
||||
#define _mm_srli_epi16(M128I_0, INT_1) \
|
||||
((__m128i)__builtin_ia32_psrlwi128((__v8hi)(M128I_0), (INT_1)))
|
||||
|
||||
#define _mm_srli_epi32(M128I_0, INT_1) \
|
||||
((__m128i)__builtin_ia32_psrldi128((__v4si)(M128I_0), (INT_1)))
|
||||
|
||||
#define _mm_sll_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psllw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_sll_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pslld128((__v4si)(M128I_0), (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_sll_epi64(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psllq128((__v2di)(M128I_0), (__v2di)(M128I_1)))
|
||||
|
||||
#define _mm_sra_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psraw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_sra_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psrad128((__v4si)(M128I_0), (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_srl_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psrlw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_srl_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psrld128((__v4si)(M128I_0), (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_srl_epi64(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_psrlq128((__v2di)(M128I_0), (__v2di)(M128I_1)))
|
||||
|
||||
#define _mm_cmpeq_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)((__v8hi)(M128I_0) == (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_cmpeq_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)((__v4si)(M128I_0) == (__v4si)(M128I_1)))
|
||||
|
||||
/* Lane-wise signed 8-bit "less than": each lane of the result is the vector
   comparison outcome for M128I_0 < M128I_1, written here as the mirrored
   M128I_1 > M128I_0. */
#define _mm_cmplt_epi8(M128I_0, M128I_1) \
  ((__m128i)((__v16qs)(M128I_1) > (__v16qs)(M128I_0)))
|
||||
|
||||
#define _mm_cmplt_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)((__v8hi)(M128I_0) < (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_cmplt_epi32(M128I_0, M128I_1) \
|
||||
((__m128i)((__v4si)(M128I_0) < (__v4si)(M128I_1)))
|
||||
|
||||
#define _mm_cmpgt_epi8(M128I_0, M128I_1) \
|
||||
((__m128i)((__v16qs)(M128I_0) > (__v16qs)(M128I_1)))
|
||||
|
||||
#define _mm_cmpgt_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)((__v8hi)(M128I_0) > (__v8hi)(M128I_1)))
|
||||
|
||||
/* Lane-wise signed 32-bit "greater than": each lane of the result is the
   vector comparison outcome for M128I_0 > M128I_1, written here as the
   mirrored M128I_1 < M128I_0. */
#define _mm_cmpgt_epi32(M128I_0, M128I_1) \
  ((__m128i)((__v4si)(M128I_1) < (__v4si)(M128I_0)))
|
||||
|
||||
#define _mm_extract_epi16(M128I_CONST_0, INT_1) \
|
||||
((unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(M128I_CONST_0), \
|
||||
(INT_1)))
|
||||
|
||||
#define _mm_insert_epi16(M128I_CONST_0, INT_1, INT_2) \
|
||||
((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(M128I_CONST_0), INT_1, INT_2))
|
||||
|
||||
#define _mm_max_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmaxsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_max_epu8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmaxub128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_min_epi16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pminsw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_min_epu8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pminub128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_mulhi_epu16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pmulhuw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_shufflehi_epi16(M128I_0, MSK) \
|
||||
((__m128i)__builtin_ia32_pshufhw((__v8hi)(M128I_0), (MSK)))
|
||||
|
||||
#define _mm_shufflelo_epi16(M128I_0, MSK) \
|
||||
((__m128i)__builtin_ia32_pshuflw((__v8hi)(M128I_0), (MSK)))
|
||||
|
||||
/* Forwards to __builtin_ia32_maskmovdqu with both vector operands viewed as
   __v16qi and the destination cast to char *. The outer parenthesis is
   closed so the expansion is a complete expression. */
#define _mm_maskmoveu_si128(M128I_0, M128I_1, CHAR_PTR_2)            \
  (__builtin_ia32_maskmovdqu((__v16qi)(M128I_0), (__v16qi)(M128I_1), \
                             (char *)(CHAR_PTR_2)))
|
||||
|
||||
#define _mm_avg_epu8(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pavgb128((__v16qi)(M128I_0), (__v16qi)(M128I_1)))
|
||||
|
||||
#define _mm_avg_epu16(M128I_0, M128I_1) \
|
||||
((__m128i)__builtin_ia32_pavgw128((__v8hi)(M128I_0), (__v8hi)(M128I_1)))
|
||||
|
||||
#define _mm_stream_si32(INT_PTR_0, INT_1) \
|
||||
(__builtin_ia32_movnti((int *)(INT_PTR_0), (INT_1)))
|
||||
|
||||
/* Forwards to __builtin_ia32_movnti64. The destination is cast to
   long long * in the same way _mm_stream_si32 casts its destination to
   int *, so a 64-bit integer pointer spelled with a different typedef is
   accepted without an incompatible-pointer diagnostic. */
#define _mm_stream_si64(LL_PTR_0, LL_1) \
  (__builtin_ia32_movnti64((long long *)(LL_PTR_0), (LL_1)))
|
||||
|
||||
#define _mm_stream_si128(M128I_PTR_0, M128I_1) \
|
||||
(__builtin_ia32_movntdq((__v2di *)(M128I_PTR_0), (__v2di)(M128I_1)))
|
||||
|
||||
#define _mm_clflush(PTR) (__builtin_ia32_clflush((void const *)(PTR)))
|
||||
|
||||
#define _mm_lfence() (__builtin_ia32_lfence())
|
||||
|
||||
#define _mm_mfence() (__builtin_ia32_mfence())
|
||||
|
||||
/* Calls _mm_set_epi64x with 0 as the first argument and LL_0 as the second.
   _mm_cvtsi64x_si128 has the same expansion. */
#define _mm_cvtsi64_si128(LL_0) (_mm_set_epi64x(0, (LL_0)))
|
||||
|
||||
#define _mm_cvtsi64x_si128(LL_0) (_mm_set_epi64x(0, (LL_0)))
|
||||
|
||||
#define _mm_castsi128_pd(M128I_0) ((__m128d)(M128I_0))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_EMMINTRIN_H_ */
|
||||
|
|
|
@ -1,14 +1,42 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
|
||||
#define COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_
|
||||
#include "libc/intrin/emmintrin.internal.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*───────────────────────────────────────────────────────────────────────────│─╗
|
||||
│ cosmopolitan § it's a trap! » sse3 ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
/* Forwards both operands, viewed as __v4sf, to __builtin_ia32_haddps. The
   second operand is M128_1 (not M128_0 a second time). The replacement list
   is kept token-identical to the other _mm_hadd_ps definition further down
   in this header so that the repeated #define is a benign redefinition. */
#define _mm_hadd_ps(M128_0, M128_1) \
  ((__m128)__builtin_ia32_haddps((__v4sf)(M128_0), (__v4sf)(M128_1)))
|
||||
#define _mm_addsub_ps(M128_0, M128_1) \
|
||||
((__m128)__builtin_ia32_addsubps((__v4sf)(M128_0), (__v4sf)(M128_1)))
|
||||
|
||||
#define _mm_hadd_ps(M128_0, M128_1) \
|
||||
((__m128)__builtin_ia32_haddps((__v4sf)(M128_0), (__v4sf)(M128_1)))
|
||||
|
||||
#define _mm_hsub_ps(M128_0, M128_1) \
|
||||
((__m128)__builtin_ia32_hsubps((__v4sf)(M128_0), (__v4sf)(M128_1)))
|
||||
|
||||
#define _mm_movehdup_ps(M128_0) \
|
||||
((__m128)__builtin_ia32_movshdup((__v4sf)(M128_0)))
|
||||
|
||||
#define _mm_moveldup_ps(M128_0) \
|
||||
((__m128)__builtin_ia32_movsldup((__v4sf)(M128_0)))
|
||||
|
||||
#define _mm_addsub_pd(M128D_0, M128D_1) \
|
||||
((__m128d)__builtin_ia32_addsubpd((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_hadd_pd(M128D_0, M128D_1) \
|
||||
((__m128d)__builtin_ia32_haddpd((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
#define _mm_hsub_pd(M128D_0, M128D_1) \
|
||||
((__m128d)__builtin_ia32_hsubpd((__v2df)(M128D_0), (__v2df)(M128D_1)))
|
||||
|
||||
/* Calls _mm_shuffle_pd with M128D_0 as both sources and mask 0. M128D_0 is
   expanded twice, so an argument with side effects is evaluated twice. */
#define _mm_movedup_pd(M128D_0) (_mm_shuffle_pd((M128D_0), (M128D_0), 0))
|
||||
|
||||
/* Alias: expands to _mm_load1_pd on the same pointer argument. */
#define _mm_loaddup_pd(CONSTDBL_PTR) (_mm_load1_pd((CONSTDBL_PTR)))
|
||||
|
||||
#define _mm_lddqu_si128(M128I_PTR0) \
|
||||
((__m128i)__builtin_ia32_lddqu((char const *)(M128I_PTR0)))
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_BITS_PMMINTRIN_H_ */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue