Mirror of https://github.com/jart/cosmopolitan.git (synced 2025-07-07 11:48:30 +00:00)
Make minor improvements

Commit 95b142e4e5 (parent 04caf6f9ad)
95 changed files with 3818 additions and 2760 deletions
@@ -10,11 +10,12 @@ void mpsadbw(uint16_t[8], const uint8_t[16], const uint8_t[16], uint8_t);
 __intrin_xmm_t __mpsadbws(__intrin_xmm_t, __intrin_xmm_t);
 #define mpsadbw(C, B, A, I) \
   do { \
-    if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1))) { \
+    if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE4_1), \
+                         1)) { \
       __intrin_xmm_t *Xmm0 = (void *)(C); \
       const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
       const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
-      if (isconstant(I)) { \
+      if (__builtin_constant_p(I)) { \
         if (!X86_NEED(AVX)) { \
           asm("mpsadbw\t%2,%1,%0" \
               : "=x"(*Xmm0) \
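The change in this and the following hunks is mechanical: the wrapper macros likely() and isconstant() are spelled out as the compiler builtins they conventionally expand to, so these headers no longer depend on the wrappers. A minimal sketch of the assumed correspondence (the two #define bodies below are illustrative, not part of this diff):

#include <stdio.h>

/* Conventional definitions assumed here; not taken from this commit.      */
#define likely(x) __builtin_expect(!!(x), 1)   /* hint: x is usually true   */
#define isconstant(x) __builtin_constant_p(x)  /* is x a constant expression? */

int main(int argc, char **argv) {
  (void)argv;
  if (likely(argc > 0)) puts("hot path");      /* same hint as __builtin_expect(..., 1) */
  printf("%d %d\n", isconstant(42), isconstant(argc)); /* prints: 1 0 */
  return 0;
}

With the conventional definitions above, the rewritten macros should generate the same code as before; only the spelling changes.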
@@ -9,35 +9,36 @@ void palignr(void *, const void *, const void *, unsigned long);
 
 #if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
 __intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
-#define palignr(C, B, A, I) \
-  do { \
-    if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3))) { \
-      __intrin_xmm_t *Xmm0 = (void *)(C); \
-      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
-      const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
-      if (isconstant(I)) { \
-        if (!X86_NEED(AVX)) { \
-          asm("palignr\t%2,%1,%0" \
-              : "=x"(*Xmm0) \
-              : "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
-        } else { \
-          asm("vpalignr\t%3,%2,%1,%0" \
-              : "=x"(*Xmm0) \
-              : "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
-        } \
-      } else { \
-        unsigned long Vimm = (I); \
-        typeof(__palignrs) *Fn; \
-        if (likely(Vimm < 32)) { \
-          Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
-          *Xmm0 = Fn(*Xmm1, *Xmm2); \
-        } else { \
-          memset(Xmm0, 0, 16); \
-        } \
-      } \
-    } else { \
-      palignr(C, B, A, I); \
-    } \
+#define palignr(C, B, A, I) \
+  do { \
+    if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3), \
+                         1)) { \
+      __intrin_xmm_t *Xmm0 = (void *)(C); \
+      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
+      const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
+      if (__builtin_constant_p(I)) { \
+        if (!X86_NEED(AVX)) { \
+          asm("palignr\t%2,%1,%0" \
+              : "=x"(*Xmm0) \
+              : "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
+        } else { \
+          asm("vpalignr\t%3,%2,%1,%0" \
+              : "=x"(*Xmm0) \
+              : "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
+        } \
+      } else { \
+        unsigned long Vimm = (I); \
+        typeof(__palignrs) *Fn; \
+        if (__builtin_expect(Vimm < 32, 1)) { \
+          Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
+          *Xmm0 = Fn(*Xmm1, *Xmm2); \
+        } else { \
+          memset(Xmm0, 0, 16); \
+        } \
+      } \
+    } else { \
+      palignr(C, B, A, I); \
+    } \
   } while (0)
 #endif
 
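Both the old and new palignr macros use the same dispatch idea: when the byte count I is a compile-time constant it can be encoded directly as an instruction immediate through the "i" asm constraint, otherwise control falls through to a runtime helper guarded by the Vimm < 32 check. A self-contained sketch of that pattern using an ordinary integer shift in place of the vector op (names below are illustrative, not from the repository):

#include <stdio.h>

/* Runtime fallback, clamped the same way the macro clamps Vimm. */
static unsigned shl_runtime(unsigned x, unsigned long n) {
  return n < 32 ? x << n : 0;
}

/* Constant counts take the foldable branch; variable counts call the helper. */
#define SHL(x, n)                                     \
  (__builtin_constant_p(n) && (unsigned long)(n) < 32 \
       ? (x) << (n)                                   \
       : shl_runtime((x), (n)))

int main(void) {
  unsigned long k = 3;
  printf("%u %u\n", SHL(1u, 4), SHL(1u, k)); /* prints: 16 8 */
  return 0;
}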
@@ -8,27 +8,28 @@ void pslldq(uint8_t[16], const uint8_t[16], unsigned long);
 
 #ifndef __STRICT_ANSI__
 __intrin_xmm_t __pslldqs(__intrin_xmm_t);
-#define pslldq(B, A, I) \
-  do { \
-    if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2))) { \
-      __intrin_xmm_t *Xmm0 = (void *)(B); \
-      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
-      if (isconstant(I)) { \
-        if (!X86_NEED(AVX)) { \
-          asm("pslldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
-        } else { \
-          asm("vpslldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
-        } \
-      } else { \
-        unsigned long Vimm = (I); \
-        typeof(__pslldqs) *Fn; \
-        if (Vimm > 16) Vimm = 16; \
-        Fn = (typeof(__pslldqs) *)((uintptr_t)&__pslldqs + Vimm * 8); \
-        *Xmm0 = Fn(*Xmm1); \
-      } \
-    } else { \
-      pslldq(B, A, I); \
-    } \
+#define pslldq(B, A, I) \
+  do { \
+    if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2), \
+                         1)) { \
+      __intrin_xmm_t *Xmm0 = (void *)(B); \
+      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
+      if (__builtin_constant_p(I)) { \
+        if (!X86_NEED(AVX)) { \
+          asm("pslldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
+        } else { \
+          asm("vpslldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
+        } \
+      } else { \
+        unsigned long Vimm = (I); \
+        typeof(__pslldqs) *Fn; \
+        if (Vimm > 16) Vimm = 16; \
+        Fn = (typeof(__pslldqs) *)((uintptr_t)&__pslldqs + Vimm * 8); \
+        *Xmm0 = Fn(*Xmm1); \
+      } \
+    } else { \
+      pslldq(B, A, I); \
+    } \
   } while (0)
 #endif
 
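For context, pslldq shifts the whole 128-bit value left by I bytes and fills the vacated low bytes with zeros, so any count of 16 or more clears the register; that is what the "if (Vimm > 16) Vimm = 16;" clamp in the fallback path relies on. A byte-wise model of that behavior in plain C (hypothetical helper, not the repository's code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Model of the pslldq result: out[i] = in[i - n] for i >= n, else 0. */
static void pslldq_model(uint8_t out[16], const uint8_t in[16], unsigned long n) {
  if (n > 16) n = 16;           /* same clamp as the macro's fallback */
  memset(out, 0, 16);           /* vacated low bytes become zero      */
  memcpy(out + n, in, 16 - n);  /* remaining bytes move up by n       */
}

int main(void) {
  uint8_t a[16], b[16];
  for (int i = 0; i < 16; ++i) a[i] = (uint8_t)(i + 1);
  pslldq_model(b, a, 3);
  for (int i = 0; i < 16; ++i) printf("%d ", b[i]); /* 0 0 0 1 2 ... 13 */
  printf("\n");
  return 0;
}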
@@ -8,27 +8,28 @@ void psrldq(uint8_t[16], const uint8_t[16], unsigned long);
 
 #ifndef __STRICT_ANSI__
 __intrin_xmm_t __psrldqs(__intrin_xmm_t);
-#define psrldq(B, A, I) \
-  do { \
-    if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2))) { \
-      __intrin_xmm_t *Xmm0 = (void *)(B); \
-      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
-      if (isconstant(I)) { \
-        if (!X86_NEED(AVX)) { \
-          asm("psrldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
-        } else { \
-          asm("vpsrldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
-        } \
-      } else { \
-        unsigned long Vimm = (I); \
-        typeof(__psrldqs) *Fn; \
-        if (Vimm > 16) Vimm = 16; \
-        Fn = (typeof(__psrldqs) *)((uintptr_t)&__psrldqs + Vimm * 8); \
-        *Xmm0 = Fn(*Xmm1); \
-      } \
-    } else { \
-      psrldq(B, A, I); \
-    } \
+#define psrldq(B, A, I) \
+  do { \
+    if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSE2), \
+                         1)) { \
+      __intrin_xmm_t *Xmm0 = (void *)(B); \
+      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(A); \
+      if (__builtin_constant_p(I)) { \
+        if (!X86_NEED(AVX)) { \
+          asm("psrldq\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
+        } else { \
+          asm("vpsrldq\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
+        } \
+      } else { \
+        unsigned long Vimm = (I); \
+        typeof(__psrldqs) *Fn; \
+        if (Vimm > 16) Vimm = 16; \
+        Fn = (typeof(__psrldqs) *)((uintptr_t)&__psrldqs + Vimm * 8); \
+        *Xmm0 = Fn(*Xmm1); \
+      } \
+    } else { \
+      psrldq(B, A, I); \
+    } \
   } while (0)
 #endif
 
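In the non-constant branch of both shift macros, the target function is computed as a byte offset from a single symbol (__pslldqs or __psrldqs plus Vimm * 8), which suggests the helper is laid out as a table of equally sized entry points, one per count. That reading is an inference from the address arithmetic visible in this diff; a rough, portable analogue would index an explicit function-pointer table instead, as sketched below (all names hypothetical):

#include <stdio.h>

typedef unsigned (*shr_fn)(unsigned);

/* One small function per supported count, standing in for one fixed-size
 * entry point per immediate value. */
static unsigned shr0(unsigned x) { return x; }
static unsigned shr1(unsigned x) { return x >> 1; }
static unsigned shr2(unsigned x) { return x >> 2; }
static unsigned shr3(unsigned x) { return x >> 3; }

static const shr_fn kShr[] = {shr0, shr1, shr2, shr3};

static unsigned shr_dispatch(unsigned x, unsigned long n) {
  if (n > 3) n = 3;   /* clamp the index, like "if (Vimm > 16) Vimm = 16;" */
  return kShr[n](x);  /* table lookup instead of base + n * stride         */
}

int main(void) {
  printf("%u\n", shr_dispatch(32, 2)); /* prints: 8 */
  return 0;
}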