mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-15 23:29:56 +00:00
Fix SHUFPD and SHUFPS and add fuzz tests
See: https://github.com/jart/blink/issues/72 See: https://github.com/jart/blink/issues/68
This commit is contained in:
parent
2aa044cb0c
commit
2112fb1736
6 changed files with 182 additions and 2 deletions
|
@ -26,7 +26,7 @@
|
|||
*/
|
||||
/**
 * Shuffles packed doubles (portable equivalent of x86 SHUFPD).
 *
 * Bit 0 of the mask picks which element of `b` becomes the low result
 * element, and bit 1 picks which element of `a` becomes the high result
 * element. All other mask bits are ignored.
 *
 * The name is parenthesized so that a function-like macro also called
 * `shufpd` does not expand at this definition.
 *
 * @param c receives the two shuffled doubles; may alias `a` or `b`
 * @param b supplies the low result element (first/destination operand)
 * @param a supplies the high result element (second/source operand)
 * @param m immediate control byte; only bits 0 and 1 are used
 */
void(shufpd)(double c[2], const double b[2], const double a[2], uint8_t m) {
  double t[2];
  /* stage the result in a temporary so `c` may overlap either input */
  t[0] = b[m & 1];
  t[1] = a[(m >> 1) & 1];
  __builtin_memcpy(c, t, sizeof(t));
}
|
||||
|
|
|
@ -5,6 +5,37 @@ COSMOPOLITAN_C_START_
|
|||
|
||||
/**
 * Shuffles packed doubles (x86 SHUFPD semantics).
 *
 * Arguments are, in order: the output array, the first (destination)
 * operand, the second (source) operand, and the immediate control byte.
 * Per the SHUFPD instruction definition, bit 0 of the immediate selects
 * the low result element from the first operand and bit 1 selects the
 * high result element from the second operand.
 */
void shufpd(double[2], const double[2], const double[2], uint8_t);

#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
typedef double shufpd_t _Vector_size(16) forcealign(16) mayalias;

/*
 * Assembly jump table: the code at `shufpdjt + imm * 8` is called below
 * with the two operand vectors and returns the shuffled vector.
 */
shufpd_t shufpdjt(shufpd_t, shufpd_t);

/*
 * Accelerated shufpd(C, B, A, I).
 *
 * When the X86_NEED(SSE) && X86_HAVE(SSSE3) check passes, the shuffle is
 * done with the hardware instruction:
 *
 *   - if I is a compile-time constant it is emitted as the instruction's
 *     immediate (the VEX-encoded form is used when X86_NEED(AVX) holds);
 *   - otherwise the immediate indexes the `shufpdjt` jump table, whose
 *     entries are 8 bytes apart.
 *
 * When the check fails, the trailing shufpd(C, B, A, I) is not expanded
 * again (a macro never re-expands inside its own expansion), so it calls
 * the plain function declared above.
 *
 * NOTE(review): C, B and A are accessed through `shufpd_t`, which is
 * declared forcealign(16); presumably callers on the fast path must pass
 * 16-byte aligned storage. Confirm against the project's definition of
 * forcealign.
 */
#define shufpd(C, B, A, I)                                               \
  do {                                                                   \
    if (__builtin_expect(X86_NEED(SSE) && X86_HAVE(SSSE3), 1)) {         \
      shufpd_t *Xmm0 = (void *)(C);                                      \
      const shufpd_t *Xmm1 = (const shufpd_t *)(B);                      \
      const shufpd_t *Xmm2 = (const shufpd_t *)(A);                      \
      if (__builtin_constant_p(I)) {                                     \
        if (!X86_NEED(AVX)) {                                            \
          /* two-operand form: output register starts out holding B */   \
          asm("shufpd\t%2,%1,%0"                                         \
              : "=x"(*Xmm0)                                              \
              : "x"(*Xmm2), "i"(I), "0"(*Xmm1));                         \
        } else {                                                         \
          asm("vshufpd\t%3,%2,%1,%0"                                     \
              : "=x"(*Xmm0)                                              \
              : "x"(*Xmm1), "x"(*Xmm2), "i"(I));                         \
        }                                                                \
      } else {                                                           \
        uint8_t Vimm = (I);                                              \
        typeof(shufpdjt) *Fn;                                            \
        /* runtime immediate: pick the matching 8-byte table entry */    \
        Fn = (typeof(shufpdjt) *)((uintptr_t)&shufpdjt + Vimm * 8);      \
        *Xmm0 = Fn(*Xmm1, *Xmm2);                                        \
      }                                                                  \
    } else {                                                             \
      shufpd(C, B, A, I);                                                \
    }                                                                    \
  } while (0)
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_SHUFPD_H_ */
|
||||
|
|
30
libc/intrin/shufpdjt.S
Normal file
30
libc/intrin/shufpdjt.S
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
//	Jump table of SHUFPD thunks, one per possible 8-bit immediate.
//
//	Entry `i` executes `shufpd $i,%xmm1,%xmm0` and returns. Every
//	entry is followed by `.align 8`, and the C macro in shufpd.h
//	reaches entry `i` by calling `shufpdjt + i * 8`, so each
//	instruction-plus-ret pair must fit within eight bytes.
//
//	Operands follow the C prototype
//	`shufpd_t shufpdjt(shufpd_t, shufpd_t)`; under the System V
//	x86-64 calling convention that places the first vector in
//	%xmm0 and the second in %xmm1, with the result in %xmm0.
	.align	8
shufpdjt:
	i=0
	.rept	256
	shufpd	$i,%xmm1,%xmm0
	ret
	.align	8
	i=i+1
	.endr
	.endfn	shufpdjt,globl
|
|
@ -5,6 +5,37 @@ COSMOPOLITAN_C_START_
|
|||
|
||||
/**
 * Shuffles packed floats (x86 SHUFPS semantics).
 *
 * Arguments are, in order: the output array, the first (destination)
 * operand, the second (source) operand, and the immediate control byte.
 * Per the SHUFPS instruction definition, the two low result elements
 * are selected from the first operand by immediate bits 1:0 and 3:2,
 * and the two high result elements are selected from the second operand
 * by immediate bits 5:4 and 7:6.
 */
void shufps(float[4], const float[4], const float[4], uint8_t);

#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
typedef float shufps_t _Vector_size(16) forcealign(16) mayalias;

/*
 * Assembly jump table: the code at `shufpsjt + imm * 8` is called below
 * with the two operand vectors and returns the shuffled vector.
 */
shufps_t shufpsjt(shufps_t, shufps_t);

/*
 * Accelerated shufps(C, B, A, I).
 *
 * When the X86_NEED(SSE) && X86_HAVE(SSSE3) check passes, the shuffle is
 * done with the hardware instruction:
 *
 *   - if I is a compile-time constant it is emitted as the instruction's
 *     immediate (the VEX-encoded form is used when X86_NEED(AVX) holds);
 *   - otherwise the immediate indexes the `shufpsjt` jump table, whose
 *     entries are 8 bytes apart.
 *
 * When the check fails, the trailing shufps(C, B, A, I) is not expanded
 * again (a macro never re-expands inside its own expansion), so it calls
 * the plain function declared above.
 *
 * NOTE(review): C, B and A are accessed through `shufps_t`, which is
 * declared forcealign(16); presumably callers on the fast path must pass
 * 16-byte aligned storage. Confirm against the project's definition of
 * forcealign.
 */
#define shufps(C, B, A, I)                                               \
  do {                                                                   \
    if (__builtin_expect(X86_NEED(SSE) && X86_HAVE(SSSE3), 1)) {         \
      shufps_t *Xmm0 = (void *)(C);                                      \
      const shufps_t *Xmm1 = (const shufps_t *)(B);                      \
      const shufps_t *Xmm2 = (const shufps_t *)(A);                      \
      if (__builtin_constant_p(I)) {                                     \
        if (!X86_NEED(AVX)) {                                            \
          /* two-operand form: output register starts out holding B */   \
          asm("shufps\t%2,%1,%0"                                         \
              : "=x"(*Xmm0)                                              \
              : "x"(*Xmm2), "i"(I), "0"(*Xmm1));                         \
        } else {                                                         \
          asm("vshufps\t%3,%2,%1,%0"                                     \
              : "=x"(*Xmm0)                                              \
              : "x"(*Xmm1), "x"(*Xmm2), "i"(I));                         \
        }                                                                \
      } else {                                                           \
        uint8_t Vimm = (I);                                              \
        typeof(shufpsjt) *Fn;                                            \
        /* runtime immediate: pick the matching 8-byte table entry */    \
        Fn = (typeof(shufpsjt) *)((uintptr_t)&shufpsjt + Vimm * 8);      \
        *Xmm0 = Fn(*Xmm1, *Xmm2);                                        \
      }                                                                  \
    } else {                                                             \
      shufps(C, B, A, I);                                                \
    }                                                                    \
  } while (0)
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_SHUFPS_H_ */
|
||||
|
|
30
libc/intrin/shufpsjt.S
Normal file
30
libc/intrin/shufpsjt.S
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
//	Jump table of SHUFPS thunks, one per possible 8-bit immediate.
//
//	Entry `i` executes `shufps $i,%xmm1,%xmm0` and returns. Every
//	entry is followed by `.align 8`, and the C macro in shufps.h
//	reaches entry `i` by calling `shufpsjt + i * 8`, so each
//	instruction-plus-ret pair must fit within eight bytes.
//
//	Operands follow the C prototype
//	`shufps_t shufpsjt(shufps_t, shufps_t)`; under the System V
//	x86-64 calling convention that places the first vector in
//	%xmm0 and the second in %xmm1, with the result in %xmm0.
	.align	8
shufpsjt:
	i=0
	.rept	256
	shufps	$i,%xmm1,%xmm0
	ret
	.align	8
	i=i+1
	.endr
	.endfn	shufpsjt,globl
|
Loading…
Add table
Add a link
Reference in a new issue