mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-03-03 07:29:23 +00:00
Fix SHUFPD and SHUFPS and add fuzz tests
See: https://github.com/jart/blink/issues/72 See: https://github.com/jart/blink/issues/68
This commit is contained in:
parent
2aa044cb0c
commit
2112fb1736
6 changed files with 182 additions and 2 deletions
|
@ -26,7 +26,7 @@
|
|||
*/
|
||||
void(shufpd)(double c[2], const double b[2], const double a[2], uint8_t m) {
  /*
   * Shuffles packed doubles the way the shufpd instruction does:
   *
   *   - bit 0 of m picks which lane of b becomes c[0]
   *   - bit 1 of m picks which lane of a becomes c[1]
   *
   * All other bits of m are ignored. The result is staged in a local
   * array so that c may alias a or b without corrupting the inputs
   * before they have been read.
   */
  double t[2];
  t[0] = b[m & 1];
  t[1] = a[(m >> 1) & 1];
  __builtin_memcpy(c, t, sizeof(t));
}
|
||||
|
|
|
@ -5,6 +5,37 @@ COSMOPOLITAN_C_START_
|
|||
|
||||
/* Plain-function form of the shuffle. The macro of the same name defined
   further down calls it when its inline SSE path is not taken. */
void shufpd(double[2], const double[2], const double[2], uint8_t);
|
||||
|
||||
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
|
||||
/* 16-byte vector of doubles through which the macro reads and writes its
   operands. NOTE(review): forcealign(16) suggests callers must pass
   16-byte aligned pointers -- confirm against the macro definitions. */
typedef double shufpd_t _Vector_size(16) forcealign(16) mayalias;
|
||||
/* Entry symbol of a table of shufpd stubs. The macro never calls it
   directly; it calls the address shufpdjt + imm * 8 instead. */
shufpd_t shufpdjt(shufpd_t, shufpd_t);
|
||||
/* shufpd(C, B, A, I): statement macro that shadows the function above.
 *
 * C is the destination, B and A are the two sources and I is the
 * immediate. When the X86_NEED(SSE) && X86_HAVE(SSSE3) test passes:
 *
 *   - if I is a compile-time constant, a single shufpd instruction is
 *     emitted with B tied to the output register and A as the source
 *     (or the three-operand vshufpd when X86_NEED(AVX) holds);
 *   - otherwise the immediate is truncated to uint8_t and used to index
 *     the shufpdjt stub table in 8-byte steps.
 *
 * In every other case the macro expands to a call of the plain function,
 * since a macro name is not re-expanded inside its own expansion.
 *
 * NOTE(review): the gate tests SSSE3 although the instruction emitted is
 * shufpd -- confirm that the stricter check is intentional.
 */
#define shufpd(C, B, A, I) \
|
||||
do { \
|
||||
if (__builtin_expect(X86_NEED(SSE) && X86_HAVE(SSSE3), 1)) { \
|
||||
shufpd_t *Xmm0 = (void *)(C); \
|
||||
const shufpd_t *Xmm1 = (const shufpd_t *)(B); \
|
||||
const shufpd_t *Xmm2 = (const shufpd_t *)(A); \
|
||||
if (__builtin_constant_p(I)) { \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("shufpd\t%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("vshufpd\t%3,%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
uint8_t Vimm = (I); \
|
||||
typeof(shufpdjt) *Fn; \
|
||||
Fn = (typeof(shufpdjt) *)((uintptr_t)&shufpdjt + Vimm * 8); \
|
||||
*Xmm0 = Fn(*Xmm1, *Xmm2); \
|
||||
} \
|
||||
} else { \
|
||||
shufpd(C, B, A, I); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_SHUFPD_H_ */
|
||||
|
|
30
libc/intrin/shufpdjt.S
Normal file
30
libc/intrin/shufpdjt.S
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
/* Table of 256 shufpd stubs, one for each value of the immediate byte.
   Every stub is "shufpd $i,%xmm1,%xmm0" followed by ret, and the next
   stub is aligned to 8 bytes, so the stub for immediate i is reached at
   shufpdjt + i * 8. The result is left in %xmm0. */
.align 8
|
||||
shufpdjt:
|
||||
i=0
|
||||
.rept 256
|
||||
shufpd $i,%xmm1,%xmm0
|
||||
ret
|
||||
.align 8
|
||||
i=i+1
|
||||
.endr
|
||||
.endfn shufpdjt,globl
|
|
@ -5,6 +5,37 @@ COSMOPOLITAN_C_START_
|
|||
|
||||
/* Plain-function form of the shuffle. The macro of the same name defined
   further down calls it when its inline SSE path is not taken. */
void shufps(float[4], const float[4], const float[4], uint8_t);
|
||||
|
||||
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
|
||||
/* 16-byte vector of floats through which the macro reads and writes its
   operands. NOTE(review): forcealign(16) suggests callers must pass
   16-byte aligned pointers -- confirm against the macro definitions. */
typedef float shufps_t _Vector_size(16) forcealign(16) mayalias;
|
||||
/* Entry symbol of a table of shufps stubs. The macro never calls it
   directly; it calls the address shufpsjt + imm * 8 instead. */
shufps_t shufpsjt(shufps_t, shufps_t);
|
||||
/* shufps(C, B, A, I): statement macro that shadows the function above.
 *
 * C is the destination, B and A are the two sources and I is the
 * immediate. When the X86_NEED(SSE) && X86_HAVE(SSSE3) test passes:
 *
 *   - if I is a compile-time constant, a single shufps instruction is
 *     emitted with B tied to the output register and A as the source
 *     (or the three-operand vshufps when X86_NEED(AVX) holds);
 *   - otherwise the immediate is truncated to uint8_t and used to index
 *     the shufpsjt stub table in 8-byte steps.
 *
 * In every other case the macro expands to a call of the plain function,
 * since a macro name is not re-expanded inside its own expansion.
 *
 * NOTE(review): the gate tests SSSE3 although the instruction emitted is
 * shufps -- confirm that the stricter check is intentional.
 */
#define shufps(C, B, A, I) \
|
||||
do { \
|
||||
if (__builtin_expect(X86_NEED(SSE) && X86_HAVE(SSSE3), 1)) { \
|
||||
shufps_t *Xmm0 = (void *)(C); \
|
||||
const shufps_t *Xmm1 = (const shufps_t *)(B); \
|
||||
const shufps_t *Xmm2 = (const shufps_t *)(A); \
|
||||
if (__builtin_constant_p(I)) { \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("shufps\t%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("vshufps\t%3,%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
uint8_t Vimm = (I); \
|
||||
typeof(shufpsjt) *Fn; \
|
||||
Fn = (typeof(shufpsjt) *)((uintptr_t)&shufpsjt + Vimm * 8); \
|
||||
*Xmm0 = Fn(*Xmm1, *Xmm2); \
|
||||
} \
|
||||
} else { \
|
||||
shufps(C, B, A, I); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_SHUFPS_H_ */
|
||||
|
|
30
libc/intrin/shufpsjt.S
Normal file
30
libc/intrin/shufpsjt.S
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
/* Table of 256 shufps stubs, one for each value of the immediate byte.
   Every stub is "shufps $i,%xmm1,%xmm0" followed by ret, and the next
   stub is aligned to 8 bytes, so the stub for immediate i is reached at
   shufpsjt + i * 8. The result is left in %xmm0. */
.align 8
|
||||
shufpsjt:
|
||||
i=0
|
||||
.rept 256
|
||||
shufps $i,%xmm1,%xmm0
|
||||
ret
|
||||
.align 8
|
||||
i=i+1
|
||||
.endr
|
||||
.endfn shufpsjt,globl
|
|
@ -32,6 +32,7 @@
|
|||
#include "libc/intrin/paddusb.h"
|
||||
#include "libc/intrin/paddusw.h"
|
||||
#include "libc/intrin/paddw.h"
|
||||
#include "libc/intrin/palignr.h"
|
||||
#include "libc/intrin/pand.h"
|
||||
#include "libc/intrin/pandn.h"
|
||||
#include "libc/intrin/pavgb.h"
|
||||
|
@ -99,6 +100,8 @@
|
|||
#include "libc/intrin/punpcklqdq.h"
|
||||
#include "libc/intrin/punpcklwd.h"
|
||||
#include "libc/intrin/pxor.h"
|
||||
#include "libc/intrin/shufpd.h"
|
||||
#include "libc/intrin/shufps.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/log/check.h"
|
||||
#include "libc/mem/gc.internal.h"
|
||||
|
@ -2120,3 +2123,58 @@ TEST(pext, fuzz) {
|
|||
ASSERT_EQ(pext(x, y), (pext)(x, y));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(palignr, fuzz) {
|
||||
int i, imm;
|
||||
int8_t x[16], y[16], a[16], b[16];
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
for (imm = 0; imm < 32; ++imm) {
|
||||
RngSet(x, sizeof(x));
|
||||
RngSet(y, sizeof(y));
|
||||
memcpy(a, x, 16);
|
||||
memcpy(b, y, 16);
|
||||
palignr(a, a, b, imm);
|
||||
(palignr)(x, x, y, imm);
|
||||
ASSERT_EQ(0, memcmp(a, x, 16));
|
||||
ASSERT_EQ(0, memcmp(b, y, 16));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(shufps, fuzz) {
|
||||
int i, imm;
|
||||
char x[16], y[16], a[16], b[16];
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
for (imm = 0; imm < 256; ++imm) {
|
||||
RngSet(x, sizeof(x));
|
||||
RngSet(y, sizeof(y));
|
||||
memcpy(a, x, 16);
|
||||
memcpy(b, y, 16);
|
||||
shufps((void *)a, (void *)a, (void *)b, imm);
|
||||
(shufps)((void *)x, (void *)x, (void *)y, imm);
|
||||
ASSERT_EQ(0, memcmp(a, x, 16));
|
||||
ASSERT_EQ(0, memcmp(b, y, 16));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(shufpd, fuzz) {
|
||||
int i, imm;
|
||||
char x[16], y[16], a[16], b[16];
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
for (imm = 0; imm < 256; ++imm) {
|
||||
RngSet(x, sizeof(x));
|
||||
RngSet(y, sizeof(y));
|
||||
memcpy(a, x, 16);
|
||||
memcpy(b, y, 16);
|
||||
shufpd((void *)a, (void *)a, (void *)b, imm);
|
||||
(shufpd)((void *)x, (void *)x, (void *)y, imm);
|
||||
ASSERT_EQ(0, memcmp(a, x, 16),
|
||||
"imm=%d\n\t"
|
||||
"a=%.*hhs\n\t"
|
||||
"x=%.*hhs",
|
||||
imm, 16, a, 16, x);
|
||||
ASSERT_EQ(0, memcmp(b, y, 16), "imm=%d", imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue