From 2112fb17361ad58a4dea1da17e834288a769cc42 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sun, 5 Mar 2023 16:12:28 -0800
Subject: [PATCH] Fix SHUFPD and SHUFPS and add fuzz tests

See: https://github.com/jart/blink/issues/72
See: https://github.com/jart/blink/issues/68
---
 libc/intrin/shufpd.c           |  4 +--
 libc/intrin/shufpd.h           | 31 ++++++++++++++++++
 libc/intrin/shufpdjt.S         | 30 ++++++++++++++++++
 libc/intrin/shufps.h           | 31 ++++++++++++++++++
 libc/intrin/shufpsjt.S         | 30 ++++++++++++++++++
 test/libc/intrin/intrin_test.c | 58 ++++++++++++++++++++++++++++++++++
 6 files changed, 182 insertions(+), 2 deletions(-)
 create mode 100644 libc/intrin/shufpdjt.S
 create mode 100644 libc/intrin/shufpsjt.S

diff --git a/libc/intrin/shufpd.c b/libc/intrin/shufpd.c
index 51580a019..d327c28ed 100644
--- a/libc/intrin/shufpd.c
+++ b/libc/intrin/shufpd.c
@@ -26,7 +26,7 @@
  */
 void(shufpd)(double c[2], const double b[2], const double a[2], uint8_t m) {
   double t[2];
-  t[0] = a[(m & 0b0000001) >> 0];
-  t[1] = b[(m & 0b0000010) >> 1];
+  t[0] = b[(m & 0b0000001) >> 0]; /* element 0 comes from b, chosen by bit 0 */
+  t[1] = a[(m & 0b0000010) >> 1]; /* element 1 comes from a, chosen by bit 1 */
   __builtin_memcpy(c, t, 16);
 }
diff --git a/libc/intrin/shufpd.h b/libc/intrin/shufpd.h
index 44806d46a..140704c69 100644
--- a/libc/intrin/shufpd.h
+++ b/libc/intrin/shufpd.h
@@ -5,6 +5,37 @@ COSMOPOLITAN_C_START_
 
 void shufpd(double[2], const double[2], const double[2], uint8_t);
 
+#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
+typedef double shufpd_t _Vector_size(16) forcealign(16) mayalias; /* 16-byte vector of double */
+shufpd_t shufpdjt(shufpd_t, shufpd_t); /* base label of the 256-stub table in shufpdjt.S */
+#define shufpd(C, B, A, I) /* macro form; its last branch calls the shufpd() function */ \
+  do { \
+    if (__builtin_expect(X86_NEED(SSE) && X86_HAVE(SSSE3), 1)) { /* NOTE(review): SHUFPD is an SSE2 instruction; confirm the SSSE3 gate is intended */ \
+      shufpd_t *Xmm0 = (void *)(C); /* destination */ \
+      const shufpd_t *Xmm1 = (const shufpd_t *)(B); /* first source */ \
+      const shufpd_t *Xmm2 = (const shufpd_t *)(A); /* second source */ \
+      if (__builtin_constant_p(I)) { /* immediate known at compile time: emit the instruction inline */ \
+        if (!X86_NEED(AVX)) { /* two-operand SSE form */ \
+          asm("shufpd\t%2,%1,%0" \
+              : "=x"(*Xmm0) \
+              : "x"(*Xmm2), "i"(I), "0"(*Xmm1)); /* "0" ties B to the output register */ \
+        } else { /* build requires AVX: three-operand form */ \
+          asm("vshufpd\t%3,%2,%1,%0" \
+              : "=x"(*Xmm0) \
+              : "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
+        } \
+      } else { /* immediate only known at run time: call into the stub table */ \
+        uint8_t Vimm = (I); /* I is evaluated once and truncated to 8 bits */ \
+        typeof(shufpdjt) *Fn; \
+        Fn = (typeof(shufpdjt) *)((uintptr_t)&shufpdjt + Vimm * 8); /* stubs in shufpdjt.S are laid out 8 bytes apart */ \
+        *Xmm0 = Fn(*Xmm1, *Xmm2); \
+      } \
+    } else { \
+      shufpd(C, B, A, I); /* a macro is not re-expanded inside itself, so this calls the function */ \
+    } \
+  } while (0)
+#endif
+
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
 #endif /* COSMOPOLITAN_LIBC_INTRIN_SHUFPD_H_ */
diff --git a/libc/intrin/shufpdjt.S b/libc/intrin/shufpdjt.S
new file mode 100644
index 000000000..7ca86aa0b
--- /dev/null
+++ b/libc/intrin/shufpdjt.S
@@ -0,0 +1,30 @@
+/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
+│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/macros.internal.h"
+
+	.align	8
+shufpdjt:
+	i=0
+	.rept	256	/* one stub for each possible 8-bit immediate */
+	shufpd	$i,%xmm1,%xmm0	/* shuffle with immediate i; result left in xmm0 */
+	ret
+	.align	8	/* pad so the next stub starts on an 8-byte boundary */
+	i=i+1
+	.endr
+	.endfn	shufpdjt,globl
diff --git a/libc/intrin/shufps.h b/libc/intrin/shufps.h
index 2c04e38cd..ef8b576cf 100644
--- a/libc/intrin/shufps.h
+++ b/libc/intrin/shufps.h
@@ -5,6 +5,37 @@ COSMOPOLITAN_C_START_
 
 void shufps(float[4], const float[4], const float[4], uint8_t);
 
+#if !defined(__STRICT_ANSI__) && !defined(__chibicc__)
+typedef float shufps_t _Vector_size(16) forcealign(16) mayalias; /* 16-byte vector of float */
+shufps_t shufpsjt(shufps_t, shufps_t); /* base label of the 256-stub table in shufpsjt.S */
+#define shufps(C, B, A, I) /* macro form; its last branch calls the shufps() function */ \
+  do { \
+    if (__builtin_expect(X86_NEED(SSE) && X86_HAVE(SSSE3), 1)) { /* NOTE(review): SHUFPS is an SSE instruction; confirm the SSSE3 gate is intended */ \
+      shufps_t *Xmm0 = (void *)(C); /* destination */ \
+      const shufps_t *Xmm1 = (const shufps_t *)(B); /* first source */ \
+      const shufps_t *Xmm2 = (const shufps_t *)(A); /* second source */ \
+      if (__builtin_constant_p(I)) { /* immediate known at compile time: emit the instruction inline */ \
+        if (!X86_NEED(AVX)) { /* two-operand SSE form */ \
+          asm("shufps\t%2,%1,%0" \
+              : "=x"(*Xmm0) \
+              : "x"(*Xmm2), "i"(I), "0"(*Xmm1)); /* "0" ties B to the output register */ \
+        } else { /* build requires AVX: three-operand form */ \
+          asm("vshufps\t%3,%2,%1,%0" \
+              : "=x"(*Xmm0) \
+              : "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
+        } \
+      } else { /* immediate only known at run time: call into the stub table */ \
+        uint8_t Vimm = (I); /* I is evaluated once and truncated to 8 bits */ \
+        typeof(shufpsjt) *Fn; \
+        Fn = (typeof(shufpsjt) *)((uintptr_t)&shufpsjt + Vimm * 8); /* stubs in shufpsjt.S are laid out 8 bytes apart */ \
+        *Xmm0 = Fn(*Xmm1, *Xmm2); \
+      } \
+    } else { \
+      shufps(C, B, A, I); /* a macro is not re-expanded inside itself, so this calls the function */ \
+    } \
+  } while (0)
+#endif
+
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
 #endif /* COSMOPOLITAN_LIBC_INTRIN_SHUFPS_H_ */
diff --git a/libc/intrin/shufpsjt.S b/libc/intrin/shufpsjt.S
new file mode 100644
index 000000000..dfb862ee4
--- /dev/null
+++ b/libc/intrin/shufpsjt.S
@@ -0,0 +1,30 @@
+/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8     -*-│
+│vi: set et ft=asm ts=8 tw=8 fenc=utf-8                                     :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/macros.internal.h"
+
+	.align	8
+shufpsjt:
+	i=0
+	.rept	256	/* one stub for each possible 8-bit immediate */
+	shufps	$i,%xmm1,%xmm0	/* shuffle with immediate i; result left in xmm0 */
+	ret
+	.align	8	/* pad so the next stub starts on an 8-byte boundary */
+	i=i+1
+	.endr
+	.endfn	shufpsjt,globl
diff --git a/test/libc/intrin/intrin_test.c b/test/libc/intrin/intrin_test.c
index 7a205ba4b..a23f2ce29 100644
--- a/test/libc/intrin/intrin_test.c
+++ b/test/libc/intrin/intrin_test.c
@@ -32,6 +32,7 @@
 #include "libc/intrin/paddusb.h"
 #include "libc/intrin/paddusw.h"
 #include "libc/intrin/paddw.h"
+#include "libc/intrin/palignr.h"
 #include "libc/intrin/pand.h"
 #include "libc/intrin/pandn.h"
 #include "libc/intrin/pavgb.h"
@@ -99,6 +100,8 @@
 #include "libc/intrin/punpcklqdq.h"
 #include "libc/intrin/punpcklwd.h"
 #include "libc/intrin/pxor.h"
+#include "libc/intrin/shufpd.h"
+#include "libc/intrin/shufps.h"
 #include "libc/limits.h"
 #include "libc/log/check.h"
 #include "libc/mem/gc.internal.h"
@@ -2120,3 +2123,58 @@ TEST(pext, fuzz) {
     ASSERT_EQ(pext(x, y), (pext)(x, y));
   }
 }
+
+TEST(palignr, fuzz) {
+  int i, imm;
+  int8_t x[16], y[16], a[16], b[16];
+  for (i = 0; i < 1000; ++i) {
+    for (imm = 0; imm < 32; ++imm) {
+      RngSet(x, sizeof(x));
+      RngSet(y, sizeof(y));
+      memcpy(a, x, 16); /* a/b are copies of x/y so both forms see equal inputs */
+      memcpy(b, y, 16);
+      palignr(a, a, b, imm); /* unparenthesized form */
+      (palignr)(x, x, y, imm); /* parenthesized name suppresses function-like macro expansion */
+      ASSERT_EQ(0, memcmp(a, x, 16)); /* both forms must produce the same result */
+      ASSERT_EQ(0, memcmp(b, y, 16)); /* both copies of the last operand must still match */
+    }
+  }
+}
+
+TEST(shufps, fuzz) {
+  int i, imm;
+  char x[16], y[16], a[16], b[16];
+  for (i = 0; i < 1000; ++i) {
+    for (imm = 0; imm < 256; ++imm) { /* every 8-bit immediate */
+      RngSet(x, sizeof(x));
+      RngSet(y, sizeof(y));
+      memcpy(a, x, 16); /* a/b are copies of x/y so both forms see equal inputs */
+      memcpy(b, y, 16);
+      shufps((void *)a, (void *)a, (void *)b, imm); /* macro from shufps.h */
+      (shufps)((void *)x, (void *)x, (void *)y, imm); /* parenthesized name: the plain function */
+      ASSERT_EQ(0, memcmp(a, x, 16)); /* both forms must produce the same result */
+      ASSERT_EQ(0, memcmp(b, y, 16)); /* both copies of the last operand must still match */
+    }
+  }
+}
+
+TEST(shufpd, fuzz) {
+  int i, imm;
+  char x[16], y[16], a[16], b[16];
+  for (i = 0; i < 1000; ++i) {
+    for (imm = 0; imm < 256; ++imm) { /* every 8-bit immediate */
+      RngSet(x, sizeof(x));
+      RngSet(y, sizeof(y));
+      memcpy(a, x, 16); /* a/b are copies of x/y so both forms see equal inputs */
+      memcpy(b, y, 16);
+      shufpd((void *)a, (void *)a, (void *)b, imm); /* macro from shufpd.h */
+      (shufpd)((void *)x, (void *)x, (void *)y, imm); /* parenthesized name: the plain function */
+      ASSERT_EQ(0, memcmp(a, x, 16), /* both forms must produce the same result */
+                "imm=%d\n\t"
+                "a=%.*hhs\n\t"
+                "x=%.*hhs",
+                imm, 16, a, 16, x);
+      ASSERT_EQ(0, memcmp(b, y, 16), "imm=%d", imm); /* both copies of the last operand must still match */
+    }
+  }
+}