Upgrade pl_mpeg

This change gets printvideo working on aarch64. Performance improvements
have been introduced for magikarp decimation on aarch64. The last of the
old portable x86 intrinsics library is gone, but it still lives in Blink.
This commit is contained in:
Justine Tunney 2024-09-06 19:02:53 -07:00
parent 5d3b91d8b9
commit d1157d471f
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
46 changed files with 4587 additions and 4449 deletions

View file

@ -1,82 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
#define COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
#include "libc/dce.h"
#include "libc/nexgen32e/x86feature.h"
/* Constraint prefixes passed as the FLAGS argument of INTRIN_SSEVEX_X_X_X_,
   which pastes them directly in front of an "x" input constraint. "%" is
   the GCC extended-asm modifier declaring an operand commutative with the
   operand that follows it; the non-commutative variant expands to nothing. */
#define INTRIN_COMMUTATIVE "%"
#define INTRIN_NONCOMMUTATIVE
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
/* 16-byte vector of char used by the INTRIN_SSEVEX_* macros to view caller
   buffers as one XMM-sized value. __may_alias__ permits accessing objects
   of other types through it; __aligned__(16) declares 16-byte alignment.
   NOTE(review): caller buffers are presumably expected to satisfy that
   alignment -- confirm against call sites. */
typedef char __intrin_xmm_t
__attribute__((__vector_size__(16), __aligned__(16), __may_alias__));
/* Vector = OP(vector, vector) dispatcher: stores OP applied to *B and *C
 * into *A.
 *
 * When X86_HAVE(ISA) is true, A, B and C are reinterpreted as pointers to
 * __intrin_xmm_t and the instruction named by the string literal OP is
 * emitted as inline asm:
 *
 *   - !X86_NEED(AVX): two-operand form. The output register is preloaded
 *     with *B through the "0" matching constraint and *C is the source.
 *   - otherwise: "v"-prefixed three-operand form with *B and *C as
 *     separate inputs.
 *
 * When X86_HAVE(ISA) is false the macro calls PURE(A, B, C) instead.
 *
 * FLAGS is a string literal pasted in front of the first "x" input
 * constraint (INTRIN_COMMUTATIVE or INTRIN_NONCOMMUTATIVE).
 *
 * NOTE(review): X86_HAVE / X86_NEED are defined outside this header
 * (libc/nexgen32e/x86feature.h); presumably a feature-availability test
 * and a build-time requirement test respectively -- confirm there.
 */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) \
do { \
if (X86_HAVE(ISA)) { \
__intrin_xmm_t *Xmm0 = (void *)(A); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(C); \
if (!X86_NEED(AVX)) { \
asm(OP "\t%1,%0" : "=x"(*Xmm0) : FLAGS "x"(*Xmm2), "0"(*Xmm1)); \
} else { \
asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : FLAGS "x"(*Xmm1), "x"(*Xmm2)); \
} \
} else { \
PURE(A, B, C); \
} \
} while (0)
/* Vector = OP(vector, immediate) dispatcher for instructions whose legacy
 * encoding already takes a separate source and destination register.
 *
 * When X86_HAVE(ISA) is true, A and B are reinterpreted as pointers to
 * __intrin_xmm_t and "OP $I, *B, *A" is emitted as inline asm; the only
 * difference between the two branches is the "v" mnemonic prefix used when
 * X86_NEED(AVX) is true. I is bound with the "i" constraint, so it has to
 * be an immediate the compiler can resolve at build time.
 *
 * When X86_HAVE(ISA) is false the macro calls PURE(A, B, I) instead.
 */
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) \
do { \
if (X86_HAVE(ISA)) { \
__intrin_xmm_t *Xmm0 = (void *)(A); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
if (!X86_NEED(AVX)) { \
asm(OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} else { \
asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
PURE(A, B, I); \
} \
} while (0)
/* Vector = OP(vector) dispatcher for single-input instructions.
 *
 * When X86_HAVE(ISA) is true, A and B are reinterpreted as pointers to
 * __intrin_xmm_t and OP is emitted as inline asm:
 *
 *   - !X86_NEED(AVX): *B is tied to the output register via the "0"
 *     matching constraint, so source and destination are the same
 *     register.
 *   - otherwise: "v"-prefixed form with *B in its own input register.
 *
 * When X86_HAVE(ISA) is false the macro calls PURE(A, B) instead.
 */
#define INTRIN_SSEVEX_X_X_(PURE, ISA, OP, A, B) \
do { \
if (X86_HAVE(ISA)) { \
__intrin_xmm_t *Xmm0 = (void *)(A); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
if (!X86_NEED(AVX)) { \
asm(OP "\t%1,%0" : "=x"(*Xmm0) : "0"(*Xmm1)); \
} else { \
asm("v" OP "\t%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1)); \
} \
} else { \
PURE(A, B); \
} \
} while (0)
/* Vector = OP(vector, immediate) dispatcher for instructions whose legacy
 * encoding modifies its register operand in place.
 *
 * Unlike the other dispatchers in this header, the asm path is also gated
 * on !IsModeDbg(), so PURE(A, B, I) is what runs whenever IsModeDbg() is
 * true. NOTE(review): IsModeDbg() presumably comes from libc/dce.h and
 * identifies debug builds -- confirm.
 *
 * On the asm path A and B are reinterpreted as pointers to __intrin_xmm_t:
 *
 *   - !X86_NEED(AVX): "OP $I, reg" with *B preloaded into the output
 *     register through the "0" matching constraint.
 *   - otherwise: "vOP $I, *B, *A".
 *
 * I is bound with the "i" constraint, so it has to be an immediate the
 * compiler can resolve at build time.
 */
#define INTRIN_SSEVEX_X_I_(PURE, ISA, OP, A, B, I) \
do { \
if (!IsModeDbg() && X86_HAVE(ISA)) { \
__intrin_xmm_t *Xmm0 = (void *)(A); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
if (!X86_NEED(AVX)) { \
asm(OP "\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
PURE(A, B, I); \
} \
} while (0)
#else
/* Fallback definitions used when the build is not x86-64 or defines
   __STRICT_ANSI__: every dispatcher ignores ISA/OP/FLAGS and expands
   straight to a call of the PURE implementation. */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) PURE(A, B, C)
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) PURE(A, B, I)
#define INTRIN_SSEVEX_X_I_(PURE, ISA, OP, A, B, I) PURE(A, B, I)
#define INTRIN_SSEVEX_X_X_(PURE, ISA, OP, A, B) PURE(A, B)
#endif /* X86 && !ANSI */
#endif /* COSMOPOLITAN_LIBC_INTRIN_MACROS_H_ */

View file

@ -1,40 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/packsswb.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/str/str.h"
/**
 * Narrows sixteen shorts to signed bytes with saturation.
 *
 * Output lanes 0..7 come from the eight elements of b and output lanes
 * 8..15 come from the eight elements of c. Every element is clamped to
 * the range [INT8_MIN, INT8_MAX] before being stored.
 *
 * The result is staged in a local buffer and copied out at the end, so
 * the destination may overlap either input.
 *
 * @param a receives the sixteen packed bytes
 * @param b supplies the shorts for the low half of the result
 * @param c supplies the shorts for the high half of the result
 * @see packuswb()
 * @mayalias
 */
void(packsswb)(int8_t a[16], const int16_t b[8], const int16_t c[8]) {
  int8_t packed[16];
  for (unsigned lane = 0; lane != 16; ++lane) {
    int v = lane < 8 ? b[lane] : c[lane - 8];
    if (v < INT8_MIN) {
      v = INT8_MIN;
    } else if (v > INT8_MAX) {
      v = INT8_MAX;
    }
    packed[lane] = v;
  }
  __builtin_memcpy(a, packed, sizeof(packed));
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
#define COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation. At call sites the name is shadowed by the
   function-like macro below, which receives this function as its PURE
   fallback. */
void packsswb(int8_t[16], const int16_t[8], const int16_t[8]);
/* Routes packsswb(A, B, C) through INTRIN_SSEVEX_X_X_X_ with the SSE2
   feature name, the "packsswb" mnemonic and the non-commutative operand
   flag. */
#define packsswb(A, B, C) \
INTRIN_SSEVEX_X_X_X_(packsswb, SSE2, "packsswb", INTRIN_NONCOMMUTATIVE, A, \
B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_ */

View file

@ -1,40 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/packuswb.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/str/str.h"
/**
 * Narrows sixteen shorts to unsigned bytes with saturation.
 *
 * Output lanes 0..7 come from the eight elements of b and output lanes
 * 8..15 come from the eight elements of c. Every element is clamped to
 * the range [0, UINT8_MAX] before being stored, so negative inputs
 * become 0 and inputs above 255 become 255.
 *
 * The result is staged in a local buffer and copied out at the end, so
 * the destination may overlap either input.
 *
 * @param a receives the sixteen packed bytes
 * @param b supplies the shorts for the low half of the result
 * @param c supplies the shorts for the high half of the result
 * @see packsswb()
 * @mayalias
 */
void(packuswb)(uint8_t a[16], const int16_t b[8], const int16_t c[8]) {
  unsigned i;
  uint8_t r[16];
  for (i = 0; i < 16; ++i) {
    /* Clamp in plain int. The lower bound is tested first against a
       signed literal so the comparison can never be promoted to an
       unsigned one, which would turn negative inputs into 255. */
    int x = i < 8 ? b[i] : c[i - 8];
    if (x < 0) {
      x = 0;
    } else if (x > UINT8_MAX) {
      x = UINT8_MAX;
    }
    r[i] = x;
  }
  __builtin_memcpy(a, r, sizeof(r));
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
#define COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation. At call sites the name is shadowed by the
   function-like macro below, which receives this function as its PURE
   fallback. */
void packuswb(uint8_t[16], const int16_t[8], const int16_t[8]);
/* Routes packuswb(A, B, C) through INTRIN_SSEVEX_X_X_X_ with the SSE2
   feature name, the "packuswb" mnemonic and the non-commutative operand
   flag. */
#define packuswb(A, B, C) \
INTRIN_SSEVEX_X_X_X_(packuswb, SSE2, "packuswb", INTRIN_NONCOMMUTATIVE, A, \
B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_ */

View file

@ -1,39 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/paddw.h"
#include "libc/str/str.h"
/**
 * Adds eight pairs of 16-bit integers lane by lane.
 *
 * Each sum is computed after integer promotion and then converted back
 * to int16_t on store. The sums are staged in a local buffer and copied
 * out at the end, so the destination may overlap either input.
 *
 * @param a receives the eight sums
 * @param b supplies the first input vector
 * @param c supplies the second input vector
 * @note the addition happens in int, so ubsan has no overflow to report
 * @see paddsw()
 * @mayalias
 */
void(paddw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  int16_t sum[8];
  for (unsigned lane = 0; lane != 8; ++lane) {
    sum[lane] = b[lane] + c[lane];
  }
  __builtin_memcpy(a, sum, sizeof(sum));
}

View file

@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation. At call sites the name is shadowed by the
   function-like macro below, which receives this function as its PURE
   fallback. */
void paddw(int16_t[8], const int16_t[8], const int16_t[8]);
/* Routes paddw(A, B, C) through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature
   name, the "paddw" mnemonic and the commutative operand flag. */
#define paddw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(paddw, SSE2, "paddw", INTRIN_COMMUTATIVE, A, B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDW_H_ */

View file

@ -1,43 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/palignr.h"
#include "libc/assert.h"
#include "libc/macros.h"
/**
 * Overlaps vectors.
 *
 * Conceptually the 16 bytes of a, the 16 bytes of b and 16 zero bytes
 * are laid end to end, and c receives the 16-byte window that starts
 * i bytes into that sequence:
 *
 *     i = 0       means c = a
 *     0 < i < 16  means c = tail of a followed by head of b
 *     i = 16      means c = b
 *     16 < i < 32 means c = tail of b followed by zeros
 *     i >= 32     means c = 0
 *
 * Both inputs are copied into a scratch buffer before c is written, so
 * c may overlap a or b.
 *
 * @param c receives 16 bytes of output
 * @param b supplies the upper 16 bytes of the concatenation
 * @param a supplies the lower 16 bytes of the concatenation
 * @param i is the byte offset of the window; goes faster as constexpr
 * @note not compatible with mmx
 * @see pvalignr()
 * @mayalias
 */
void(palignr)(void *c, const void *b, const void *a, unsigned long i) {
  char window[48];
  unsigned long skip = i < 32 ? i : 32;
  __builtin_memset(window + 32, 0, 16);
  __builtin_memcpy(window + 16, b, 16);
  __builtin_memcpy(window, a, 16);
  __builtin_memcpy(c, window + skip, 16);
}

View file

@ -1,45 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#include "libc/intrin/macros.h"
#include "libc/str/str.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation; also the fallback invoked by the macro
   below when its fast path is not taken. */
void palignr(void *, const void *, const void *, unsigned long);
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__) && defined(__x86_64__)
/* Entry label of a table of 32 code stubs spaced 8 bytes apart; the macro
   below calls into it at offset I * 8 when I is not a compile-time
   constant. */
__intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
/* Inline fast path for palignr(C, B, A, I).
 *
 * Taken when !IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3); otherwise
 * the macro expands to a call of the palignr() function above.
 *
 *   - I is a compile-time constant: emits a single palignr instruction
 *     (destination preloaded with *B via the "0" constraint, *A as
 *     source), or the three-operand vpalignr when X86_NEED(AVX).
 *   - I is a runtime value below 32: computes the address
 *     __palignrs + I * 8 and calls it with (*B, *A).
 *   - I is a runtime value of 32 or more: zeroes the 16 output bytes.
 *
 * I is evaluated exactly once on whichever path runs.
 */
#define palignr(C, B, A, I) \
do { \
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3), \
1)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (__builtin_constant_p(I)) { \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
unsigned long Vimm = (I); \
typeof(__palignrs) *Fn; \
if (__builtin_expect(Vimm < 32, 1)) { \
Fn = (typeof(__palignrs) *)((uintptr_t) & __palignrs + Vimm * 8); \
*Xmm0 = Fn(*Xmm1, *Xmm2); \
} else { \
memset(Xmm0, 0, 16); \
} \
} \
} else { \
palignr(C, B, A, I); \
} \
} while (0)
#endif
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */

View file

@ -1,125 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set noet ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.h"
// Jump table for palignr() with non-constexpr immediate parameter.
//
// The table holds 32 stubs, one per shift amount 0..31. Each stub is
// `palignr $N,%xmm1,%xmm0` followed by `ret`, and every stub except
// the last is padded with one `nop` so that consecutive stubs start
// exactly 8 bytes apart. The palignr() macro relies on that spacing:
// it calls the address `__palignrs + N * 8`.
//
// Operands are taken from %xmm0 and %xmm1 and the result is left in
// %xmm0.
//
// @note needs ssse3 cf. prescott c. 2004 cf. bulldozer c. 2011
// @see palignr()
.balign 8
__palignrs:
palignr $0,%xmm1,%xmm0
ret
nop
palignr $1,%xmm1,%xmm0
ret
nop
palignr $2,%xmm1,%xmm0
ret
nop
palignr $3,%xmm1,%xmm0
ret
nop
palignr $4,%xmm1,%xmm0
ret
nop
palignr $5,%xmm1,%xmm0
ret
nop
palignr $6,%xmm1,%xmm0
ret
nop
palignr $7,%xmm1,%xmm0
ret
nop
palignr $8,%xmm1,%xmm0
ret
nop
palignr $9,%xmm1,%xmm0
ret
nop
palignr $10,%xmm1,%xmm0
ret
nop
palignr $11,%xmm1,%xmm0
ret
nop
palignr $12,%xmm1,%xmm0
ret
nop
palignr $13,%xmm1,%xmm0
ret
nop
palignr $14,%xmm1,%xmm0
ret
nop
palignr $15,%xmm1,%xmm0
ret
nop
palignr $16,%xmm1,%xmm0
ret
nop
palignr $17,%xmm1,%xmm0
ret
nop
palignr $18,%xmm1,%xmm0
ret
nop
palignr $19,%xmm1,%xmm0
ret
nop
palignr $20,%xmm1,%xmm0
ret
nop
palignr $21,%xmm1,%xmm0
ret
nop
palignr $22,%xmm1,%xmm0
ret
nop
palignr $23,%xmm1,%xmm0
ret
nop
palignr $24,%xmm1,%xmm0
ret
nop
palignr $25,%xmm1,%xmm0
ret
nop
palignr $26,%xmm1,%xmm0
ret
nop
palignr $27,%xmm1,%xmm0
ret
nop
palignr $28,%xmm1,%xmm0
ret
nop
palignr $29,%xmm1,%xmm0
ret
nop
palignr $30,%xmm1,%xmm0
ret
nop
palignr $31,%xmm1,%xmm0
ret
// Build-time guard for the 8-byte stride: 32 stubs with the final
// padding nop omitted must occupy exactly 8 * 32 - 1 bytes.
.if . - __palignrs != 8 * 32 - 1
.error "bad assemblage"
.endif
.endfn __palignrs,globl

View file

@ -1,42 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pmaddubsw.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/str/str.h"
/**
 * Multiplies bytes and adds adjacent results w/ short saturation.
 *
 * For each output lane n in 0..7 the unsigned bytes b[2n] and b[2n+1]
 * are multiplied by the signed bytes c[2n] and c[2n+1] respectively,
 * the two products are added, and the sum is clamped to the range
 * [SHRT_MIN, SHRT_MAX] before being stored in w[n].
 *
 * Lane n is written only after both of its input pairs were read, and
 * it covers the same byte offsets as those inputs, so w may be the
 * same buffer as b or c.
 *
 * @param w receives the eight saturated sums
 * @param b supplies sixteen unsigned bytes of data
 * @param c supplies sixteen signed byte coefficients
 * @note SSSE3 w/ Prescott c. 2004, Bulldozer c. 2011
 * @mayalias
 */
void(pmaddubsw)(int16_t w[8], const uint8_t b[16], const int8_t c[16]) {
  for (unsigned lane = 0; lane != 8; ++lane) {
    const unsigned even = lane * 2;
    const unsigned odd = even + 1;
    int dot = c[even] * b[even] + c[odd] * b[odd];
    if (dot > SHRT_MAX) {
      dot = SHRT_MAX;
    } else if (dot < SHRT_MIN) {
      dot = SHRT_MIN;
    }
    w[lane] = dot;
  }
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation. At call sites the name is shadowed by the
   function-like macro below, which receives this function as its PURE
   fallback. */
void pmaddubsw(int16_t[8], const uint8_t[16], const int8_t[16]);
/* Routes pmaddubsw(W, B, C) through INTRIN_SSEVEX_X_X_X_ with the SSSE3
   feature name, the "pmaddubsw" mnemonic and the non-commutative operand
   flag. */
#define pmaddubsw(W, B, C) \
INTRIN_SSEVEX_X_X_X_(pmaddubsw, SSSE3, "pmaddubsw", INTRIN_NONCOMMUTATIVE, \
W, B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_ */

View file

@ -1,36 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pmulhrsw.h"
#include "libc/str/str.h"
/**
 * Multiplies Q15 numbers.
 *
 * Each lane computes the full product of b and c, shifts it right by
 * 14 bits, adds one, and shifts right by one more bit, i.e. the product
 * is scaled down by 2^15 with rounding. The value is then converted
 * back to int16_t on store.
 *
 * Results are staged in a local buffer and copied out at the end, so
 * the destination may overlap either input.
 *
 * @param a receives the eight results
 * @param b supplies the first input vector
 * @param c supplies the second input vector
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @note a.k.a. packed multiply high w/ round & scale
 * @mayalias
 */
void(pmulhrsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  int16_t q[8];
  for (unsigned lane = 0; lane != 8; ++lane) {
    int product = b[lane] * c[lane];
    int scaled = product >> 14;
    q[lane] = (scaled + 1) >> 1;
  }
  __builtin_memcpy(a, q, sizeof(q));
}

View file

@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation. At call sites the name is shadowed by the
   function-like macro below, which receives this function as its PURE
   fallback. */
void pmulhrsw(int16_t a[8], const int16_t b[8], const int16_t c[8]);
/* Routes pmulhrsw(A, B, C) through INTRIN_SSEVEX_X_X_X_ with the SSSE3
   feature name, the "pmulhrsw" mnemonic and the commutative operand flag. */
#define pmulhrsw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(pmulhrsw, SSSE3, "pmulhrsw", INTRIN_COMMUTATIVE, A, B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_ */

View file

@ -1,35 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/psraw.h"
/**
 * Divides shorts by two power.
 *
 * Every lane of b is shifted right by k bits and stored in the matching
 * lane of a. Shift counts above 15 are treated as 15. Lanes are
 * processed one at a time, each read before its own store, so a may be
 * the same buffer as b.
 *
 * @param a receives the eight shifted values
 * @param b supplies the eight input values
 * @param k is the shift count
 * @note arithmetic shift right will sign extend negatives
 * @mayalias
 */
void(psraw)(int16_t a[8], const int16_t b[8], unsigned char k) {
  const unsigned char shift = k > 15 ? 15 : k;
  for (unsigned lane = 0; lane != 8; ++lane) {
    a[lane] = b[lane] >> shift;
  }
}

View file

@ -1,11 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Out-of-line implementation. At call sites the name is shadowed by the
   function-like macro below, which receives this function as its PURE
   fallback. NOTE(review): `libcesque` is a project-defined attribute
   macro not visible here -- confirm what it expands to. */
void psraw(int16_t[8], const int16_t[8], unsigned char) libcesque;
/* Routes psraw(A, B, I) through INTRIN_SSEVEX_X_I_ with the SSE2 feature
   name and the "psraw" mnemonic. That macro binds I with an "i" asm
   constraint, so on the asm path I must be a build-time immediate. */
#define psraw(A, B, I) INTRIN_SSEVEX_X_I_(psraw, SSE2, "psraw", A, B, I)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */