mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-03 17:58:30 +00:00
Upgrade pl_mpeg
This change gets printvideo working on aarch64. Performance improvements have been introduced for magikarp decimation on aarch64. The last of the old portable x86 intrinsics library is gone, but it still lives in Blink
This commit is contained in:
parent
5d3b91d8b9
commit
d1157d471f
46 changed files with 4587 additions and 4449 deletions
|
@ -1,82 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
|
||||
#include "libc/dce.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
|
||||
#define INTRIN_COMMUTATIVE "%"
|
||||
#define INTRIN_NONCOMMUTATIVE
|
||||
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
|
||||
/* 16-byte vector of char, 16-byte aligned and allowed to alias objects of any
   other type. The macros below use it as the operand type for their "x"
   inline-asm constraints. */
typedef char __intrin_xmm_t
|
||||
__attribute__((__vector_size__(16), __aligned__(16), __may_alias__));
|
||||
|
||||
/* Vector op taking two vector inputs: *A = OP(*B, *C).
 *
 * If X86_HAVE(ISA) is false, PURE(A, B, C) is invoked instead. Otherwise the
 * operation is emitted as inline assembly: the "v"-prefixed three-operand
 * form when X86_NEED(AVX) holds, else the two-operand form with *B tied to
 * the output register. FLAGS is a string pasted in front of the constraint
 * of the first input operand (e.g. INTRIN_COMMUTATIVE).
 */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) \
  do { \
    if (!X86_HAVE(ISA)) { \
      PURE(A, B, C); \
    } else { \
      __intrin_xmm_t *Dst = (void *)(A); \
      const __intrin_xmm_t *Lhs = (const __intrin_xmm_t *)(B); \
      const __intrin_xmm_t *Rhs = (const __intrin_xmm_t *)(C); \
      if (X86_NEED(AVX)) { \
        asm("v" OP "\t%2,%1,%0" : "=x"(*Dst) : FLAGS "x"(*Lhs), "x"(*Rhs)); \
      } else { \
        asm(OP "\t%1,%0" : "=x"(*Dst) : FLAGS "x"(*Rhs), "0"(*Lhs)); \
      } \
    } \
  } while (0)
|
||||
|
||||
/* Vector op taking one vector input and an immediate: *A = OP(*B, I).
 *
 * If X86_HAVE(ISA) is false, PURE(A, B, I) is invoked instead. Otherwise the
 * operation is emitted as inline assembly using the `I, source, destination`
 * operand order, with a "v" prefix on the mnemonic when X86_NEED(AVX) holds.
 * I is passed through an "i" (immediate) constraint.
 */
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) \
  do { \
    if (!X86_HAVE(ISA)) { \
      PURE(A, B, I); \
    } else { \
      __intrin_xmm_t *Dst = (void *)(A); \
      const __intrin_xmm_t *Src = (const __intrin_xmm_t *)(B); \
      if (X86_NEED(AVX)) { \
        asm("v" OP "\t%2,%1,%0" : "=x"(*Dst) : "x"(*Src), "i"(I)); \
      } else { \
        asm(OP "\t%2,%1,%0" : "=x"(*Dst) : "x"(*Src), "i"(I)); \
      } \
    } \
  } while (0)
|
||||
|
||||
/* Vector op taking a single vector input: *A = OP(*B).
 *
 * If X86_HAVE(ISA) is false, PURE(A, B) is invoked instead. Otherwise the
 * operation is emitted as inline assembly: with a "v" prefix and a separate
 * input register when X86_NEED(AVX) holds, else with *B tied to the output
 * register.
 */
#define INTRIN_SSEVEX_X_X_(PURE, ISA, OP, A, B) \
  do { \
    if (!X86_HAVE(ISA)) { \
      PURE(A, B); \
    } else { \
      __intrin_xmm_t *Dst = (void *)(A); \
      const __intrin_xmm_t *Src = (const __intrin_xmm_t *)(B); \
      if (X86_NEED(AVX)) { \
        asm("v" OP "\t%1,%0" : "=x"(*Dst) : "x"(*Src)); \
      } else { \
        asm(OP "\t%1,%0" : "=x"(*Dst) : "0"(*Src)); \
      } \
    } \
  } while (0)
|
||||
|
||||
/* In-place style vector op with an immediate: *A = OP(*B, I).
 *
 * PURE(A, B, I) is invoked when IsModeDbg() is true or X86_HAVE(ISA) is
 * false. Otherwise the operation is emitted as inline assembly: the
 * three-operand "v" form when X86_NEED(AVX) holds, else the two-operand form
 * with *B tied to the output register. I is passed through an "i"
 * (immediate) constraint.
 */
#define INTRIN_SSEVEX_X_I_(PURE, ISA, OP, A, B, I) \
  do { \
    if (IsModeDbg() || !X86_HAVE(ISA)) { \
      PURE(A, B, I); \
    } else { \
      __intrin_xmm_t *Dst = (void *)(A); \
      const __intrin_xmm_t *Src = (const __intrin_xmm_t *)(B); \
      if (X86_NEED(AVX)) { \
        asm("v" OP "\t%2,%1,%0" : "=x"(*Dst) : "x"(*Src), "i"(I)); \
      } else { \
        asm(OP "\t%1,%0" : "=x"(*Dst) : "i"(I), "0"(*Src)); \
      } \
    } \
  } while (0)
|
||||
|
||||
#else
|
||||
/* Without x86-64 (or under __STRICT_ANSI__) every wrapper simply forwards to
   the supplied PURE implementation; ISA, OP and FLAGS are ignored. */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) PURE(A, B, C)
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I)        PURE(A, B, I)
#define INTRIN_SSEVEX_X_X_(PURE, ISA, OP, A, B)             PURE(A, B)
#define INTRIN_SSEVEX_X_I_(PURE, ISA, OP, A, B, I)          PURE(A, B, I)
|
||||
#endif /* X86 && !ANSI */
|
||||
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_MACROS_H_ */
|
|
@ -1,40 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/packsswb.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Casts shorts to signed chars w/ saturation.
|
||||
*
|
||||
* 𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
|
||||
*
|
||||
* @see packuswb()
|
||||
* @mayalias
|
||||
*/
|
||||
void(packsswb)(int8_t a[16], const int16_t b[8], const int16_t c[8]) {
  /* Results are staged in a local buffer so that `a` may alias `b` or `c`. */
  int8_t packed[16];
  const int16_t *inputs[2] = {b, c};
  for (unsigned src = 0; src < 2; ++src) {
    for (unsigned lane = 0; lane < 8; ++lane) {
      int v = inputs[src][lane];
      /* Saturate to the signed 8-bit range. */
      if (v < INT8_MIN) {
        v = INT8_MIN;
      } else if (v > INT8_MAX) {
        v = INT8_MAX;
      }
      packed[src * 8 + lane] = (int8_t)v;
    }
  }
  __builtin_memcpy(a, packed, sizeof(packed));
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; it is also handed to the macro below as the
   PURE fallback. */
void packsswb(int8_t[16], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Function-like macro of the same name: dispatches through
   INTRIN_SSEVEX_X_X_X_ with ISA SSE2, mnemonic "packsswb" and
   INTRIN_NONCOMMUTATIVE operand flags. */
#define packsswb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(packsswb, SSE2, "packsswb", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_ */
|
|
@ -1,40 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/packuswb.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Casts shorts to unsigned chars w/ saturation.
|
||||
*
|
||||
* 𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
|
||||
*
|
||||
* @see packsswb()
|
||||
* @mayalias
|
||||
*/
|
||||
void(packuswb)(uint8_t a[16], const int16_t b[8], const int16_t c[8]) {
  /* Results are staged in a local buffer so that `a` may alias `b` or `c`. */
  uint8_t packed[16];
  const int16_t *inputs[2] = {b, c};
  for (unsigned src = 0; src < 2; ++src) {
    for (unsigned lane = 0; lane < 8; ++lane) {
      int v = inputs[src][lane];
      /* Saturate to the unsigned 8-bit range [0, UINT8_MAX]. */
      if (v < 0) {
        v = 0;
      } else if (v > UINT8_MAX) {
        v = UINT8_MAX;
      }
      packed[src * 8 + lane] = (uint8_t)v;
    }
  }
  __builtin_memcpy(a, packed, sizeof(packed));
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; it is also handed to the macro below as the
   PURE fallback. */
void packuswb(uint8_t[16], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Function-like macro of the same name: dispatches through
   INTRIN_SSEVEX_X_X_X_ with ISA SSE2, mnemonic "packuswb" and
   INTRIN_NONCOMMUTATIVE operand flags. */
#define packuswb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(packuswb, SSE2, "packuswb", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_ */
|
|
@ -1,39 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Adds 16-bit integers.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @note shorts can't overflow so ubsan won't report it when it happens
|
||||
* @see paddsw()
|
||||
* @mayalias
|
||||
*/
|
||||
void(paddw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  /* Sums are staged locally so that `a` may alias either input. */
  int16_t sum[8];
  unsigned lane = 8;
  while (lane--) {
    /* Operands promote to int; the result is narrowed back on assignment. */
    sum[lane] = b[lane] + c[lane];
  }
  __builtin_memcpy(a, sum, sizeof(sum));
}
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; it is also handed to the macro below as the
   PURE fallback. */
void paddw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Function-like macro of the same name: dispatches through
   INTRIN_SSEVEX_X_X_X_ with ISA SSE2, mnemonic "paddw" and
   INTRIN_COMMUTATIVE operand flags. */
#define paddw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddw, SSE2, "paddw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDW_H_ */
|
|
@ -1,43 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/palignr.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
|
||||
* Overlaps vectors.
|
||||
*
|
||||
* 𝑖= 0 means 𝑐←𝑎
|
||||
* 0<𝑖<16 means 𝑐←𝑎║𝑏
|
||||
* 𝑖=16 means 𝑐←𝑏
|
||||
* 16<𝑖<32 means 𝑐←𝑏║0
|
||||
* 𝑖≥32 means 𝑐←0
|
||||
*
|
||||
* @param 𝑖 goes faster as constexpr
|
||||
* @note not compatible with mmx
|
||||
* @see pvalignr()
|
||||
* @mayalias
|
||||
*/
|
||||
void(palignr)(void *c, const void *b, const void *a, unsigned long i) {
  /* 48-byte window laid out as a ║ b ║ sixteen zero bytes. */
  char window[48] = {0};
  unsigned long offset = i < 32 ? i : 32;
  __builtin_memcpy(window, a, 16);
  __builtin_memcpy(window + 16, b, 16);
  /* Both inputs were copied out above, so `c` may alias `a` or `b`. */
  __builtin_memcpy(c, window + offset, 16);
}
|
|
@ -1,45 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; the macro below falls back to it. */
void palignr(void *, const void *, const void *, unsigned long);
|
||||
|
||||
#if !defined(__STRICT_ANSI__) && !defined(__chibicc__) && defined(__x86_64__)
|
||||
/* Base of the table the macro below calls into at `__palignrs + Vimm * 8`
   when the shift count is not a compile-time constant. */
__intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
|
||||
/* Function-like macro of the same name as the function above.
 *
 * Fast path (taken when !IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3)):
 *   - if I is a compile-time constant, emits `palignr`, or `vpalignr` when
 *     X86_NEED(AVX), with I as an immediate operand;
 *   - otherwise, for I < 32, calls the code at `__palignrs + I * 8` with *B
 *     and *A as arguments and stores the result to *C; for I >= 32 it
 *     zero-fills the 16 bytes at C.
 * Slow path: calls the palignr() function (the macro name is not re-expanded
 * inside its own expansion).
 */
#define palignr(C, B, A, I) \
|
||||
do { \
|
||||
if (__builtin_expect(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3), \
|
||||
1)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(C); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
|
||||
if (__builtin_constant_p(I)) { \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("palignr\t%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("vpalignr\t%3,%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
unsigned long Vimm = (I); \
|
||||
typeof(__palignrs) *Fn; \
|
||||
if (__builtin_expect(Vimm < 32, 1)) { \
|
||||
Fn = (typeof(__palignrs) *)((uintptr_t) & __palignrs + Vimm * 8); \
|
||||
*Xmm0 = Fn(*Xmm1, *Xmm2); \
|
||||
} else { \
|
||||
memset(Xmm0, 0, 16); \
|
||||
} \
|
||||
} \
|
||||
} else { \
|
||||
palignr(C, B, A, I); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */
|
|
@ -1,125 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│ vi: set noet ft=asm ts=8 sw=8 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
//	Jump table backing palignr() when the shift count is not a
//	compile-time constant.
//
//	Each of the 32 entries is `palignr $N,%xmm1,%xmm0` followed by
//	`ret`; every entry but the last is padded with a `nop`. The
//	size check at the bottom fails the build unless the table is
//	exactly 8*32-1 bytes long.
//
//	@note	needs ssse3 cf. prescott c. 2004 cf. bulldozer c. 2011
//	@see	palignr()
	.balign	8
__palignrs:
	.irp	imm,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
	palignr	$\imm,%xmm1,%xmm0
	ret
	nop
	.endr
	palignr	$31,%xmm1,%xmm0
	ret
	.if	. - __palignrs != 8 * 32 - 1
	.error	"bad assemblage"
	.endif
	.endfn	__palignrs,globl
|
|
@ -1,42 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmaddubsw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Multiplies bytes and adds adjacent results w/ short saturation.
|
||||
*
|
||||
* 𝑤ᵢ ← CLAMP[ 𝑏₂ᵢ𝑐₂ᵢ + 𝑏₍₂ᵢ₊₁₎𝑐₍₂ᵢ₊₁₎ ]
|
||||
*
|
||||
* @param 𝑤 [w/o] receives shorts
|
||||
* @param 𝑏 [r/o] is your byte data
|
||||
* @param 𝑐 [r/o] are your int8 coefficients
|
||||
* @note SSSE3 w/ Prescott c. 2004, Bulldozer c. 2011
|
||||
* @note greatest simd op, like, ever
|
||||
* @mayalias
|
||||
*/
|
||||
void(pmaddubsw)(int16_t w[8], const uint8_t b[16], const int8_t c[16]) {
  /*
   * Stage all eight results before touching `w`. The function is tagged
   * @mayalias, and writing w[i] in place would clobber input bytes that
   * later lanes still need whenever `w` partially overlaps `b` or `c`.
   */
  int16_t r[8];
  for (unsigned i = 0; i < 8; ++i) {
    /* unsigned byte × signed byte, adjacent pairs summed (fits in int) */
    int sum = c[i * 2 + 0] * b[i * 2 + 0] + c[i * 2 + 1] * b[i * 2 + 1];
    /* saturate to the signed 16-bit range */
    if (sum > SHRT_MAX) {
      sum = SHRT_MAX;
    } else if (sum < SHRT_MIN) {
      sum = SHRT_MIN;
    }
    r[i] = (int16_t)sum;
  }
  __builtin_memcpy(w, r, sizeof(r));
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; it is also handed to the macro below as the
   PURE fallback. */
void pmaddubsw(int16_t[8], const uint8_t[16], const int8_t[16]);
|
||||
|
||||
/* Function-like macro of the same name: dispatches through
   INTRIN_SSEVEX_X_X_X_ with ISA SSSE3, mnemonic "pmaddubsw" and
   INTRIN_NONCOMMUTATIVE operand flags. */
#define pmaddubsw(W, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmaddubsw, SSSE3, "pmaddubsw", INTRIN_NONCOMMUTATIVE, \
|
||||
W, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_ */
|
|
@ -1,36 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmulhrsw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Multiplies Q15 numbers.
|
||||
*
|
||||
* @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
|
||||
* @note a.k.a. packed multiply high w/ round & scale
|
||||
* @see Q2F(15,𝑥), F2Q(15,𝑥)
|
||||
* @mayalias
|
||||
*/
|
||||
void(pmulhrsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  /* Products are staged locally so that `a` may alias either input. */
  int16_t out[8];
  for (unsigned lane = 0; lane < 8; ++lane) {
    int product = b[lane] * c[lane];
    /* keep one extra fractional bit, add one to round, then drop the bit */
    int scaled = product >> 14;
    out[lane] = (scaled + 1) >> 1;
  }
  __builtin_memcpy(a, out, sizeof(out));
}
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; it is also handed to the macro below as the
   PURE fallback. */
void pmulhrsw(int16_t a[8], const int16_t b[8], const int16_t c[8]);
|
||||
|
||||
/* Function-like macro of the same name: dispatches through
   INTRIN_SSEVEX_X_X_X_ with ISA SSSE3, mnemonic "pmulhrsw" and
   INTRIN_COMMUTATIVE operand flags. */
#define pmulhrsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmulhrsw, SSSE3, "pmulhrsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_ */
|
|
@ -1,35 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/psraw.h"
|
||||
|
||||
/**
|
||||
* Shifts shorts right arithmetically, i.e. floor-divides by a power of two.
|
||||
*
|
||||
* @note the shift count needs to be a literal, asmconstexpr, or linkconstsym
|
||||
* @note arithmetic shift right will sign extend negatives
|
||||
* @mayalias
|
||||
*/
|
||||
void(psraw)(int16_t a[8], const int16_t b[8], unsigned char k) {
  /*
   * Stage all eight results before touching `a`. The function is tagged
   * @mayalias, and writing a[i] in place would feed already-shifted lanes
   * back into the input whenever `a` partially overlaps `b`.
   */
  int16_t r[8];
  /* Counts above 15 behave like 15: every lane becomes its sign fill. */
  if (k > 15) {
    k = 15;
  }
  for (unsigned i = 0; i < 8; ++i) {
    r[i] = b[i] >> k;
  }
  __builtin_memcpy(a, r, sizeof(r));
}
|
|
@ -1,11 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation; it is also handed to the macro below as the
   PURE fallback. */
void psraw(int16_t[8], const int16_t[8], unsigned char) libcesque;
|
||||
|
||||
/* Function-like macro of the same name: dispatches through
   INTRIN_SSEVEX_X_I_ with ISA SSE2 and mnemonic "psraw"; I is passed to the
   asm as an immediate operand. */
#define psraw(A, B, I) INTRIN_SSEVEX_X_I_(psraw, SSE2, "psraw", A, B, I)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */
|
Loading…
Add table
Add a link
Reference in a new issue