mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-12 14:09:12 +00:00
Add x86_64-linux-gnu emulator
I wanted a tiny, scriptable, meltdown-proof way to run userspace programs and visualize how program execution impacts memory. It helps to explain how things like Actually Portable Executable work. It can show you how the GCC generated code is going about manipulating matrices and more. I didn't feel fully comfortable with Qemu and Bochs because I'm not smart enough to understand them. I wanted something like gVisor but with much stronger levels of assurance. I wanted a single binary that'll run on all major operating systems with an embedded GPL barrier ZIP filesystem that is tiny enough to transpile to JavaScript and run in browsers too. https://justine.storage.googleapis.com/emulator625.mp4
This commit is contained in:
parent
467504308a
commit
f4f4caab0e
1052 changed files with 65667 additions and 7825 deletions
|
@ -41,6 +41,10 @@ $(LIBC_INTRIN_A).pkg: \
|
|||
$(LIBC_INTRIN_A_OBJS) \
|
||||
$(foreach x,$(LIBC_INTRIN_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
$(LIBC_INTRIN_A_OBJS): \
|
||||
OVERRIDE_CFLAGS += \
|
||||
-fwrapv -O3
|
||||
|
||||
LIBC_INTRIN_LIBS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)))
|
||||
LIBC_INTRIN_HDRS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_HDRS))
|
||||
LIBC_INTRIN_SRCS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_SRCS))
|
||||
|
|
|
@ -7,13 +7,13 @@
|
|||
#define INTRIN_COMMUTATIVE "%"
|
||||
#define INTRIN_NONCOMMUTATIVE
|
||||
|
||||
#ifndef __STRICT_ANSI__
|
||||
#if defined(__x86_64__) && !defined(__STRICT_ANSI__)
|
||||
|
||||
typedef char __intrin_xmm_t _Vector_size(16) mayalias;
|
||||
typedef char __intrin_xmm_t _Vector_size(16) aligned(16) mayalias;
|
||||
|
||||
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) \
|
||||
do { \
|
||||
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)) { \
|
||||
if (!IsModeDbg() && X86_HAVE(ISA)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(A); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(C); \
|
||||
|
@ -29,7 +29,37 @@ typedef char __intrin_xmm_t _Vector_size(16) mayalias;
|
|||
|
||||
/*
 * Wrapper for ops taking a destination A, a source B and an immediate I.
 * When the guard passes, A and B are accessed as 16-byte vectors and the
 * instruction named by OP is emitted with inline asm, using the
 * "v"-prefixed mnemonic when X86_NEED(AVX) holds; the immediate is passed
 * with an "i" constraint. Otherwise the C function PURE(A, B, I) is
 * called.
 *
 * NOTE(review): two `if (!IsModeDbg() ...` guard lines appear below,
 * presumably the pre- and post-change lines of this diff listing; confirm
 * which one belongs to the current macro.
 */
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) \
|
||||
do { \
|
||||
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)) { \
|
||||
if (!IsModeDbg() && X86_HAVE(ISA)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(A); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm(OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
|
||||
} else { \
|
||||
asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
PURE(A, B, I); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * Two-operand wrapper (destination A, source B); pabsb(), pabsd() and
 * pabsw() expand to it. When not in debug mode and X86_HAVE(ISA) holds,
 * A and B are accessed as 16-byte vectors and the instruction named by
 * OP is emitted with inline asm. The plain mnemonic ties its input to the
 * output register (the "0" constraint); the "v"-prefixed mnemonic, chosen
 * when X86_NEED(AVX) holds, takes the input in any vector register.
 * Otherwise the C function PURE(A, B) is called.
 */
#define INTRIN_SSEVEX_X_X_(PURE, ISA, OP, A, B) \
|
||||
do { \
|
||||
if (!IsModeDbg() && X86_HAVE(ISA)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(A); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm(OP "\t%1,%0" : "=x"(*Xmm0) : "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("v" OP "\t%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1)); \
|
||||
} \
|
||||
} else { \
|
||||
PURE(A, B); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define INTRIN_SSEVEX_X_I_(PURE, ISA, OP, A, B, I) \
|
||||
do { \
|
||||
if (!IsModeDbg() && X86_HAVE(ISA)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(A); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
|
@ -45,7 +75,8 @@ typedef char __intrin_xmm_t _Vector_size(16) mayalias;
|
|||
#else
|
||||
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) PURE(A, B, C)
|
||||
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) PURE(A, B, I)
|
||||
#endif /* ANSI */
|
||||
/* Fallbacks for builds that are not x86-64 or that define __STRICT_ANSI__:
   each wrapper simply calls the pure C function. The two-operand form
   has to exist here as well, because pabsb(), pabsd() and pabsw() expand
   to INTRIN_SSEVEX_X_X_() unconditionally. */
#define INTRIN_SSEVEX_X_X_(PURE, ISA, OP, A, B) PURE(A, B)
#define INTRIN_SSEVEX_X_I_(PURE, ISA, OP, A, B, I) PURE(A, B, I)
|
||||
#endif /* X86 && !ANSI */
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_MACROS_H_ */
|
||||
|
|
35
libc/intrin/pabsb.c
Normal file
35
libc/intrin/pabsb.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pabsb.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Converts signed bytes to absolute values, 𝑎ᵢ ← |𝑏ᵢ|.
|
||||
* @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
|
||||
*/
|
||||
void(pabsb)(uint8_t a[16], const int8_t b[16]) {
  uint8_t magnitudes[16];
  unsigned lane = 0;
  while (lane < 16) {
    magnitudes[lane] = ABS(b[lane]);
    ++lane;
  }
  /* publish only after every input lane has been read */
  memcpy(a, magnitudes, sizeof(magnitudes));
}
|
13
libc/intrin/pabsb.h
Normal file
13
libc/intrin/pabsb.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PABSB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PABSB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void pabsb(uint8_t[16], const int8_t[16]);
|
||||
|
||||
#define pabsb(A, B) INTRIN_SSEVEX_X_X_(pabsb, SSSE3, "pabsb", A, B)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PABSB_H_ */
|
35
libc/intrin/pabsd.c
Normal file
35
libc/intrin/pabsd.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pabsd.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Converts signed 32-bit integers to absolute values, 𝑎ᵢ ← |𝑏ᵢ|.
 *
 * INT32_MIN has no positive counterpart in 32 bits; it yields
 * 0x80000000. The negation is done in unsigned arithmetic so that this
 * case does not depend on the -fwrapv build flag.
 *
 * @param 𝑎 [w/o] receives the four magnitudes
 * @param 𝑏 [r/o] supplies the four signed inputs
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 */
void(pabsd)(uint32_t a[4], const int32_t b[4]) {
  unsigned i;
  uint32_t r[4];
  for (i = 0; i < 4; ++i) {
    r[i] = b[i] < 0 ? 0u - (uint32_t)b[i] : (uint32_t)b[i];
  }
  /* results are staged in r so storing to 𝑎 can't disturb unread 𝑏 lanes */
  memcpy(a, r, 16);
}
|
13
libc/intrin/pabsd.h
Normal file
13
libc/intrin/pabsd.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PABSD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PABSD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void pabsd(uint32_t[4], const int32_t[4]);
|
||||
|
||||
#define pabsd(A, B) INTRIN_SSEVEX_X_X_(pabsd, SSSE3, "pabsd", A, B)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PABSD_H_ */
|
35
libc/intrin/pabsw.c
Normal file
35
libc/intrin/pabsw.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pabsw.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Converts shorts to absolute values, 𝑎ᵢ ← |𝑏ᵢ|.
|
||||
* @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
|
||||
*/
|
||||
void(pabsw)(uint16_t a[8], const int16_t b[8]) {
  uint16_t magnitudes[8];
  unsigned lane = 0;
  while (lane < 8) {
    magnitudes[lane] = ABS(b[lane]);
    ++lane;
  }
  /* publish only after every input lane has been read */
  memcpy(a, magnitudes, sizeof(magnitudes));
}
|
13
libc/intrin/pabsw.h
Normal file
13
libc/intrin/pabsw.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PABSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PABSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void pabsw(uint16_t[8], const int16_t[8]);
|
||||
|
||||
#define pabsw(A, B) INTRIN_SSEVEX_X_X_(pabsw, SSSE3, "pabsw", A, B)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PABSW_H_ */
|
32
libc/intrin/packssdw.c
Normal file
32
libc/intrin/packssdw.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/packssdw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Casts ints to shorts w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,4)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,4)}
 *
 * @param 𝑎 [w/o] receives eight results clamped to [INT16_MIN,INT16_MAX]
 * @param 𝑏 [r/o] supplies the four ints packed into 𝑎[0..3]
 * @param 𝑐 [r/o] supplies the four ints packed into 𝑎[4..7]
 * @mayalias
 */
void(packssdw)(int16_t a[8], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  int16_t r[8];
  int32_t x;
  for (i = 0; i < 4; ++i) {
    x = b[i];
    r[i + 0] = x > INT16_MAX ? INT16_MAX : x < INT16_MIN ? INT16_MIN : x;
  }
  for (i = 0; i < 4; ++i) {
    x = c[i];
    r[i + 4] = x > INT16_MAX ? INT16_MAX : x < INT16_MIN ? INT16_MIN : x;
  }
  /* copy out only once both inputs are fully read: storing 𝑎[0..3]
     directly would overwrite 𝑐[0..1] when 𝑎 and 𝑐 share storage */
  for (i = 0; i < 8; ++i) a[i] = r[i];
}
|
15
libc/intrin/packssdw.h
Normal file
15
libc/intrin/packssdw.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKSSDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKSSDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void packssdw(int16_t[8], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define packssdw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(packssdw, SSE2, "packssdw", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKSSDW_H_ */
|
39
libc/intrin/packsswb.c
Normal file
39
libc/intrin/packsswb.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/packsswb.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Casts shorts to signed chars w/ saturation.
|
||||
*
|
||||
* 𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
|
||||
*
|
||||
* @see packuswb()
|
||||
* @mayalias
|
||||
*/
|
||||
void(packsswb)(int8_t a[16], const int16_t b[8], const int16_t c[8]) {
  int8_t packed[16];
  unsigned k;
  /* one pass fills both halves: 𝑏 goes to lanes 0..7, 𝑐 to lanes 8..15 */
  for (k = 0; k < 8; ++k) {
    packed[k] = MIN(INT8_MAX, MAX(INT8_MIN, b[k]));
    packed[k + 8] = MIN(INT8_MAX, MAX(INT8_MIN, c[k]));
  }
  /* inputs are fully consumed before the destination is touched */
  memcpy(a, packed, sizeof(packed));
}
|
|
@ -1,30 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
 * Casts shorts to signed chars w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
 *
 * @param a receives sixteen results clamped to [-128,127]
 * @param b supplies the eight shorts packed into a[0..7]
 * @param c supplies the eight shorts packed into a[8..15]
 * @see packuswb()
 * @mayalias
 */
static void packsswb(signed char a[16], const short b[8], const short c[8]) {
  int i;
  signed char r[16];
  for (i = 0; i < 8; ++i) {
    r[i] = b[i] > 127 ? 127 : b[i] < -128 ? -128 : b[i];
  }
  for (i = 0; i < 8; ++i) {
    r[i + 8] = c[i] > 127 ? 127 : c[i] < -128 ? -128 : c[i];
  }
  /* store last: writing a[0..7] directly would overwrite c[0..3] before
     they are read when a and c share storage */
  for (i = 0; i < 16; ++i) {
    a[i] = r[i];
  }
}
|
||||
void packsswb(int8_t[16], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define packsswb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(packsswb, SSE2, "packsswb", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_ */
|
||||
|
|
35
libc/intrin/packusdw.c
Normal file
35
libc/intrin/packusdw.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/packusdw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Casts ints to unsigned shorts w/ saturation.
|
||||
* @mayalias
|
||||
*/
|
||||
void(packusdw)(uint16_t a[8], const int32_t b[4], const int32_t c[4]) {
|
||||
unsigned i;
|
||||
uint16_t r[8];
|
||||
for (i = 0; i < 4; ++i) r[i + 0] = MIN(UINT16_MAX, MAX(UINT16_MIN, b[i]));
|
||||
for (i = 0; i < 4; ++i) r[i + 4] = MIN(UINT16_MAX, MAX(UINT16_MIN, c[i]));
|
||||
memcpy(a, r, 16);
|
||||
}
|
15
libc/intrin/packusdw.h
Normal file
15
libc/intrin/packusdw.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKUSDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKUSDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void packusdw(uint16_t[8], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define packusdw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(packusdw, SSE4_1, "packusdw", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKUSDW_H_ */
|
39
libc/intrin/packuswb.c
Normal file
39
libc/intrin/packuswb.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/packuswb.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Casts shorts to unsigned chars w/ saturation.
|
||||
*
|
||||
* 𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
|
||||
*
|
||||
* @see packsswb()
|
||||
* @mayalias
|
||||
*/
|
||||
void(packuswb)(uint8_t a[16], const int16_t b[8], const int16_t c[8]) {
|
||||
unsigned i;
|
||||
uint8_t r[16];
|
||||
for (i = 0; i < 8; ++i) r[i + 0] = MIN(UINT8_MAX, MAX(UINT8_MIN, b[i]));
|
||||
for (i = 0; i < 8; ++i) r[i + 8] = MIN(UINT8_MAX, MAX(UINT8_MIN, c[i]));
|
||||
memcpy(a, r, 16);
|
||||
}
|
|
@ -1,30 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
 * Casts shorts to unsigned chars w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
 *
 * @param a receives sixteen results clamped to [0,255]
 * @param b supplies the eight shorts packed into a[0..7]
 * @param c supplies the eight shorts packed into a[8..15]
 * @see packsswb()
 * @mayalias
 */
static void packuswb(unsigned char a[16], const short b[8], const short c[8]) {
  int i;
  unsigned char r[16];
  for (i = 0; i < 8; ++i) {
    r[i] = b[i] > 255 ? 255 : b[i] < 0 ? 0 : b[i];
  }
  for (i = 0; i < 8; ++i) {
    r[i + 8] = c[i] > 255 ? 255 : c[i] < 0 ? 0 : c[i];
  }
  /* store last: writing a[0..7] directly would overwrite c[0..3] before
     they are read when a and c share storage */
  for (i = 0; i < 16; ++i) {
    a[i] = r[i];
  }
}
|
||||
void packuswb(uint8_t[16], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define packuswb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(packuswb, SSE2, "packuswb", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_ */
|
||||
|
|
36
libc/intrin/paddb.c
Normal file
36
libc/intrin/paddb.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Adds 8-bit integers.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
void(paddb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
  int8_t sums[16];
  unsigned lane;
  for (lane = 0; lane != 16; lane++) {
    sums[lane] = b[lane] + c[lane];
  }
  /* sums are staged locally so the destination may overlap an input */
  memcpy(a, sums, sizeof(sums));
}
|
14
libc/intrin/paddb.h
Normal file
14
libc/intrin/paddb.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void paddb(int8_t[16], const int8_t[16], const int8_t[16]);
|
||||
|
||||
#define paddb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddb, SSE2, "paddb", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDB_H_ */
|
36
libc/intrin/paddd.c
Normal file
36
libc/intrin/paddd.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddd.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Adds 32-bit integers.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
void(paddd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  uint32_t r[4];
  /* add as unsigned so an overflowing lane wraps modulo 2³² by definition
     rather than relying on the -fwrapv build flag */
  for (i = 0; i < 4; ++i) r[i] = (uint32_t)b[i] + (uint32_t)c[i];
  /* sums are staged in r and stored last, so 𝑎 may alias 𝑏 or 𝑐 */
  memcpy(a, r, 16);
}
|
14
libc/intrin/paddd.h
Normal file
14
libc/intrin/paddd.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void paddd(int32_t[4], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define paddd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddd, SSE2, "paddd", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDD_H_ */
|
36
libc/intrin/paddq.c
Normal file
36
libc/intrin/paddq.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddq.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Adds 64-bit integers.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
void(paddq)(int64_t a[2], const int64_t b[2], const int64_t c[2]) {
  unsigned i;
  uint64_t r[2];
  /* add as unsigned so an overflowing lane wraps modulo 2⁶⁴ by definition
     rather than relying on the -fwrapv build flag */
  for (i = 0; i < 2; ++i) r[i] = (uint64_t)b[i] + (uint64_t)c[i];
  /* sums are staged in r and stored last, so 𝑎 may alias 𝑏 or 𝑐 */
  memcpy(a, r, 16);
}
|
14
libc/intrin/paddq.h
Normal file
14
libc/intrin/paddq.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDQ_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDQ_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void paddq(int64_t[2], const int64_t[2], const int64_t[2]);
|
||||
|
||||
#define paddq(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddq, SSE2, "paddq", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDQ_H_ */
|
40
libc/intrin/paddsb.c
Normal file
40
libc/intrin/paddsb.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddsb.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Adds signed 8-bit integers w/ saturation.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
void(paddsb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
  int8_t clamped[16];
  unsigned lane;
  int sum;
  for (lane = 0; lane != 16; lane++) {
    /* operands promote to int, so the raw sum is exact before clamping */
    sum = b[lane] + c[lane];
    clamped[lane] = MIN(INT8_MAX, MAX(INT8_MIN, sum));
  }
  memcpy(a, clamped, sizeof(clamped));
}
|
14
libc/intrin/paddsb.h
Normal file
14
libc/intrin/paddsb.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDSB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDSB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void paddsb(int8_t[16], const int8_t[16], const int8_t[16]);
|
||||
|
||||
#define paddsb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddsb, SSE2, "paddsb", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDSB_H_ */
|
40
libc/intrin/paddsw.c
Normal file
40
libc/intrin/paddsw.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddsw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Adds signed 16-bit integers w/ saturation.
 *
 * Every lane is summed at int width and then pinned to the range
 * SHRT_MIN..SHRT_MAX. Lanes are staged in a local buffer and copied
 * out in one go, which is what lets the output overlap the inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(paddsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  int sum;
  unsigned lane;
  int16_t out[8];
  for (lane = 0; lane < 8; ++lane) {
    sum = b[lane] + c[lane];
    if (sum < SHRT_MIN) {
      sum = SHRT_MIN;
    } else if (sum > SHRT_MAX) {
      sum = SHRT_MAX;
    }
    out[lane] = sum;
  }
  memcpy(a, out, sizeof(out));
}
|
|
@ -1,28 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
|
||||
* Adds signed 16-bit integers w/ saturation.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @see paddw()
|
||||
* @mayalias
|
||||
*/
|
||||
static void paddsw(short a[8], const short b[8], const short c[8]) {
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
a[i] = MIN(SHRT_MAX, MAX(SHRT_MIN, b[i] + c[i]));
|
||||
}
|
||||
}
|
||||
void paddsw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define paddsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddsw, SSE2, "paddsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_ */
|
||||
|
|
40
libc/intrin/paddusb.c
Normal file
40
libc/intrin/paddusb.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddusb.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Adds unsigned 8-bit integers w/ saturation.
 *
 * Every lane is summed at int width and capped at UINT8_MAX. Lanes
 * are staged in a local buffer and copied out in one go, which is
 * what lets the output overlap the inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(paddusb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
  int sum;
  unsigned lane;
  uint8_t out[16];
  for (lane = 0; lane < 16; ++lane) {
    sum = b[lane] + c[lane];
    out[lane] = sum > UINT8_MAX ? UINT8_MAX : sum;
  }
  memcpy(a, out, sizeof(out));
}
|
14
libc/intrin/paddusb.h
Normal file
14
libc/intrin/paddusb.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDUSB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDUSB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from paddusb.c; write (paddusb)(a, b, c) to
   reach it without going through the macro below */
void paddusb(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]);

/* same-named wrapper that forwards the SSE2 "paddusb" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) */
#define paddusb(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(paddusb, SSE2, "paddusb", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDUSB_H_ */
|
40
libc/intrin/paddusw.c
Normal file
40
libc/intrin/paddusw.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddusw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Adds unsigned 16-bit integers w/ saturation.
 *
 * Every lane is summed at int width and capped at UINT16_MAX. Lanes
 * are staged in a local buffer and copied out in one go, which is
 * what lets the output overlap the inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(paddusw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
  int sum;
  unsigned lane;
  uint16_t out[8];
  for (lane = 0; lane < 8; ++lane) {
    sum = b[lane] + c[lane];
    out[lane] = sum > UINT16_MAX ? UINT16_MAX : sum;
  }
  memcpy(a, out, sizeof(out));
}
|
14
libc/intrin/paddusw.h
Normal file
14
libc/intrin/paddusw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDUSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDUSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from paddusw.c; write (paddusw)(a, b, c) to
   reach it without going through the macro below */
void paddusw(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]);

/* same-named wrapper that forwards the SSE2 "paddusw" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) */
#define paddusw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(paddusw, SSE2, "paddusw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDUSW_H_ */
|
40
libc/intrin/paddw.c
Normal file
40
libc/intrin/paddw.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/paddw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Adds 16-bit integers.
 *
 * Each lane is summed at int width and narrowed back to 16 bits when
 * stored; there is no saturation. Lanes are staged in a local buffer
 * and copied out in one go, which is what lets the output overlap the
 * inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @note shorts can't overflow so ubsan won't report it when it happens
 * @see paddsw()
 * @mayalias
 */
void(paddw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  int16_t out[8];
  unsigned lane = 0;
  while (lane < 8) {
    out[lane] = b[lane] + c[lane];
    ++lane;
  }
  memcpy(a, out, sizeof(out));
}
|
|
@ -1,28 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
|
||||
* Adds signed 16-bit integers.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @note shorts can't overflow so ubsan won't report it when it happens
|
||||
* @see paddsw()
|
||||
* @mayalias
|
||||
*/
|
||||
static void paddw(short a[8], const short b[8], const short c[8]) {
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
a[i] = b[i] + c[i];
|
||||
}
|
||||
}
|
||||
void paddw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define paddw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(paddw, SSE2, "paddw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDW_H_ */
|
||||
|
|
44
libc/intrin/palignr.c
Normal file
44
libc/intrin/palignr.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/intrin/palignr.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Overlaps vectors.
 *
 *     𝑖= 0    means 𝑐←𝑎
 *     0<𝑖<16  means 𝑐←𝑎║𝑏
 *     𝑖=16    means 𝑐←𝑏
 *     16<𝑖<32 means 𝑐←𝑏║0
 *     𝑖≥32    means 𝑐←0
 *
 * The result is the 16-byte window starting 𝑖 bytes into the 48-byte
 * sequence 𝑎, 𝑏, zeros; 𝑖 is clamped to 32. Both inputs are copied
 * into scratch space before 𝑐 is written.
 *
 * @param 𝑖 goes faster as constexpr
 * @note not compatible with mmx
 * @see pvalignr()
 * @mayalias
 */
void(palignr)(void *c, const void *b, const void *a, unsigned long i) {
  char window[48];
  unsigned long skip = i < 32 ? i : 32;
  memset(window + 32, 0, 16);
  memcpy(window + 16, b, 16);
  memcpy(window, a, 16);
  memcpy(c, window + skip, 16);
}
|
|
@ -1,55 +1,46 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
|
||||
#include "libc/assert.h"
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void pvalignr(void *, const void *, const void *, size_t);
|
||||
|
||||
/**
|
||||
* Overlaps vectors.
|
||||
*
|
||||
* 𝑖= 0 means 𝑐←𝑎
|
||||
* 0<𝑖<16 means 𝑐←𝑎║𝑏
|
||||
* 𝑖=16 means 𝑐←𝑏
|
||||
* 16<𝑖<32 means 𝑐←𝑏║0
|
||||
* 𝑖≥32 means 𝑐←0
|
||||
*
|
||||
* @param 𝑖 needs to be a literal, constexpr, or embedding
|
||||
* @see pvalignr()
|
||||
* @mayalias
|
||||
*/
|
||||
static void palignr(void *c, const void *b, const void *a, size_t i) {
|
||||
char t[48];
|
||||
memcpy(t, a, 16);
|
||||
memcpy(t + 16, b, 16);
|
||||
memset(t + 32, 0, 16);
|
||||
memcpy(c, t + MIN(32, i), 16);
|
||||
}
|
||||
void palignr(void *, const void *, const void *, unsigned long);
|
||||
|
||||
#ifndef __STRICT_ANSI__
|
||||
#define palignr(C, B, A, I) \
|
||||
do { \
|
||||
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(C); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("palignr\t%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("vpalignr\t%3,%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
palignr(C, B, A, I); \
|
||||
} \
|
||||
__intrin_xmm_t __palignrs(__intrin_xmm_t, __intrin_xmm_t);
|
||||
#define palignr(C, B, A, I) \
|
||||
do { \
|
||||
if (likely(!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3))) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(C); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
|
||||
if (isconstant(I)) { \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("palignr\t%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("vpalignr\t%3,%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
unsigned long Vimm = (I); \
|
||||
typeof(__palignrs) *Fn; \
|
||||
if (likely(Vimm < 32)) { \
|
||||
Fn = (typeof(__palignrs) *)((uintptr_t)&__palignrs + Vimm * 8); \
|
||||
*Xmm0 = Fn(*Xmm1, *Xmm2); \
|
||||
} else { \
|
||||
memset(Xmm0, 0, 16); \
|
||||
} \
|
||||
} \
|
||||
} else { \
|
||||
palignr(C, B, A, I); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */
|
||||
|
|
126
libc/intrin/palignrs.S
Normal file
126
libc/intrin/palignrs.S
Normal file
|
@ -0,0 +1,126 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/	Jump table for palignr() with non-constexpr immediate parameter.
/
/	Entry i starts at __palignrs+i*8 and consists of a palignr
/	with immediate i followed by ret; every entry but the last
/	carries one nop of padding. The size check below guards that
/	layout.
/
/	@note	needs ssse3 cf. prescott c. 2004 cf. bulldozer c. 2011
/	@see	palignr()
	.align	8
__palignrs:
	.irp	imm,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
	palignr	$\imm,%xmm1,%xmm0
	ret
	nop
	.endr
	palignr	$31,%xmm1,%xmm0
	ret
	.if	. - __palignrs != 8 * 32 - 1
	.error	"bad assemblage"
	.endif
	.endfn	__palignrs,globl
|
35
libc/intrin/pand.c
Normal file
35
libc/intrin/pand.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pand.h"
|
||||
|
||||
/**
 * Ands 128-bit integers.
 *
 * Both 64-bit halves are read before either is stored, so the result
 * is still correct when 𝑎 overlaps 𝑏 or 𝑐, including an overlap that
 * is shifted by one element.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pand)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
  uint64_t lo, hi;
  lo = b[0] & c[0];
  hi = b[1] & c[1]; /* must be read before a[0] is written */
  a[0] = lo;
  a[1] = hi;
}
|
14
libc/intrin/pand.h
Normal file
14
libc/intrin/pand.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PAND_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PAND_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from pand.c; write (pand)(a, b, c) to reach
   it without going through the macro below */
void pand(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]);

/* same-named wrapper that forwards the SSE2 "pand" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) */
#define pand(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(pand, SSE2, "pand", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PAND_H_ */
|
35
libc/intrin/pandn.c
Normal file
35
libc/intrin/pandn.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pandn.h"
|
||||
|
||||
/**
 * Ands 128-bit integers after complementing the first one, i.e.
 * computes 𝑎 ← ¬𝑏 ∧ 𝑐.
 *
 * Note this is and-not rather than nand: only 𝑏 is inverted. Both
 * 64-bit halves are read before either is stored, so the result is
 * still correct when 𝑎 overlaps 𝑏 or 𝑐, including an overlap that is
 * shifted by one element.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector (gets complemented)
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pandn)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
  uint64_t lo, hi;
  lo = ~b[0] & c[0];
  hi = ~b[1] & c[1]; /* must be read before a[0] is written */
  a[0] = lo;
  a[1] = hi;
}
|
14
libc/intrin/pandn.h
Normal file
14
libc/intrin/pandn.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PANDN_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PANDN_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from pandn.c; write (pandn)(a, b, c) to
   reach it without going through the macro below */
void pandn(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]);

/* same-named wrapper that forwards the SSE2 "pandn" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h);
   operand order matters here, hence INTRIN_NONCOMMUTATIVE */
#define pandn(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(pandn, SSE2, "pandn", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PANDN_H_ */
|
38
libc/intrin/pavgb.c
Normal file
38
libc/intrin/pavgb.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pavgb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Averages packed 8-bit unsigned integers w/ rounding.
 *
 * Each lane is (𝑏 + 𝑐 + 1) / 2 computed at int width, so halves round
 * upward. Lanes are staged in a local buffer and copied out in one
 * go, which is what lets the output overlap the inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pavgb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
  int sum;
  unsigned lane;
  uint8_t out[16];
  for (lane = 0; lane < 16; ++lane) {
    sum = b[lane] + c[lane] + 1;
    out[lane] = sum >> 1;
  }
  memcpy(a, out, sizeof(out));
}
|
14
libc/intrin/pavgb.h
Normal file
14
libc/intrin/pavgb.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PAVGB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PAVGB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from pavgb.c; write (pavgb)(a, b, c) to
   reach it without going through the macro below */
void pavgb(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]);

/* same-named wrapper that forwards the SSE2 "pavgb" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) */
#define pavgb(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(pavgb, SSE2, "pavgb", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PAVGB_H_ */
|
38
libc/intrin/pavgw.c
Normal file
38
libc/intrin/pavgw.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pavgw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Averages packed 16-bit unsigned integers w/ rounding.
 *
 * Each lane is (𝑏 + 𝑐 + 1) / 2 computed at int width, so halves round
 * upward. Lanes are staged in a local buffer and copied out in one
 * go, which is what lets the output overlap the inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pavgw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
  unsigned i;
  uint16_t r[8]; /* unsigned like the lanes: averages can exceed 32767 */
  for (i = 0; i < 8; ++i) {
    r[i] = (b[i] + c[i] + 1) >> 1;
  }
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pavgw.h
Normal file
14
libc/intrin/pavgw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PAVGW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PAVGW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from pavgw.c; write (pavgw)(a, b, c) to
   reach it without going through the macro below */
void pavgw(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]);

/* same-named wrapper that forwards the SSE2 "pavgw" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) */
#define pavgw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(pavgw, SSE2, "pavgw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PAVGW_H_ */
|
36
libc/intrin/pcmpeqb.c
Normal file
36
libc/intrin/pcmpeqb.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpeqb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Compares 8-bit integers w/ equal to predicate.
 *
 * A result lane is all ones (0xff) where the input lanes match and
 * zero where they differ. Lanes are staged in a local buffer and
 * copied out in one go, which is what lets the output overlap the
 * inputs.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pcmpeqb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
  unsigned lane;
  uint8_t mask[16];
  for (lane = 0; lane < 16; ++lane) {
    if (b[lane] == c[lane]) {
      mask[lane] = 0xff;
    } else {
      mask[lane] = 0x00;
    }
  }
  memcpy(a, mask, sizeof(mask));
}
|
14
libc/intrin/pcmpeqb.h
Normal file
14
libc/intrin/pcmpeqb.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPEQB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPEQB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* portable implementation from pcmpeqb.c; write (pcmpeqb)(a, b, c) to
   reach it without going through the macro below */
void pcmpeqb(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]);

/* same-named wrapper that forwards the SSE2 "pcmpeqb" mnemonic and the
   three operands to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) */
#define pcmpeqb(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(pcmpeqb, SSE2, "pcmpeqb", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPEQB_H_ */
|
36
libc/intrin/pcmpeqd.c
Normal file
36
libc/intrin/pcmpeqd.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpeqd.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Compares signed 32-bit integers w/ equal to predicate.
 *
 * Each lane of 𝑎 becomes -1 (all bits set) when the matching lanes of
 * 𝑏 and 𝑐 are equal, otherwise 0.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pcmpeqd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  int32_t r[4];
  for (i = 0; i < 4; ++i) r[i] = -(b[i] == c[i]);
  /* staged in r so 𝑎 may alias an input; size is taken from r itself */
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pcmpeqd.h
Normal file
14
libc/intrin/pcmpeqd.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPEQD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPEQD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pcmpeqd function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pcmpeqd, ISA=SSE2, OP="pcmpeqd", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pcmpeqd(int32_t[4], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define pcmpeqd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpeqd, SSE2, "pcmpeqd", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPEQD_H_ */
|
36
libc/intrin/pcmpeqw.c
Normal file
36
libc/intrin/pcmpeqw.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpeqw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Compares signed 16-bit integers w/ equal to predicate.
 *
 * Each lane of 𝑎 becomes -1 (all bits set) when the matching lanes of
 * 𝑏 and 𝑐 are equal, otherwise 0.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pcmpeqw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  unsigned i;
  int16_t r[8];
  for (i = 0; i < 8; ++i) r[i] = -(b[i] == c[i]);
  /* staged in r so 𝑎 may alias an input; size is taken from r itself */
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pcmpeqw.h
Normal file
14
libc/intrin/pcmpeqw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPEQW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPEQW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pcmpeqw function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pcmpeqw, ISA=SSE2, OP="pcmpeqw", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pcmpeqw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define pcmpeqw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpeqw, SSE2, "pcmpeqw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPEQW_H_ */
|
36
libc/intrin/pcmpgtb.c
Normal file
36
libc/intrin/pcmpgtb.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Compares signed 8-bit integers w/ greater than predicate.
 *
 * Each lane of 𝑎 becomes -1 (all bits set) when the lane of 𝑏 is
 * greater than the matching lane of 𝑐, otherwise 0.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pcmpgtb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
  unsigned i;
  int8_t r[16];
  for (i = 0; i < 16; ++i) r[i] = -(b[i] > c[i]);
  /* staged in r so 𝑎 may alias an input; size is taken from r itself */
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pcmpgtb.h
Normal file
14
libc/intrin/pcmpgtb.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pcmpgtb function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pcmpgtb, ISA=SSE2, OP="pcmpgtb", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pcmpgtb(int8_t[16], const int8_t[16], const int8_t[16]);
|
||||
|
||||
#define pcmpgtb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpgtb, SSE2, "pcmpgtb", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_ */
|
36
libc/intrin/pcmpgtd.c
Normal file
36
libc/intrin/pcmpgtd.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtd.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Compares signed 32-bit integers w/ greater than predicate.
 *
 * Each lane of 𝑎 becomes -1 (all bits set) when the lane of 𝑏 is
 * greater than the matching lane of 𝑐, otherwise 0.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pcmpgtd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  int32_t r[4];
  for (i = 0; i < 4; ++i) r[i] = -(b[i] > c[i]);
  /* staged in r so 𝑎 may alias an input; size is taken from r itself */
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pcmpgtd.h
Normal file
14
libc/intrin/pcmpgtd.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pcmpgtd function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pcmpgtd, ISA=SSE2, OP="pcmpgtd", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pcmpgtd(int32_t[4], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define pcmpgtd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpgtd, SSE2, "pcmpgtd", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTD_H_ */
|
36
libc/intrin/pcmpgtw.c
Normal file
36
libc/intrin/pcmpgtw.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Compares signed 16-bit integers w/ greater than predicate.
 *
 * Each lane of 𝑎 becomes -1 (all bits set) when the lane of 𝑏 is
 * greater than the matching lane of 𝑐, otherwise 0.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pcmpgtw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  unsigned i;
  int16_t r[8];
  for (i = 0; i < 8; ++i) r[i] = -(b[i] > c[i]);
  /* staged in r so 𝑎 may alias an input; size is taken from r itself */
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pcmpgtw.h
Normal file
14
libc/intrin/pcmpgtw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pcmpgtw function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pcmpgtw, ISA=SSE2, OP="pcmpgtw", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pcmpgtw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define pcmpgtw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpgtw, SSE2, "pcmpgtw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_ */
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PDEP_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PDEP_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* TODO(jart): Implement polyfill. */
|
||||
#define pdep(NUMBER, BITMASK) \
|
||||
|
@ -10,5 +11,6 @@
|
|||
ShuffledBits; \
|
||||
})
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PDEP_H_ */
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PEXT_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PEXT_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* TODO(jart): Implement polyfill. */
|
||||
#define pext(NUMBER, BITMASK) \
|
||||
|
@ -10,5 +11,6 @@
|
|||
ShuffledBits; \
|
||||
})
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PEXT_H_ */
|
||||
|
|
39
libc/intrin/phaddd.c
Normal file
39
libc/intrin/phaddd.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phaddd.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Adds adjacent 32-bit integers.
 *
 * The low two lanes of 𝑎 receive the pairwise sums of 𝑏 and the high
 * two lanes receive the pairwise sums of 𝑐. Sums wrap modulo 2³².
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies two pairs of ints
 * @param 𝑐 [r/o] supplies two pairs of ints
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phaddd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  uint32_t t[4];
  /* add as unsigned so overflow wraps without depending on -fwrapv */
  for (i = 0; i < 2; ++i) {
    t[i + 0] = (uint32_t)b[i * 2] + (uint32_t)b[i * 2 + 1];
    t[i + 2] = (uint32_t)c[i * 2] + (uint32_t)c[i * 2 + 1];
  }
  /* staged in t so 𝑎 may alias an input */
  memcpy(a, t, sizeof(t));
}
|
14
libc/intrin/phaddd.h
Normal file
14
libc/intrin/phaddd.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHADDD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHADDD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line phaddd function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=phaddd, ISA=SSSE3, OP="phaddd", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void phaddd(int32_t[4], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define phaddd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(phaddd, SSSE3, "phaddd", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDD_H_ */
|
|
@ -30,7 +30,7 @@
|
|||
* @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
|
||||
* @mayalias
|
||||
*/
|
||||
void(phaddsw)(short a[8], const short b[8], const short c[8]) {
|
||||
void(phaddsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
||||
int i, t[8];
|
||||
t[0] = b[0] + b[1];
|
||||
t[1] = b[2] + b[3];
|
||||
|
|
|
@ -2,12 +2,14 @@
|
|||
#define COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void phaddsw(short[8], const short[8], const short[8]);
|
||||
void phaddsw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define phaddsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(phaddsw, SSSE3, "phaddsw", INTRIN_NONCOMMUTATIVE, A, B, \
|
||||
C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_ */
|
||||
|
|
|
@ -18,9 +18,10 @@
|
|||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phaddw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Adds adjacent signed 16-bit integers.
|
||||
* Adds adjacent 16-bit integers.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
|
||||
* @param 𝑏 [r/o] supplies four pairs of shorts
|
||||
|
@ -28,8 +29,8 @@
|
|||
* @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
|
||||
* @mayalias
|
||||
*/
|
||||
void(phaddw)(short a[8], const short b[8], const short c[8]) {
|
||||
short t[8];
|
||||
void(phaddw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
||||
int16_t t[8];
|
||||
t[0] = b[0] + b[1];
|
||||
t[1] = b[2] + b[3];
|
||||
t[2] = b[4] + b[5];
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void phaddw(short[8], const short[8], const short[8]);
|
||||
void phaddw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define phaddw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(phaddw, SSSE3, "phaddw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_ */
|
||||
|
|
39
libc/intrin/phsubd.c
Normal file
39
libc/intrin/phsubd.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phsubd.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Subtracts adjacent 32-bit integers.
 *
 * Each output lane is the even-indexed element minus the odd-indexed
 * element of a pair; the low two lanes come from 𝑏 and the high two
 * from 𝑐. Differences wrap modulo 2³².
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies two pairs of ints
 * @param 𝑐 [r/o] supplies two pairs of ints
 * @note goes fast w/ ssse3
 * @mayalias
 */
void(phsubd)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  uint32_t t[4];
  /* subtract as unsigned so overflow wraps without depending on -fwrapv */
  for (i = 0; i < 2; ++i) {
    t[i + 0] = (uint32_t)b[i * 2] - (uint32_t)b[i * 2 + 1];
    t[i + 2] = (uint32_t)c[i * 2] - (uint32_t)c[i * 2 + 1];
  }
  /* staged in t so 𝑎 may alias an input */
  memcpy(a, t, sizeof(t));
}
|
14
libc/intrin/phsubd.h
Normal file
14
libc/intrin/phsubd.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHSUBD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHSUBD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line phsubd function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=phsubd, ISA=SSSE3, OP="phsubd", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void phsubd(int32_t[4], const int32_t[4], const int32_t[4]);
|
||||
|
||||
#define phsubd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(phsubd, SSSE3, "phsubd", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHSUBD_H_ */
|
46
libc/intrin/phsubsw.c
Normal file
46
libc/intrin/phsubsw.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phsubsw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Subtracts adjacent shorts w/ saturation.
 *
 * Each output lane is the even-indexed element minus the odd-indexed
 * element of a pair, clamped to the range SHRT_MIN..SHRT_MAX; the low
 * four lanes come from 𝑏 and the high four from 𝑐.
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies four pairs of shorts
 * @param 𝑐 [r/o] supplies four pairs of shorts
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phsubsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  int k, diff[8];
  /* compute every difference at int width before touching 𝑎 */
  for (k = 0; k < 4; ++k) {
    diff[k] = b[k * 2] - b[k * 2 + 1];
    diff[k + 4] = c[k * 2] - c[k * 2 + 1];
  }
  for (k = 0; k < 8; ++k) {
    if (diff[k] > SHRT_MAX) {
      a[k] = SHRT_MAX;
    } else if (diff[k] < SHRT_MIN) {
      a[k] = SHRT_MIN;
    } else {
      a[k] = diff[k];
    }
  }
}
|
15
libc/intrin/phsubsw.h
Normal file
15
libc/intrin/phsubsw.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHSUBSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHSUBSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line phsubsw function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=phsubsw, ISA=SSSE3, OP="phsubsw", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void phsubsw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define phsubsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(phsubsw, SSSE3, "phsubsw", INTRIN_NONCOMMUTATIVE, A, B, \
|
||||
C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHSUBSW_H_ */
|
43
libc/intrin/phsubw.c
Normal file
43
libc/intrin/phsubw.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phsubw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Subtracts adjacent 16-bit integers.
 *
 * Each output lane is the even-indexed element minus the odd-indexed
 * element of a pair, narrowed back to 16 bits; the low four lanes come
 * from 𝑏 and the high four from 𝑐.
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies four pairs of shorts
 * @param 𝑐 [r/o] supplies four pairs of shorts
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phsubw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  unsigned k;
  int16_t diff[8];
  for (k = 0; k < 4; ++k) {
    diff[k] = b[k * 2] - b[k * 2 + 1];
    diff[k + 4] = c[k * 2] - c[k * 2 + 1];
  }
  /* staged in diff so 𝑎 may alias an input */
  memcpy(a, diff, sizeof(diff));
}
|
14
libc/intrin/phsubw.h
Normal file
14
libc/intrin/phsubw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHSUBW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHSUBW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line phsubw function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=phsubw, ISA=SSSE3, OP="phsubw", FLAGS=INTRIN_NONCOMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void phsubw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define phsubw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(phsubw, SSSE3, "phsubw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHSUBW_H_ */
|
|
@ -20,6 +20,7 @@
|
|||
#include "libc/intrin/pmaddubsw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Multiplies bytes and adds adjacent results w/ short saturation.
|
||||
|
@ -33,9 +34,8 @@
|
|||
* @note greatest simd op, like, ever
|
||||
* @mayalias
|
||||
*/
|
||||
void(pmaddubsw)(short w[8], const unsigned char b[16],
|
||||
const signed char c[16]) {
|
||||
int i;
|
||||
void(pmaddubsw)(int16_t w[8], const uint8_t b[16], const int8_t c[16]) {
|
||||
unsigned i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
w[i] = MIN(SHRT_MAX, MAX(SHRT_MIN, (c[i * 2 + 0] * b[i * 2 + 0] +
|
||||
c[i * 2 + 1] * b[i * 2 + 1])));
|
||||
|
|
|
@ -2,12 +2,14 @@
|
|||
#define COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void pmaddubsw(short[8], const unsigned char[16], const signed char[16]);
|
||||
void pmaddubsw(int16_t[8], const uint8_t[16], const int8_t[16]);
|
||||
|
||||
#define pmaddubsw(W, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmaddubsw, SSSE3, "pmaddubsw", INTRIN_NONCOMMUTATIVE, \
|
||||
W, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_ */
|
||||
|
|
35
libc/intrin/pmaddwd.c
Normal file
35
libc/intrin/pmaddwd.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmaddwd.h"
|
||||
|
||||
/**
 * Multiplies 16-bit signed integers and adds adjacent results.
 *
 * Lane i of 𝑎 is b[2i]*c[2i] + b[2i+1]*c[2i+1]. The sum wraps modulo
 * 2³², which only happens when all four inputs of a lane are -32768,
 * yielding INT32_MIN.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pmaddwd)(int32_t a[4], const int16_t b[8], const int16_t c[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    /* each product fits in int; only the sum can overflow, so the
       addition is done unsigned rather than relying on -fwrapv */
    a[i] = (int32_t)((uint32_t)(b[i * 2] * c[i * 2]) +
                     (uint32_t)(b[i * 2 + 1] * c[i * 2 + 1]));
  }
}
|
14
libc/intrin/pmaddwd.h
Normal file
14
libc/intrin/pmaddwd.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMADDWD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMADDWD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pmaddwd function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pmaddwd, ISA=SSE2, OP="pmaddwd", FLAGS=INTRIN_COMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pmaddwd(int32_t[4], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define pmaddwd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmaddwd, SSE2, "pmaddwd", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMADDWD_H_ */
|
39
libc/intrin/pmaxsw.c
Normal file
39
libc/intrin/pmaxsw.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmaxsw.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Gets maximum of signed 16-bit integers.
 *
 * Each lane of 𝑎 receives the larger of the matching lanes of 𝑏 and 𝑐.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pmaxsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  unsigned i;
  int16_t r[8];
  for (i = 0; i < 8; ++i) {
    r[i] = b[i] > c[i] ? b[i] : c[i];
  }
  /* staged in r so 𝑎 may alias an input; size is taken from r itself */
  memcpy(a, r, sizeof(r));
}
|
14
libc/intrin/pmaxsw.h
Normal file
14
libc/intrin/pmaxsw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMAXSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMAXSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Prototype of the out-of-line pmaxsw function, followed by a same-named
   function-like macro that forwards (A, B, C) to INTRIN_SSEVEX_X_X_X_ with
   PURE=pmaxsw, ISA=SSE2, OP="pmaxsw", FLAGS=INTRIN_COMMUTATIVE.
   NOTE(review): presumably the macro falls back to the function when the
   ISA check fails -- confirm in libc/intrin/macros.h. */
void pmaxsw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
#define pmaxsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmaxsw, SSE2, "pmaxsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMAXSW_H_ */
|
37
libc/intrin/pmaxub.c
Normal file
37
libc/intrin/pmaxub.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmaxub.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Returns maximum of 8-bit unsigned integers.
 *
 * Each of the sixteen output lanes receives the larger of the
 * corresponding lanes of 𝑏 and 𝑐, compared as unsigned values.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pmaxub)(unsigned char a[16], const unsigned char b[16],
             const unsigned char c[16]) {
  /* The name is parenthesized so the like-named macro from the header
     does not expand here. */
  unsigned i;
  for (i = 0; i < 16; ++i) {
    /* Each lane is read before it is written, so 𝑎 may be 𝑏 or 𝑐. */
    unsigned char lhs = b[i];
    unsigned char rhs = c[i];
    a[i] = lhs < rhs ? rhs : lhs;
  }
}
|
15
libc/intrin/pmaxub.h
Normal file
15
libc/intrin/pmaxub.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMAXUB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMAXUB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: lane-wise maximum of sixteen unsigned bytes
   (see pmaxub.c, where it is defined with a parenthesized name). */
void pmaxub(unsigned char[16], const unsigned char[16],
|
||||
const unsigned char[16]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pmaxub" mnemonic and the
   commutativity flag. */
#define pmaxub(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmaxub, SSE2, "pmaxub", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMAXUB_H_ */
|
39
libc/intrin/pminsw.c
Normal file
39
libc/intrin/pminsw.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pminsw.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Computes lane-wise minimum of signed 16-bit integers.
 *
 * @param 𝑎 [w/o] receives the eight smaller values
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pminsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  /* Results are staged in a local buffer and copied out at the end, so
     the inputs are fully read before 𝑎 is touched. */
  int16_t lanes[8];
  unsigned k = 0;
  while (k < 8) {
    lanes[k] = c[k] < b[k] ? c[k] : b[k];
    ++k;
  }
  memcpy(a, lanes, 16);
}
|
14
libc/intrin/pminsw.h
Normal file
14
libc/intrin/pminsw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMINSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMINSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: lane-wise minimum of eight signed 16-bit
   integers (see pminsw.c, where it is defined with a parenthesized
   name). */
void pminsw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pminsw" mnemonic and the
   commutativity flag. */
#define pminsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pminsw, SSE2, "pminsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMINSW_H_ */
|
37
libc/intrin/pminub.c
Normal file
37
libc/intrin/pminub.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pminub.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Computes lane-wise minimum of unsigned 8-bit integers.
 *
 * @param 𝑎 [w/o] receives the sixteen smaller values
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pminub)(unsigned char a[16], const unsigned char b[16],
             const unsigned char c[16]) {
  unsigned lane = 0;
  while (lane < 16) {
    /* Both inputs are read before the lane is stored. */
    unsigned char lhs = b[lane];
    unsigned char rhs = c[lane];
    a[lane] = rhs < lhs ? rhs : lhs;
    ++lane;
  }
}
|
15
libc/intrin/pminub.h
Normal file
15
libc/intrin/pminub.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMINUB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMINUB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: lane-wise minimum of sixteen unsigned bytes
   (see pminub.c, where it is defined with a parenthesized name). */
void pminub(unsigned char[16], const unsigned char[16],
|
||||
const unsigned char[16]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pminub" mnemonic and the
   commutativity flag. */
#define pminub(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pminub, SSE2, "pminub", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMINUB_H_ */
|
28
libc/intrin/pmovmskb.c
Normal file
28
libc/intrin/pmovmskb.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
|
||||
/**
 * Gathers the high bit of each of sixteen bytes into a bitmask.
 *
 * @param 𝑝 [r/o] supplies sixteen bytes
 * @return mask whose bit 𝑖 is the top bit of 𝑝[𝑖]; bits 16+ are zero
 */
uint32_t(pmovmskb)(const uint8_t p[16]) {
  uint32_t mask = 0;
  uint32_t lane;
  for (lane = 0; lane < 16; ++lane) {
    /* p[lane] >> 7 is 0 or 1, i.e. the byte's sign bit. */
    mask |= (uint32_t)(p[lane] >> 7) << lane;
  }
  return mask;
}
|
27
libc/intrin/pmovmskb.h
Normal file
27
libc/intrin/pmovmskb.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: returns a mask built from the top bit of each
   of the sixteen input bytes (see pmovmskb.c). */
uint32_t pmovmskb(const uint8_t[16]);
|
||||
|
||||
/* Statement-expression wrapper that shadows the function. When
   !IsModeDbg() && X86_HAVE(SSE2) holds it issues the instruction via
   inline asm, choosing "vpmovmskb" if X86_NEED(AVX) is true and
   "pmovmskb" otherwise. In every other case it calls the function; the
   inner pmovmskb(A) is not re-expanded because a macro does not expand
   recursively inside its own replacement. A is evaluated once on
   whichever path is taken. */
#define pmovmskb(A) \
|
||||
({ \
|
||||
uint32_t Mask; \
|
||||
if (!IsModeDbg() && X86_HAVE(SSE2)) { \
|
||||
const __intrin_xmm_t *Xmm = (const __intrin_xmm_t *)(A); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("pmovmskb\t%1,%0" : "=r"(Mask) : "x"(*Xmm)); \
|
||||
} else { \
|
||||
asm("vpmovmskb\t%1,%0" : "=r"(Mask) : "x"(*Xmm)); \
|
||||
} \
|
||||
} else { \
|
||||
Mask = pmovmskb(A); \
|
||||
} \
|
||||
Mask; \
|
||||
})
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_ */
|
|
@ -18,17 +18,19 @@
|
|||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmulhrsw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Multiplies Q15 numbers.
|
||||
*
|
||||
* @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
|
||||
* @note a.k.a. packed multiply high w/ round & scale
|
||||
* @see Q2F(15,𝑥), F2Q(15,𝑥)
|
||||
* @mayalias
|
||||
*/
|
||||
void(pmulhrsw)(short a[8], const short b[8], const short c[8]) {
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
a[i] = (((b[i] * c[i]) >> 14) + 1) >> 1;
|
||||
}
|
||||
void(pmulhrsw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  /* Results are staged locally so 𝑎 may alias either input. */
  int16_t rounded[8];
  unsigned lane = 0;
  while (lane < 8) {
    /* The full product fits in int; keep 17 bits, add the rounding bit,
       then drop the lowest bit. */
    int scaled = (b[lane] * c[lane]) >> 14;
    rounded[lane] = (scaled + 1) >> 1;
    ++lane;
  }
  memcpy(a, rounded, 16);
}
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
#define COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
void pmulhrsw(short a[8], const short b[8], const short c[8]);
|
||||
/* Out-of-line function: per-lane (((b*c) >> 14) + 1) >> 1 over eight
   int16_t lanes (see pmulhrsw.c). */
void pmulhrsw(int16_t a[8], const int16_t b[8], const int16_t c[8]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSSE3 feature tag, the "pmulhrsw" mnemonic and the
   commutativity flag. */
#define pmulhrsw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmulhrsw, SSSE3, "pmulhrsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_ */
|
||||
|
|
38
libc/intrin/pmulhuw.c
Normal file
38
libc/intrin/pmulhuw.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmulhuw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Multiplies 16-bit unsigned integers and stores high word.
 *
 * Each lane receives bits 16..31 of the 32-bit product of the
 * corresponding lanes of 𝑏 and 𝑐.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pmulhuw)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
  unsigned i;
  uint16_t r[8]; /* staged so 𝑎 may alias either input */
  for (i = 0; i < 8; ++i) {
    /* uint16_t operands would otherwise promote to int, and a product
       such as 0xffff * 0xffff overflows int. Multiplying as uint32_t
       keeps the result defined without relying on -fwrapv. */
    uint32_t product = (uint32_t)b[i] * (uint32_t)c[i];
    r[i] = (uint16_t)(product >> 16);
  }
  memcpy(a, r, 16);
}
|
14
libc/intrin/pmulhuw.h
Normal file
14
libc/intrin/pmulhuw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULHUW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULHUW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: high 16 bits of each unsigned 16x16 product
   (see pmulhuw.c, where it is defined with a parenthesized name). */
void pmulhuw(uint16_t[8], const uint16_t[8], const uint16_t[8]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pmulhuw" mnemonic and the
   commutativity flag. */
#define pmulhuw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmulhuw, SSE2, "pmulhuw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHUW_H_ */
|
38
libc/intrin/pmulhw.c
Normal file
38
libc/intrin/pmulhw.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmulhw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Multiplies signed 16-bit integers, keeping the upper word.
 *
 * @param 𝑎 [w/o] receives the high 16 bits of each 32-bit product
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pmulhw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  /* Results are staged locally so 𝑎 may alias either input. */
  int16_t high[8];
  unsigned lane = 0;
  while (lane < 8) {
    /* The product of two int16_t values always fits in int. */
    int product = b[lane] * c[lane];
    /* Reinterpret as unsigned so the shift extracts bits 16..31. */
    high[lane] = (uint32_t)product >> 16;
    ++lane;
  }
  memcpy(a, high, 16);
}
|
14
libc/intrin/pmulhw.h
Normal file
14
libc/intrin/pmulhw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULHW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULHW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: high 16 bits of each signed 16x16 product
   (see pmulhw.c, where it is defined with a parenthesized name). */
void pmulhw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pmulhw" mnemonic and the
   commutativity flag. */
#define pmulhw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmulhw, SSE2, "pmulhw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHW_H_ */
|
39
libc/intrin/pmulld.c
Normal file
39
libc/intrin/pmulld.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmulld.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Multiplies 32-bit signed integers.
 *
 * Each lane receives the low 32 bits of the product of the
 * corresponding lanes of 𝑏 and 𝑐.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @see pmuludq()
 * @mayalias
 */
void(pmulld)(int32_t a[4], const int32_t b[4], const int32_t c[4]) {
  unsigned i;
  int32_t r[4]; /* staged so 𝑎 may alias either input */
  for (i = 0; i < 4; ++i) {
    /* Multiply as unsigned: the low 32 bits are the same as for the
       signed product, but wraparound is defined by the language rather
       than depending on -fwrapv. */
    r[i] = (int32_t)((uint32_t)b[i] * (uint32_t)c[i]);
  }
  memcpy(a, r, 16);
}
|
14
libc/intrin/pmulld.h
Normal file
14
libc/intrin/pmulld.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULLD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULLD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: lane-wise product of four int32_t values,
   truncated to 32 bits (see pmulld.c). */
void pmulld(int32_t[4], const int32_t[4], const int32_t[4]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE4_1 feature tag, the "pmulld" mnemonic and the
   commutativity flag. */
#define pmulld(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmulld, SSE4_1, "pmulld", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULLD_H_ */
|
38
libc/intrin/pmullw.c
Normal file
38
libc/intrin/pmullw.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmullw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Multiplies signed 16-bit integers, keeping the lower word.
 *
 * @param 𝑎 [w/o] receives each product truncated to 16 bits
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(pmullw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  /* Results are staged locally so 𝑎 may alias either input. */
  int16_t low[8];
  unsigned lane = 0;
  while (lane < 8) {
    /* Operands promote to int, where the product cannot overflow; the
       store narrows it back to 16 bits. */
    int product = b[lane] * c[lane];
    low[lane] = product;
    ++lane;
  }
  memcpy(a, low, 16);
}
|
14
libc/intrin/pmullw.h
Normal file
14
libc/intrin/pmullw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULLW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULLW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: lane-wise product of eight int16_t values,
   narrowed back to 16 bits (see pmullw.c). */
void pmullw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pmullw" mnemonic and the
   commutativity flag. */
#define pmullw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmullw, SSE2, "pmullw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULLW_H_ */
|
37
libc/intrin/pmuludq.c
Normal file
37
libc/intrin/pmuludq.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmuludq.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Widening multiply of the even 32-bit unsigned lanes.
 *
 * Lanes 0 and 2 of each input are multiplied as 64-bit values; the odd
 * lanes are not read.
 *
 * @param 𝑎 [w/o] receives the two 64-bit products
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @see pmulld()
 * @mayalias
 */
void(pmuludq)(uint64_t a[2], const uint32_t b[4], const uint32_t c[4]) {
  uint64_t wide;
  /* Same order as a two-iteration loop: lane 0 is stored before lane 2
     is loaded. */
  wide = (uint64_t)b[0] * c[0];
  a[0] = wide;
  wide = (uint64_t)b[2] * c[2];
  a[1] = wide;
}
|
14
libc/intrin/pmuludq.h
Normal file
14
libc/intrin/pmuludq.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULUDQ_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULUDQ_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: 64-bit products of input lanes 0 and 2
   (see pmuludq.c, where it is defined with a parenthesized name). */
void pmuludq(uint64_t[2], const uint32_t[4], const uint32_t[4]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "pmuludq" mnemonic and the
   commutativity flag. */
#define pmuludq(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pmuludq, SSE2, "pmuludq", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULUDQ_H_ */
|
35
libc/intrin/por.c
Normal file
35
libc/intrin/por.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/por.h"
|
||||
|
||||
/**
 * Bitwise-ors two 128-bit values, held as pairs of 64-bit words.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @mayalias
 */
void(por)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
  uint64_t word;
  /* Low word first, then high word, each read before it is stored. */
  word = b[0] | c[0];
  a[0] = word;
  word = b[1] | c[1];
  a[1] = word;
}
|
14
libc/intrin/por.h
Normal file
14
libc/intrin/por.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_POR_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_POR_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: bitwise OR of two 128-bit values held as two
   uint64_t words each (see por.c). */
void por(uint64_t[2], const uint64_t[2], const uint64_t[2]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "por" mnemonic and the
   commutativity flag. */
#define por(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(por, SSE2, "por", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_POR_H_ */
|
37
libc/intrin/psadbw.c
Normal file
37
libc/intrin/psadbw.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/psadbw.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Computes sum of absolute differences.
 *
 * The sixteen byte lanes are treated as two groups of eight. The sum
 * of |𝑏[i] - 𝑐[i]| over bytes 0..7 goes to 𝑎[0] and the sum over
 * bytes 8..15 goes to 𝑎[1]; each sum is at most 8 * 255, so the upper
 * bits of both words are zero.
 *
 * @param 𝑎 [w/o] receives the two sums, one per 64-bit word
 * @param 𝑏 [r/o] is your first unsigned byte array
 * @param 𝑐 [r/o] is your second unsigned byte array
 * @mayalias
 */
void(psadbw)(uint64_t a[2], const uint8_t b[16], const uint8_t c[16]) {
  unsigned i, x, y;
  for (x = i = 0; i < 8; ++i) {
    x += b[i] < c[i] ? c[i] - b[i] : b[i] - c[i];
  }
  /* i carries over from the first loop, so this covers bytes 8..15. */
  for (y = 0; i < 16; ++i) {
    y += b[i] < c[i] ? c[i] - b[i] : b[i] - c[i];
  }
  /* Both sums are complete before 𝑎 is written, so 𝑎 may alias. */
  a[0] = x;
  a[1] = y;
}
|
14
libc/intrin/psadbw.h
Normal file
14
libc/intrin/psadbw.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PSADBW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PSADBW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line function: sums of absolute byte differences, one sum for
   bytes 0..7 and one for bytes 8..15 (see psadbw.c). */
void psadbw(uint64_t[2], const uint8_t[16], const uint8_t[16]);
|
||||
|
||||
/* Shadows the function of the same name: plain calls expand to
   INTRIN_SSEVEX_X_X_X_ (not defined in this header), which is handed the
   function name, the SSE2 feature tag, the "psadbw" mnemonic and the
   commutativity flag. */
#define psadbw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(psadbw, SSE2, "psadbw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSADBW_H_ */
|
38
libc/intrin/pshufb.c
Normal file
38
libc/intrin/pshufb.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pshufb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
 * Permutes bytes according to a selector vector, optionally zeroing.
 *
 * For each lane, a selector with its top bit set yields zero;
 * otherwise the low four selector bits pick a byte of 𝑏.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies byte vector
 * @param 𝑐 [r/o] supplies mask vector
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @note doesn't perfectly emulate mmx
 * @mayalias
 */
void(pshufb)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
  /* Results are staged locally so 𝑎 may alias either input. */
  uint8_t shuffled[16];
  unsigned lane = 0;
  while (lane < 16) {
    if (c[lane] & 0x80) {
      shuffled[lane] = 0;
    } else {
      shuffled[lane] = b[c[lane] & 0x0F];
    }
    ++lane;
  }
  memcpy(a, shuffled, 16);
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue