Initial import

This commit is contained in:
Justine Tunney 2020-06-15 07:18:57 -07:00
commit c91b3c5006
14915 changed files with 590219 additions and 0 deletions

53
libc/intrin/intrin.mk Normal file
View file

@ -0,0 +1,53 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
# Build rules for the libc/intrin package, which produces one static
# archive (intrin.a) under o/$(MODE)/libc/intrin/.
PKGS += LIBC_INTRIN
LIBC_INTRIN_ARTIFACTS += LIBC_INTRIN_A
# What a dependent links against: this archive plus its dependencies.
LIBC_INTRIN = $(LIBC_INTRIN_A_DEPS) $(LIBC_INTRIN_A)
LIBC_INTRIN_A = o/$(MODE)/libc/intrin/intrin.a
# The globbed file list (LIBC_INTRIN_A_FILES, assigned below) split by
# extension. These are recursively-expanded, so the later := is visible.
LIBC_INTRIN_A_HDRS = $(filter %.h,$(LIBC_INTRIN_A_FILES))
LIBC_INTRIN_A_SRCS_S = $(filter %.S,$(LIBC_INTRIN_A_FILES))
LIBC_INTRIN_A_SRCS_C = $(filter %.c,$(LIBC_INTRIN_A_FILES))
LIBC_INTRIN_A_SRCS = $(LIBC_INTRIN_A_SRCS_S) $(LIBC_INTRIN_A_SRCS_C)
# NOTE(review): LIBC_INTRIN_A_CHECKS is assigned again further down (with
# the header checks added), which replaces this value; this line looks
# redundant -- confirm before removing.
LIBC_INTRIN_A_CHECKS = $(LIBC_INTRIN_A).pkg
# Evaluated once, immediately (:=): every file directly inside
# libc/intrin/ and libc/intrin/delegates/.
LIBC_INTRIN_A_FILES := \
$(wildcard libc/intrin/*) \
$(wildcard libc/intrin/delegates/*)
# One .zip.o per source file, plus one .o per .S and per .c file.
LIBC_INTRIN_A_OBJS = \
$(LIBC_INTRIN_A_SRCS:%=o/$(MODE)/%.zip.o) \
$(LIBC_INTRIN_A_SRCS_S:%.S=o/$(MODE)/%.o) \
$(LIBC_INTRIN_A_SRCS_C:%.c=o/$(MODE)/%.o)
# Checks for the archive: its .pkg file and one .ok target per header.
LIBC_INTRIN_A_CHECKS = \
$(LIBC_INTRIN_A).pkg \
$(LIBC_INTRIN_A_HDRS:%=o/$(MODE)/%.ok)
# Packages this archive depends on directly.
LIBC_INTRIN_A_DIRECTDEPS = \
LIBC_STUBS \
LIBC_NEXGEN32E
# Expands each direct dependency's package variable and de-duplicates the
# result (uniq is defined elsewhere in the build).
LIBC_INTRIN_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_INTRIN_A_DIRECTDEPS),$($(x))))
# The archive is rebuilt when the source directory, the .pkg file, or any
# object changes. No recipe is given here.
$(LIBC_INTRIN_A): \
libc/intrin/ \
$(LIBC_INTRIN_A).pkg \
$(LIBC_INTRIN_A_OBJS)
# The .pkg file depends on this archive's objects and on the .pkg file of
# each direct dependency's archive.
$(LIBC_INTRIN_A).pkg: \
$(LIBC_INTRIN_A_OBJS) \
$(foreach x,$(LIBC_INTRIN_A_DIRECTDEPS),$($(x)_A).pkg)
# Package-level aggregates over every artifact registered above.
LIBC_INTRIN_LIBS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)))
LIBC_INTRIN_HDRS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_HDRS))
LIBC_INTRIN_SRCS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_SRCS))
# NOTE(review): the next LIBC_INTRIN_CHECKS assignment below replaces this
# one, so the per-artifact .pkg check is not part of the final value --
# confirm which of the two definitions is intended.
LIBC_INTRIN_CHECKS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_CHECKS))
LIBC_INTRIN_OBJS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_OBJS))
LIBC_INTRIN_CHECKS = $(LIBC_INTRIN_HDRS:%=o/$(MODE)/%.ok)
# Every object is rebuilt when the build files or this makefile change.
$(LIBC_INTRIN_OBJS): $(BUILD_FILES) libc/intrin/intrin.mk
# Convenience target: building o/$(MODE)/libc/intrin runs the checks.
.PHONY: o/$(MODE)/libc/intrin
o/$(MODE)/libc/intrin: $(LIBC_INTRIN_CHECKS)

51
libc/intrin/macros.h Normal file
View file

@ -0,0 +1,51 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
#define COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
#include "libc/dce.h"
#include "libc/nexgen32e/x86feature.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* Pasted in front of the first asm input constraint. "%" is the GCC
   constraint modifier that marks that operand and the one following it
   as interchangeable. */
#define INTRIN_COMMUTATIVE "%"
/* Expands to nothing: the asm operands keep the order they were written. */
#define INTRIN_NONCOMMUTATIVE
#ifndef __STRICT_ANSI__
/* 16-byte vector of char through which operands are bound to "x" (xmm)
   register constraints below. */
typedef char __intrin_xmm_t _Vector_size(16) mayalias;
/**
 * Emits a vector instruction taking two vector inputs, or calls a
 * fallback.
 *
 * When IsModeDbg() is false and both X86_NEED(SSE) and X86_HAVE(ISA)
 * hold, A, B and C are reinterpreted as pointers to 16-byte vectors and
 * OP is issued via inline asm. Without X86_NEED(AVX) the two-operand
 * form is used, with the result register tied to *B and *C as the other
 * operand; with it, the "v"-prefixed three-operand form is used. In all
 * other cases PURE(A, B, C) is called instead.
 *
 * @param PURE  name invoked as PURE(A, B, C) on the fallback path
 * @param ISA   feature token handed to X86_HAVE()
 * @param OP    string literal with the mnemonic, without the "v" prefix
 * @param FLAGS string literal pasted before the first input constraint,
 *              e.g. INTRIN_COMMUTATIVE or INTRIN_NONCOMMUTATIVE
 * @param A     pointer that receives the 16-byte result
 * @param B     pointer to the first 16-byte input
 * @param C     pointer to the second 16-byte input
 * @note A, B and C appear textually in both branches; only one runs
 */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) \
do { \
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)) { \
__intrin_xmm_t *Xmm0 = (void *)(A); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(C); \
if (!X86_NEED(AVX)) { \
asm(OP "\t%1,%0" : "=x"(*Xmm0) : FLAGS "x"(*Xmm2), "0"(*Xmm1)); \
} else { \
asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : FLAGS "x"(*Xmm1), "x"(*Xmm2)); \
} \
} else { \
PURE(A, B, C); \
} \
} while (0)
/**
 * Emits a vector instruction taking one vector input and one immediate,
 * or calls a fallback.
 *
 * Same dispatch as INTRIN_SSEVEX_X_X_X_, except the last operand is bound
 * with the "i" constraint, so on the asm path I has to be something the
 * compiler accepts as an immediate.
 *
 * @param PURE name invoked as PURE(A, B, I) on the fallback path
 * @param ISA  feature token handed to X86_HAVE()
 * @param OP   string literal with the mnemonic, without the "v" prefix
 * @param A    pointer that receives the 16-byte result
 * @param B    pointer to the 16-byte input
 * @param I    immediate operand
 */
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) \
do { \
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)) { \
__intrin_xmm_t *Xmm0 = (void *)(A); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
if (!X86_NEED(AVX)) { \
asm(OP "\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1)); \
} else { \
asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
} \
} else { \
PURE(A, B, I); \
} \
} while (0)
#else
/* Strict ANSI builds never emit inline asm: both macros reduce to a plain
   call of the fallback. */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) PURE(A, B, C)
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) PURE(A, B, I)
#endif /* ANSI */
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_MACROS_H_ */

30
libc/intrin/packsswb.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
#define COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
#include "libc/intrin/macros.h"
#include "libc/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Casts shorts to signed chars w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ] | 𝑖 ∈ [0,8)} ║ {CLAMP[𝑐ᵢ] | 𝑖 ∈ [0,8)}
 *
 * The sixteen results are staged in a local buffer and copied out at the
 * end, so 𝑎 may share storage with 𝑏 or with 𝑐.
 *
 * @param 𝑎 [w/o] receives sixteen bytes, each clamped to [-128,127]
 * @param 𝑏 [r/o] supplies the eight shorts that become 𝑎[0..7]
 * @param 𝑐 [r/o] supplies the eight shorts that become 𝑎[8..15]
 * @see packuswb()
 * @mayalias
 */
static void packsswb(signed char a[16], const short b[8], const short c[8]) {
  int i, x;
  signed char r[16];
  for (i = 0; i < 8; ++i) {
    x = b[i];
    r[i] = x < -128 ? -128 : x > 127 ? 127 : x;
  }
  for (i = 0; i < 8; ++i) {
    x = c[i];
    r[i + 8] = x < -128 ? -128 : x > 127 ? 127 : x;
  }
  /* Write the output only after both inputs have been read in full. */
  for (i = 0; i < 16; ++i) {
    a[i] = r[i];
  }
}
/* Uses the SSE2 packsswb instruction when INTRIN_SSEVEX_X_X_X_ allows it,
   otherwise calls the function above. */
#define packsswb(A, B, C)                                                    \
  INTRIN_SSEVEX_X_X_X_(packsswb, SSE2, "packsswb", INTRIN_NONCOMMUTATIVE, A, \
                       B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_ */

30
libc/intrin/packuswb.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
#define COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
#include "libc/intrin/macros.h"
#include "libc/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Casts shorts to unsigned chars w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ] | 𝑖 ∈ [0,8)} ║ {CLAMP[𝑐ᵢ] | 𝑖 ∈ [0,8)}
 *
 * The sixteen results are staged in a local buffer and copied out at the
 * end, so 𝑎 may share storage with 𝑏 or with 𝑐.
 *
 * @param 𝑎 [w/o] receives sixteen bytes, each clamped to [0,255]
 * @param 𝑏 [r/o] supplies the eight shorts that become 𝑎[0..7]
 * @param 𝑐 [r/o] supplies the eight shorts that become 𝑎[8..15]
 * @see packsswb()
 * @mayalias
 */
static void packuswb(unsigned char a[16], const short b[8], const short c[8]) {
  int i, x;
  unsigned char r[16];
  for (i = 0; i < 8; ++i) {
    x = b[i];
    r[i] = x < 0 ? 0 : x > 255 ? 255 : x;
  }
  for (i = 0; i < 8; ++i) {
    x = c[i];
    r[i + 8] = x < 0 ? 0 : x > 255 ? 255 : x;
  }
  /* Write the output only after both inputs have been read in full. */
  for (i = 0; i < 16; ++i) {
    a[i] = r[i];
  }
}
/* Uses the SSE2 packuswb instruction when INTRIN_SSEVEX_X_X_X_ allows it,
   otherwise calls the function above. */
#define packuswb(A, B, C)                                                    \
  INTRIN_SSEVEX_X_X_X_(packuswb, SSE2, "packuswb", INTRIN_NONCOMMUTATIVE, A, \
                       B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_ */

28
libc/intrin/paddsw.h Normal file
View file

@ -0,0 +1,28 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_
#include "libc/intrin/macros.h"
#include "libc/limits.h"
#include "libc/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Sums two vectors of signed shorts, lane by lane, w/ saturation.
 *
 * Each sum is computed as an int and then limited to
 * [SHRT_MIN,SHRT_MAX] before being stored.
 *
 * @param 𝑎 [w/o] receives the eight saturated sums
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @see paddw()
 * @mayalias
 */
static void paddsw(short a[8], const short b[8], const short c[8]) {
  int lane, sum;
  for (lane = 0; lane < 8; ++lane) {
    sum = b[lane] + c[lane];
    a[lane] = MIN(SHRT_MAX, MAX(SHRT_MIN, sum));
  }
}
/* Uses the SSE2 paddsw instruction when INTRIN_SSEVEX_X_X_X_ allows it,
   otherwise calls the function above. */
#define paddsw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(paddsw, SSE2, "paddsw", INTRIN_COMMUTATIVE, A, B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_ */

28
libc/intrin/paddw.h Normal file
View file

@ -0,0 +1,28 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
#include "libc/intrin/macros.h"
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Sums two vectors of signed shorts, lane by lane, w/o saturation.
 *
 * @param 𝑎 [w/o] receives the eight sums
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @note the addition happens in int and is narrowed on store, so a sum
 *     that doesn't fit a short is not something ubsan will report
 * @see paddsw()
 * @mayalias
 */
static void paddw(short a[8], const short b[8], const short c[8]) {
  short *const end = a + 8;
  while (a != end) {
    *a++ = *b++ + *c++;
  }
}
/* Uses the SSE2 paddw instruction when INTRIN_SSEVEX_X_X_X_ allows it,
   otherwise calls the function above. */
#define paddw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(paddw, SSE2, "paddw", INTRIN_COMMUTATIVE, A, B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDW_H_ */

55
libc/intrin/palignr.h Normal file
View file

@ -0,0 +1,55 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
#include "libc/assert.h"
#include "libc/intrin/macros.h"
#include "libc/macros.h"
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
void pvalignr(void *, const void *, const void *, size_t);
/**
 * Overlaps vectors.
 *
 * The inputs are laid out as the 48-byte sequence 𝑎 ║ 𝑏 ║ 0 and the
 * sixteen bytes starting at offset min(𝑖,32) are copied to 𝑐:
 *
 *     𝑖 = 0       means 𝑐 ← 𝑎
 *     0 < 𝑖 < 16  means 𝑐 ← tail of 𝑎 ║ head of 𝑏
 *     𝑖 = 16      means 𝑐 ← 𝑏
 *     16 < 𝑖 < 32 means 𝑐 ← tail of 𝑏 ║ zeros
 *     𝑖 ≥ 32      means 𝑐 ← 0
 *
 * @param 𝑐 [w/o] receives sixteen bytes
 * @param 𝑏 [r/o] supplies bytes 16..31 of the sequence
 * @param 𝑎 [r/o] supplies bytes 0..15 of the sequence
 * @param 𝑖 needs to be a literal, constexpr, or embedding
 * @see pvalignr()
 * @mayalias
 */
static void palignr(void *c, const void *b, const void *a, size_t i) {
  char window[48] = {0}; /* the last sixteen bytes stay zero */
  memcpy(window, a, 16);
  memcpy(window + 16, b, 16);
  memcpy(c, window + MIN(32, i), 16);
}
#ifndef __STRICT_ANSI__
/**
 * Inline-asm front end for palignr().
 *
 * When IsModeDbg() is false and both X86_NEED(SSE) and X86_HAVE(SSSE3)
 * hold, C, B and A are reinterpreted as pointers to 16-byte vectors and
 * the instruction is emitted directly: `palignr` with the result
 * register tied to *B, or `vpalignr` when X86_NEED(AVX). In both forms I
 * is bound with the "i" constraint, so it has to be acceptable to the
 * compiler as an immediate. Otherwise the palignr() function is called;
 * the name is not expanded again inside its own macro.
 *
 * @note C, B, A and I appear textually in both branches; only one runs
 */
#define palignr(C, B, A, I) \
do { \
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3)) { \
__intrin_xmm_t *Xmm0 = (void *)(C); \
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
if (!X86_NEED(AVX)) { \
asm("palignr\t%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
} else { \
asm("vpalignr\t%3,%2,%1,%0" \
: "=x"(*Xmm0) \
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
} \
} else { \
palignr(C, B, A, I); \
} \
} while (0)
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */

14
libc/intrin/pdep.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PDEP_H_
#define COSMOPOLITAN_LIBC_INTRIN_PDEP_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* TODO(jart): Implement polyfill. */
/**
 * Issues the x86 `pdep` (parallel bit deposit) instruction.
 *
 * NUMBER is converted to the type of BITMASK and passed as the source
 * register; BITMASK is passed as the mask operand (register or memory).
 * Per the instruction's definition, the low-order bits of the source are
 * scattered to the positions of the set bits in the mask.
 *
 * @return value of type typeof(BITMASK) produced by the instruction
 * @note there is no feature check or fallback here, so the instruction
 *     is emitted unconditionally
 */
#define pdep(NUMBER, BITMASK) \
({ \
typeof(BITMASK) ShuffledBits, Number = (NUMBER); \
asm("pdep\t%2,%1,%0" : "=r"(ShuffledBits) : "r"(Number), "rm"(BITMASK)); \
ShuffledBits; \
})
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PDEP_H_ */

14
libc/intrin/pext.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PEXT_H_
#define COSMOPOLITAN_LIBC_INTRIN_PEXT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* TODO(jart): Implement polyfill. */
/**
 * Issues the x86 `pext` (parallel bit extract) instruction.
 *
 * NUMBER is converted to the type of BITMASK and passed as the source
 * register; BITMASK is passed as the mask operand (register or memory).
 * Per the instruction's definition, the source bits at the positions of
 * the set bits in the mask are gathered into the low-order bits of the
 * result.
 *
 * @return value of type typeof(BITMASK) produced by the instruction
 * @note there is no feature check or fallback here, so the instruction
 *     is emitted unconditionally
 */
#define pext(NUMBER, BITMASK) \
({ \
typeof(BITMASK) ShuffledBits, Number = (NUMBER); \
asm("pext\t%2,%1,%0" : "=r"(ShuffledBits) : "r"(Number), "rm"(BITMASK)); \
ShuffledBits; \
})
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PEXT_H_ */

46
libc/intrin/phaddsw.c Normal file
View file

@ -0,0 +1,46 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/phaddsw.h"
#include "libc/limits.h"
#include "libc/macros.h"
/**
 * Sums neighbouring pairs of shorts w/ saturation.
 *
 * The four pair sums of 𝑏 land in 𝑎[0..3] and the four pair sums of 𝑐
 * land in 𝑎[4..7], each limited to [SHRT_MIN,SHRT_MAX]. All sums are
 * computed before anything is stored.
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies four pairs of shorts
 * @param 𝑐 [r/o] supplies four pairs of shorts
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phaddsw)(short a[8], const short b[8], const short c[8]) {
  int k, sums[8];
  for (k = 0; k < 4; ++k) {
    sums[k] = b[2 * k] + b[2 * k + 1];
    sums[k + 4] = c[2 * k] + c[2 * k + 1];
  }
  for (k = 0; k < 8; ++k) {
    a[k] = MIN(SHRT_MAX, MAX(SHRT_MIN, sums[k]));
  }
}

13
libc/intrin/phaddsw.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_
#include "libc/intrin/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* Out-of-line implementation; the macro below names it as its fallback. */
void phaddsw(short[8], const short[8], const short[8]);
/* Hands INTRIN_SSEVEX_X_X_X_ the "phaddsw" mnemonic, the SSSE3 feature
   token and the function above. Operand order is declared significant. */
#define phaddsw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(phaddsw, SSSE3, "phaddsw", INTRIN_NONCOMMUTATIVE, A, B, \
C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_ */

42
libc/intrin/phaddw.c Normal file
View file

@ -0,0 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/phaddw.h"
/**
 * Sums neighbouring pairs of signed 16-bit integers.
 *
 * The four pair sums of 𝑏 land in 𝑎[0..3] and the four pair sums of 𝑐
 * land in 𝑎[4..7]. All sums are computed into a local buffer before
 * anything is stored.
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies four pairs of shorts
 * @param 𝑐 [r/o] supplies four pairs of shorts
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phaddw)(short a[8], const short b[8], const short c[8]) {
  int k;
  short sums[8];
  for (k = 0; k < 4; ++k) {
    sums[k] = b[2 * k] + b[2 * k + 1];
    sums[k + 4] = c[2 * k] + c[2 * k + 1];
  }
  memcpy(a, sums, sizeof(sums));
}

13
libc/intrin/phaddw.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_
#include "libc/intrin/macros.h"
#include "libc/str/str.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* Out-of-line implementation; the macro below names it as its fallback. */
void phaddw(short[8], const short[8], const short[8]);
/* Hands INTRIN_SSEVEX_X_X_X_ the "phaddw" mnemonic, the SSSE3 feature
   token and the function above. Operand order is declared significant. */
#define phaddw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(phaddw, SSSE3, "phaddw", INTRIN_NONCOMMUTATIVE, A, B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_ */

43
libc/intrin/pmaddubsw.c Normal file
View file

@ -0,0 +1,43 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/pmaddubsw.h"
#include "libc/limits.h"
#include "libc/macros.h"
/**
 * Multiplies bytes and adds adjacent results w/ short saturation.
 *
 *     𝑤ᵢ ← CLAMP[ 𝑏₂ᵢ·𝑐₂ᵢ + 𝑏₂ᵢ₊₁·𝑐₂ᵢ₊₁ ]   for 𝑖 ∈ [0,8)
 *
 * @param 𝑤 [w/o] receives shorts
 * @param 𝑏 [r/o] is your byte data
 * @param 𝑐 [r/o] are your int8 coefficients
 * @note SSSE3 w/ Prescott c. 2004, Bulldozer c. 2011
 * @note greatest simd op, like, ever
 * @mayalias
 */
void(pmaddubsw)(short w[8], const unsigned char b[16],
                const signed char c[16]) {
  int k, even, odd;
  for (k = 0; k < 8; ++k) {
    even = c[k * 2 + 0] * b[k * 2 + 0];
    odd = c[k * 2 + 1] * b[k * 2 + 1];
    w[k] = MIN(SHRT_MAX, MAX(SHRT_MIN, even + odd));
  }
}

13
libc/intrin/pmaddubsw.h Normal file
View file

@ -0,0 +1,13 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
#include "libc/intrin/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* Out-of-line implementation; the macro below names it as its fallback. */
void pmaddubsw(short[8], const unsigned char[16], const signed char[16]);
/* Hands INTRIN_SSEVEX_X_X_X_ the "pmaddubsw" mnemonic, the SSSE3 feature
   token and the function above. Operand order is declared significant. */
#define pmaddubsw(W, B, C) \
INTRIN_SSEVEX_X_X_X_(pmaddubsw, SSSE3, "pmaddubsw", INTRIN_NONCOMMUTATIVE, \
W, B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_ */

34
libc/intrin/pmulhrsw.c Normal file
View file

@ -0,0 +1,34 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/pmulhrsw.h"
/**
 * Multiplies Q15 numbers.
 *
 * Each lane forms the int product 𝑏ᵢ·𝑐ᵢ, shifts it right by fourteen,
 * adds one, and shifts right once more before storing.
 *
 * @param 𝑎 [w/o] receives the eight products
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @note a.k.a. packed multiply high w/ round & scale
 * @see Q2F(15,𝑥), F2Q(15,𝑥)
 * @mayalias
 */
void(pmulhrsw)(short a[8], const short b[8], const short c[8]) {
  int lane, product, scaled;
  for (lane = 0; lane < 8; ++lane) {
    product = b[lane] * c[lane];
    scaled = product >> 14;
    a[lane] = (scaled + 1) >> 1;
  }
}

12
libc/intrin/pmulhrsw.h Normal file
View file

@ -0,0 +1,12 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
#include "libc/intrin/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* Out-of-line implementation; the macro below names it as its fallback. */
void pmulhrsw(short a[8], const short b[8], const short c[8]);
/* Hands INTRIN_SSEVEX_X_X_X_ the "pmulhrsw" mnemonic, the SSSE3 feature
   token and the function above. The two inputs are declared swappable. */
#define pmulhrsw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(pmulhrsw, SSSE3, "pmulhrsw", INTRIN_COMMUTATIVE, A, B, C)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_ */

23
libc/intrin/psraw.h Normal file
View file

@ -0,0 +1,23 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
#include "libc/bits/bits.h"
#include "libc/intrin/macros.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Divides shorts by two power, rounding toward negative infinity.
 *
 * The count is read as an unsigned byte and any value above 15 is treated
 * as 15, so every lane then becomes 0 or -1 according to its sign. This
 * is how the PSRAW instruction handles oversized counts. The shift is
 * written so that it doesn't rely on how the compiler implements right
 * shifts of negative values.
 *
 * @param a [w/o] receives the eight shifted shorts
 * @param b [r/o] supplies the eight shorts to shift
 * @param c is the shift count
 * @note c needs to be a literal, asmconstexpr, or linkconstsym
 * @mayalias
 */
static void psraw(short a[8], const short b[8], char c) {
  int i, x;
  unsigned k;
  k = (unsigned char)c;
  if (k > 15) k = 15;
  for (i = 0; i < 8; ++i) {
    x = b[i];
    /* ~x is non-negative when x is negative, so both shifts are on
       non-negative ints */
    a[i] = x < 0 ? ~(~x >> k) : x >> k;
  }
}
/* Uses the SSE2 psraw instruction with an immediate count when
   INTRIN_SSEVEX_X_X_I_ allows it, otherwise calls the function above. */
#define psraw(A, B, I) INTRIN_SSEVEX_X_X_I_(psraw, SSE2, "psraw", A, B, I)
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */

26
libc/intrin/repmovsb.h Normal file
View file

@ -0,0 +1,26 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_
#define COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Copies bytes forward and advances both cursors.
 *
 * Bytes are moved one at a time in ascending address order. On return,
 * *dest and *src each point just past the last byte processed.
 *
 * @param dest [r/w] points to the output cursor
 * @param src  [r/w] points to the input cursor
 * @param cx   is the number of bytes to copy
 */
static void repmovsb(void **dest, const void **src, size_t cx) {
  char *out = (char *)*dest;
  const char *in = (const char *)*src;
  size_t n;
  for (n = 0; n < cx; ++n) {
    out[n] = in[n];
  }
  *dest = out + cx;
  *src = in + cx;
}
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/**
 * Inline-asm version of repmovsb() for GNU-compatible x86 compilers.
 *
 * Loads *DI, *SI and CX into the registers selected by the "D", "S" and
 * "c" constraints, issues `rep movsb`, then stores the updated pointers
 * back through DI and SI. The two "m" operands describe the Cx bytes at
 * the destination and at the source, so the compiler knows which memory
 * is written and read.
 *
 * @note DI and SI are each evaluated twice (once to load, once to store
 *     back); CX is evaluated once
 */
#define repmovsb(DI, SI, CX) \
({ \
void *Di = *(DI); \
const void *Si = *(SI); \
size_t Cx = (CX); \
asm("rep movsb" \
: "=D"(Di), "=S"(Si), "=c"(Cx), "=m"(*(char(*)[Cx])Di) \
: "0"(Di), "1"(Si), "2"(Cx), "m"(*(const char(*)[Cx])Si)); \
*(DI) = Di, *(SI) = Si; \
})
#endif
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_ */

131
libc/intrin/vpalignr.c Normal file
View file

@ -0,0 +1,131 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/palignr.h"
/**
 * Shifts and concatenates xmm registers.
 *
 * Forwards to palignr() with 𝑖 turned into a literal: every value from
 * 0 through 31 has its own call site, and anything larger is handled as
 * 32.
 *
 * @param p receives sixteen bytes
 * @param prev is forwarded as palignr()'s second argument
 * @param next is forwarded as palignr()'s third argument
 * @param i may be a non-literal
 * @see palignr()
 * @mayalias
 */
void pvalignr(void *p, const void *prev, const void *next, size_t i) {
/* one call site per shift amount, each with a literal last argument */
#define PVALIGNR_CASE_(N)      \
  case N:                      \
    palignr(p, prev, next, N); \
    break
  switch (i) {
    PVALIGNR_CASE_(0);
    PVALIGNR_CASE_(1);
    PVALIGNR_CASE_(2);
    PVALIGNR_CASE_(3);
    PVALIGNR_CASE_(4);
    PVALIGNR_CASE_(5);
    PVALIGNR_CASE_(6);
    PVALIGNR_CASE_(7);
    PVALIGNR_CASE_(8);
    PVALIGNR_CASE_(9);
    PVALIGNR_CASE_(10);
    PVALIGNR_CASE_(11);
    PVALIGNR_CASE_(12);
    PVALIGNR_CASE_(13);
    PVALIGNR_CASE_(14);
    PVALIGNR_CASE_(15);
    PVALIGNR_CASE_(16);
    PVALIGNR_CASE_(17);
    PVALIGNR_CASE_(18);
    PVALIGNR_CASE_(19);
    PVALIGNR_CASE_(20);
    PVALIGNR_CASE_(21);
    PVALIGNR_CASE_(22);
    PVALIGNR_CASE_(23);
    PVALIGNR_CASE_(24);
    PVALIGNR_CASE_(25);
    PVALIGNR_CASE_(26);
    PVALIGNR_CASE_(27);
    PVALIGNR_CASE_(28);
    PVALIGNR_CASE_(29);
    PVALIGNR_CASE_(30);
    PVALIGNR_CASE_(31);
    default:
      palignr(p, prev, next, 32);
      break;
  }
#undef PVALIGNR_CASE_
}