mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-30 08:18:30 +00:00
Initial import
This commit is contained in:
commit
c91b3c5006
14915 changed files with 590219 additions and 0 deletions
53
libc/intrin/intrin.mk
Normal file
53
libc/intrin/intrin.mk
Normal file
|
@ -0,0 +1,53 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
# Build rules for the libc/intrin package, which is archived into a
# single static library (intrin.a) under o/$(MODE)/.

PKGS += LIBC_INTRIN

# The archive, plus its inputs bucketed by file extension. These are
# recursively expanded, so they may reference LIBC_INTRIN_A_FILES
# before it is assigned below.
LIBC_INTRIN_ARTIFACTS += LIBC_INTRIN_A
LIBC_INTRIN = $(LIBC_INTRIN_A_DEPS) $(LIBC_INTRIN_A)
LIBC_INTRIN_A = o/$(MODE)/libc/intrin/intrin.a
LIBC_INTRIN_A_HDRS = $(filter %.h,$(LIBC_INTRIN_A_FILES))
LIBC_INTRIN_A_SRCS_S = $(filter %.S,$(LIBC_INTRIN_A_FILES))
LIBC_INTRIN_A_SRCS_C = $(filter %.c,$(LIBC_INTRIN_A_FILES))
LIBC_INTRIN_A_SRCS = $(LIBC_INTRIN_A_SRCS_S) $(LIBC_INTRIN_A_SRCS_C)

# Globbed once (simple expansion) when this makefile is read.
LIBC_INTRIN_A_FILES :=					\
	$(wildcard libc/intrin/*)			\
	$(wildcard libc/intrin/delegates/*)

# Each source yields a .zip.o in addition to its compiled .o.
LIBC_INTRIN_A_OBJS =					\
	$(LIBC_INTRIN_A_SRCS:%=o/$(MODE)/%.zip.o)	\
	$(LIBC_INTRIN_A_SRCS_S:%.S=o/$(MODE)/%.o)	\
	$(LIBC_INTRIN_A_SRCS_C:%.c=o/$(MODE)/%.o)

# Sole definition of the per-archive checks. An earlier one-line
# assignment of this variable was always overridden by this one and has
# been dropped.
LIBC_INTRIN_A_CHECKS =					\
	$(LIBC_INTRIN_A).pkg				\
	$(LIBC_INTRIN_A_HDRS:%=o/$(MODE)/%.ok)

LIBC_INTRIN_A_DIRECTDEPS =				\
	LIBC_STUBS					\
	LIBC_NEXGEN32E

# Expanded immediately: the packages named in DIRECTDEPS must already
# have been defined by the time this makefile is included.
LIBC_INTRIN_A_DEPS :=					\
	$(call uniq,$(foreach x,$(LIBC_INTRIN_A_DIRECTDEPS),$($(x))))

$(LIBC_INTRIN_A):					\
		libc/intrin/				\
		$(LIBC_INTRIN_A).pkg			\
		$(LIBC_INTRIN_A_OBJS)

$(LIBC_INTRIN_A).pkg:					\
		$(LIBC_INTRIN_A_OBJS)			\
		$(foreach x,$(LIBC_INTRIN_A_DIRECTDEPS),$($(x)_A).pkg)

# Package-level aggregates over every artifact registered above.
LIBC_INTRIN_LIBS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)))
LIBC_INTRIN_HDRS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_HDRS))
LIBC_INTRIN_SRCS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_SRCS))
LIBC_INTRIN_OBJS = $(foreach x,$(LIBC_INTRIN_ARTIFACTS),$($(x)_OBJS))

# Only the header checks are aggregated here; the .pkg entry listed in
# LIBC_INTRIN_A_CHECKS is not folded in. This is the value that was
# already in effect: a preceding foreach-based assignment of this same
# variable was overridden by this line and has been dropped.
LIBC_INTRIN_CHECKS = $(LIBC_INTRIN_HDRS:%=o/$(MODE)/%.ok)

$(LIBC_INTRIN_OBJS): $(BUILD_FILES) libc/intrin/intrin.mk

.PHONY: o/$(MODE)/libc/intrin
o/$(MODE)/libc/intrin: $(LIBC_INTRIN_CHECKS)
|
51
libc/intrin/macros.h
Normal file
51
libc/intrin/macros.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_MACROS_H_
|
||||
#include "libc/dce.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/*
 * Values for the FLAGS argument of INTRIN_SSEVEX_X_X_X_. The string is
 * pasted in front of the first input operand's "x" constraint, so "%"
 * becomes GCC's commutative-operand modifier ("%x") and the empty
 * definition leaves the constraint as a plain "x".
 */
#define INTRIN_COMMUTATIVE "%"
#define INTRIN_NONCOMMUTATIVE

#ifndef __STRICT_ANSI__

/* Sixteen-byte vector used to move operands through "x" asm operands. */
typedef char __intrin_xmm_t _Vector_size(16) mayalias;

/*
 * Expands to a statement computing *A = OP(*B, *C) on 16-byte vectors.
 *
 * When `!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)` holds, the
 * operation is emitted as one inline-asm instruction; otherwise the
 * call PURE(A, B, C) is made instead.
 *
 * Inside the asm path, X86_NEED(AVX) picks the encoding:
 *
 *   - without it: two-operand `OP %1,%0`, where the output is tied to
 *     *B through the "0" matching constraint and %1 is *C;
 *   - with it: three-operand `vOP %2,%1,%0`, where %1 is *B and %2
 *     is *C.
 *
 * @param PURE  callable taking (A, B, C), used when the asm path is off
 * @param ISA   feature name handed to X86_HAVE()
 * @param OP    string literal holding the instruction mnemonic
 * @param FLAGS INTRIN_COMMUTATIVE or INTRIN_NONCOMMUTATIVE
 * @param A     pointer to the 16-byte destination
 * @param B     pointer to the first 16-byte source
 * @param C     pointer to the second 16-byte source
 * @note A, B and C each appear once per branch, so each is evaluated
 *     exactly once whichever path is taken
 */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C)                    \
  do {                                                                         \
    if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)) {                      \
      __intrin_xmm_t *Xmm0 = (void *)(A);                                      \
      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B);                \
      const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(C);                \
      if (!X86_NEED(AVX)) {                                                    \
        asm(OP "\t%1,%0" : "=x"(*Xmm0) : FLAGS "x"(*Xmm2), "0"(*Xmm1));        \
      } else {                                                                 \
        asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : FLAGS "x"(*Xmm1), "x"(*Xmm2)); \
      }                                                                        \
    } else {                                                                   \
      PURE(A, B, C);                                                           \
    }                                                                          \
  } while (0)

/*
 * Same dispatch as INTRIN_SSEVEX_X_X_X_, for operations whose second
 * source is an immediate: *A = OP(*B, I).
 *
 * I is bound with the "i" constraint, so on the asm path it has to be
 * something the compiler can emit as an immediate operand. On the
 * fallback path it is simply passed to PURE(A, B, I).
 */
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I)                 \
  do {                                                               \
    if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(ISA)) {            \
      __intrin_xmm_t *Xmm0 = (void *)(A);                            \
      const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B);      \
      if (!X86_NEED(AVX)) {                                          \
        asm(OP "\t%1,%0" : "=x"(*Xmm0) : "i"(I), "0"(*Xmm1));        \
      } else {                                                       \
        asm("v" OP "\t%2,%1,%0" : "=x"(*Xmm0) : "x"(*Xmm1), "i"(I)); \
      }                                                              \
    } else {                                                         \
      PURE(A, B, I);                                                 \
    }                                                                \
  } while (0)

#else
/* Strict ANSI builds get no inline asm: always call the PURE routine. */
#define INTRIN_SSEVEX_X_X_X_(PURE, ISA, OP, FLAGS, A, B, C) PURE(A, B, C)
#define INTRIN_SSEVEX_X_X_I_(PURE, ISA, OP, A, B, I) PURE(A, B, I)
#endif /* ANSI */
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_MACROS_H_ */
|
30
libc/intrin/packsswb.h
Normal file
30
libc/intrin/packsswb.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Casts shorts to signed chars w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
 *
 * Each short is clamped to [-128,127]. The sixteen results are staged
 * in a local buffer and only then stored, so 𝑎 may overlap 𝑏 and/or 𝑐
 * without an input being overwritten before it has been read.
 *
 * @param 𝑎 [w/o] receives packed 𝑏 in bytes [0,8) and packed 𝑐 in [8,16)
 * @param 𝑏 [r/o] supplies the eight shorts for the low half
 * @param 𝑐 [r/o] supplies the eight shorts for the high half
 * @see packuswb()
 * @mayalias
 */
static void packsswb(signed char a[16], const short b[8], const short c[8]) {
  int i;
  signed char r[16];
  for (i = 0; i < 8; ++i) {
    r[i + 0] = b[i] < -128 ? -128 : b[i] > 127 ? 127 : b[i];
    r[i + 8] = c[i] < -128 ? -128 : c[i] > 127 ? 127 : c[i];
  }
  /* store only after every input has been consumed */
  for (i = 0; i < 16; ++i) {
    a[i] = r[i];
  }
}
|
||||
|
||||
/*
 * Shadows the packsswb() function with a dispatcher. The macro's own
 * name is passed as the PURE fallback; since a macro is not re-expanded
 * inside its own expansion, that inner name refers to the function.
 * SSE2 is the required feature, "packsswb" the mnemonic, and operand
 * order matters (INTRIN_NONCOMMUTATIVE).
 */
#define packsswb(A, B, C)                                                    \
  INTRIN_SSEVEX_X_X_X_(packsswb, SSE2, "packsswb", INTRIN_NONCOMMUTATIVE, A, \
                       B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKSSWB_H_ */
|
30
libc/intrin/packuswb.h
Normal file
30
libc/intrin/packuswb.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Casts shorts to unsigned chars w/ saturation.
 *
 *     𝑎 ← {CLAMP[𝑏ᵢ]|𝑖∈[0,8)} ║ {CLAMP[𝑐ᵢ]|𝑖∈[0,8)}
 *
 * Each short is clamped to [0,255]. The sixteen results are staged in
 * a local buffer and only then stored, so 𝑎 may overlap 𝑏 and/or 𝑐
 * without an input being overwritten before it has been read.
 *
 * @param 𝑎 [w/o] receives packed 𝑏 in bytes [0,8) and packed 𝑐 in [8,16)
 * @param 𝑏 [r/o] supplies the eight shorts for the low half
 * @param 𝑐 [r/o] supplies the eight shorts for the high half
 * @see packsswb()
 * @mayalias
 */
static void packuswb(unsigned char a[16], const short b[8], const short c[8]) {
  int i;
  unsigned char r[16];
  for (i = 0; i < 8; ++i) {
    r[i + 0] = b[i] < 0 ? 0 : b[i] > 255 ? 255 : b[i];
    r[i + 8] = c[i] < 0 ? 0 : c[i] > 255 ? 255 : c[i];
  }
  /* store only after every input has been consumed */
  for (i = 0; i < 16; ++i) {
    a[i] = r[i];
  }
}
|
||||
|
||||
/*
 * Shadows the packuswb() function with a dispatcher. The macro's own
 * name is passed as the PURE fallback; since a macro is not re-expanded
 * inside its own expansion, that inner name refers to the function.
 * SSE2 is the required feature, "packuswb" the mnemonic, and operand
 * order matters (INTRIN_NONCOMMUTATIVE).
 */
#define packuswb(A, B, C)                                                    \
  INTRIN_SSEVEX_X_X_X_(packuswb, SSE2, "packuswb", INTRIN_NONCOMMUTATIVE, A, \
                       B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PACKUSWB_H_ */
|
28
libc/intrin/paddsw.h
Normal file
28
libc/intrin/paddsw.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Sums eight pairs of signed 16-bit lanes, saturating each result.
 *
 * Every lane is added at int precision and then pinned to the range
 * [SHRT_MIN,SHRT_MAX] before being stored.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @see paddw()
 * @mayalias
 */
static void paddsw(short a[8], const short b[8], const short c[8]) {
  int lane, sum;
  for (lane = 0; lane != 8; lane++) {
    sum = b[lane] + c[lane];
    if (sum < SHRT_MIN) {
      sum = SHRT_MIN;
    } else if (sum > SHRT_MAX) {
      sum = SHRT_MAX;
    }
    a[lane] = sum;
  }
}
|
||||
|
||||
/*
 * Shadows the paddsw() function with a dispatcher; the inner `paddsw`
 * is the PURE fallback and resolves to the function because a macro is
 * not re-expanded within itself. Requires SSE2 for the asm path and is
 * flagged INTRIN_COMMUTATIVE.
 */
#define paddsw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(paddsw, SSE2, "paddsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDSW_H_ */
|
28
libc/intrin/paddw.h
Normal file
28
libc/intrin/paddw.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PADDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Sums eight pairs of signed 16-bit lanes without saturation.
 *
 * Lanes are processed from index 0 upward; each sum is formed at int
 * precision and then narrowed back to short on store.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies first input vector
 * @param 𝑐 [r/o] supplies second input vector
 * @note shorts can't overflow so ubsan won't report it when it happens
 * @see paddsw()
 * @mayalias
 */
static void paddw(short a[8], const short b[8], const short c[8]) {
  short *out = a;
  short *const end = a + 8;
  const short *lhs = b;
  const short *rhs = c;
  while (out != end) {
    *out++ = *lhs++ + *rhs++;
  }
}
|
||||
|
||||
/*
 * Shadows the paddw() function with a dispatcher; the inner `paddw` is
 * the PURE fallback and resolves to the function because a macro is not
 * re-expanded within itself. Requires SSE2 for the asm path and is
 * flagged INTRIN_COMMUTATIVE.
 */
#define paddw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(paddw, SSE2, "paddw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PADDW_H_ */
|
55
libc/intrin/palignr.h
Normal file
55
libc/intrin/palignr.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_
|
||||
#include "libc/assert.h"
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
void pvalignr(void *, const void *, const void *, size_t);
|
||||
|
||||
/**
 * Extracts a 16-byte window from two concatenated vectors.
 *
 * Think of 48 bytes laid out as 𝑎 ║ 𝑏 ║ zeros; the result is the
 * sixteen bytes starting at offset min(𝑖, 32) within that sequence:
 *
 *     𝑖= 0     means 𝑐←𝑎
 *     0<𝑖<16   means 𝑐←𝑎║𝑏
 *     𝑖=16     means 𝑐←𝑏
 *     16<𝑖<32  means 𝑐←𝑏║0
 *     𝑖≥32     means 𝑐←0
 *
 * The window is assembled in a local buffer before being copied out,
 * so 𝑐 may overlap either input.
 *
 * @param 𝑖 needs to be a literal, constexpr, or embedding
 * @see pvalignr()
 * @mayalias
 */
static void palignr(void *c, const void *b, const void *a, size_t i) {
  const unsigned char *lo = (const unsigned char *)a;
  const unsigned char *hi = (const unsigned char *)b;
  unsigned char window[16];
  size_t k, pos;
  if (i > 32) {
    i = 32;
  }
  for (k = 0; k < 16; ++k) {
    pos = i + k;
    if (pos < 16) {
      window[k] = lo[pos];
    } else if (pos < 32) {
      window[k] = hi[pos - 16];
    } else {
      window[k] = 0;
    }
  }
  memcpy(c, window, sizeof(window));
}
|
||||
|
||||
#ifndef __STRICT_ANSI__
|
||||
#define palignr(C, B, A, I) \
|
||||
do { \
|
||||
if (!IsModeDbg() && X86_NEED(SSE) && X86_HAVE(SSSE3)) { \
|
||||
__intrin_xmm_t *Xmm0 = (void *)(C); \
|
||||
const __intrin_xmm_t *Xmm1 = (const __intrin_xmm_t *)(B); \
|
||||
const __intrin_xmm_t *Xmm2 = (const __intrin_xmm_t *)(A); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("palignr\t%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm2), "i"(I), "0"(*Xmm1)); \
|
||||
} else { \
|
||||
asm("vpalignr\t%3,%2,%1,%0" \
|
||||
: "=x"(*Xmm0) \
|
||||
: "x"(*Xmm1), "x"(*Xmm2), "i"(I)); \
|
||||
} \
|
||||
} else { \
|
||||
palignr(C, B, A, I); \
|
||||
} \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PALIGNR_H_ */
|
14
libc/intrin/pdep.h
Normal file
14
libc/intrin/pdep.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PDEP_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PDEP_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* TODO(jart): Implement polyfill. */
/*
 * Statement expression wrapping the `pdep` instruction.
 *
 * The asm template is `pdep %2,%1,%0`, with %2 = BITMASK (register or
 * memory), %1 = NUMBER (register) and %0 the result register.
 *
 * Both the result and the temporary holding NUMBER are declared with
 * typeof(BITMASK), so NUMBER is converted to BITMASK's type and the
 * expression evaluates to that type.
 *
 * @note there is no feature check or fallback here; the instruction is
 *     emitted unconditionally
 */
#define pdep(NUMBER, BITMASK)                                                \
  ({                                                                         \
    typeof(BITMASK) ShuffledBits, Number = (NUMBER);                         \
    asm("pdep\t%2,%1,%0" : "=r"(ShuffledBits) : "r"(Number), "rm"(BITMASK)); \
    ShuffledBits;                                                            \
  })
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PDEP_H_ */
|
14
libc/intrin/pext.h
Normal file
14
libc/intrin/pext.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PEXT_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PEXT_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* TODO(jart): Implement polyfill. */
/*
 * Statement expression wrapping the `pext` instruction.
 *
 * The asm template is `pext %2,%1,%0`, with %2 = BITMASK (register or
 * memory), %1 = NUMBER (register) and %0 the result register.
 *
 * Both the result and the temporary holding NUMBER are declared with
 * typeof(BITMASK), so NUMBER is converted to BITMASK's type and the
 * expression evaluates to that type.
 *
 * @note there is no feature check or fallback here; the instruction is
 *     emitted unconditionally
 */
#define pext(NUMBER, BITMASK)                                                \
  ({                                                                         \
    typeof(BITMASK) ShuffledBits, Number = (NUMBER);                         \
    asm("pext\t%2,%1,%0" : "=r"(ShuffledBits) : "r"(Number), "rm"(BITMASK)); \
    ShuffledBits;                                                            \
  })
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PEXT_H_ */
|
46
libc/intrin/phaddsw.c
Normal file
46
libc/intrin/phaddsw.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phaddsw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Horizontally adds neighbouring shorts, saturating each sum.
 *
 * Pair sums are formed at int precision for all inputs first, and only
 * afterwards clamped to [SHRT_MIN,SHRT_MAX] and stored. The output may
 * therefore overlap either input.
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies four pairs of shorts
 * @param 𝑐 [r/o] supplies four pairs of shorts
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phaddsw)(short a[8], const short b[8], const short c[8]) {
  int sums[8];
  int k, v;
  for (k = 0; k < 4; ++k) {
    sums[k + 0] = b[k * 2] + b[k * 2 + 1];
    sums[k + 4] = c[k * 2] + c[k * 2 + 1];
  }
  for (k = 0; k < 8; ++k) {
    v = sums[k];
    if (v < SHRT_MIN) {
      v = SHRT_MIN;
    } else if (v > SHRT_MAX) {
      v = SHRT_MAX;
    }
    a[k] = v;
  }
}
|
13
libc/intrin/phaddsw.h
Normal file
13
libc/intrin/phaddsw.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* Portable implementation: (destination, first source, second source). */
void phaddsw(short[8], const short[8], const short[8]);

/*
 * Shadows the function with a dispatcher; the inner `phaddsw` is the
 * PURE fallback and resolves to the function because a macro is not
 * re-expanded within itself. Requires SSSE3 for the asm path; operand
 * order matters (INTRIN_NONCOMMUTATIVE).
 */
#define phaddsw(A, B, C)                                                       \
  INTRIN_SSEVEX_X_X_X_(phaddsw, SSSE3, "phaddsw", INTRIN_NONCOMMUTATIVE, A, B, \
                       C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDSW_H_ */
|
42
libc/intrin/phaddw.c
Normal file
42
libc/intrin/phaddw.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/phaddw.h"
|
||||
|
||||
/**
 * Horizontally adds neighbouring signed 16-bit integers.
 *
 * The eight pair sums are narrowed to short into a scratch array and
 * then copied out in one go, so the output may overlap either input.
 *
 * @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 concatenated
 * @param 𝑏 [r/o] supplies four pairs of shorts
 * @param 𝑐 [r/o] supplies four pairs of shorts
 * @note goes fast w/ ssse3 (intel c. 2004, amd c. 2011)
 * @mayalias
 */
void(phaddw)(short a[8], const short b[8], const short c[8]) {
  short sums[8];
  int k;
  for (k = 0; k < 4; ++k) {
    sums[k + 0] = b[k * 2] + b[k * 2 + 1];
    sums[k + 4] = c[k * 2] + c[k * 2 + 1];
  }
  memcpy(a, sums, sizeof(sums));
}
|
13
libc/intrin/phaddw.h
Normal file
13
libc/intrin/phaddw.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* Portable implementation: (destination, first source, second source). */
void phaddw(short[8], const short[8], const short[8]);

/*
 * Shadows the function with a dispatcher; the inner `phaddw` is the
 * PURE fallback and resolves to the function because a macro is not
 * re-expanded within itself. Requires SSSE3 for the asm path; operand
 * order matters (INTRIN_NONCOMMUTATIVE).
 */
#define phaddw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(phaddw, SSSE3, "phaddw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PHADDW_H_ */
|
43
libc/intrin/pmaddubsw.c
Normal file
43
libc/intrin/pmaddubsw.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmaddubsw.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/**
 * Computes saturated two-element dot products of bytes.
 *
 *     𝑤ᵢ ← CLAMP[ 𝑏₂ᵢ𝑐₂ᵢ + 𝑏₍₂ᵢ₊₁₎𝑐₍₂ᵢ₊₁₎ ]
 *
 * Unsigned bytes from 𝑏 are multiplied with signed bytes from 𝑐, each
 * adjacent pair of products is summed at int precision, and the sum
 * is clamped to [SHRT_MIN,SHRT_MAX].
 *
 * @param 𝑤 [w/o] receives shorts
 * @param 𝑏 [r/o] is your byte data
 * @param 𝑐 [r/o] are your int8 coefficients
 * @note SSSE3 w/ Prescott c. 2004, Bulldozer c. 2011
 * @note greatest simd op, like, ever
 * @mayalias
 */
void(pmaddubsw)(short w[8], const unsigned char b[16],
                const signed char c[16]) {
  int lane, even, odd, dot;
  for (lane = 0; lane < 8; ++lane) {
    even = lane * 2;
    odd = even + 1;
    dot = c[even] * b[even] + c[odd] * b[odd];
    if (dot < SHRT_MIN) {
      dot = SHRT_MIN;
    } else if (dot > SHRT_MAX) {
      dot = SHRT_MAX;
    }
    w[lane] = dot;
  }
}
|
13
libc/intrin/pmaddubsw.h
Normal file
13
libc/intrin/pmaddubsw.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* Portable implementation: (shorts out, unsigned bytes, signed bytes). */
void pmaddubsw(short[8], const unsigned char[16], const signed char[16]);

/*
 * Shadows the function with a dispatcher; the inner `pmaddubsw` is the
 * PURE fallback and resolves to the function because a macro is not
 * re-expanded within itself. Requires SSSE3 for the asm path; operand
 * order matters (INTRIN_NONCOMMUTATIVE).
 */
#define pmaddubsw(W, B, C)                                                   \
  INTRIN_SSEVEX_X_X_X_(pmaddubsw, SSSE3, "pmaddubsw", INTRIN_NONCOMMUTATIVE, \
                       W, B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMADDUBSW_H_ */
|
34
libc/intrin/pmulhrsw.c
Normal file
34
libc/intrin/pmulhrsw.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmulhrsw.h"
|
||||
|
||||
/**
 * Multiplies Q15 fixed-point numbers lane by lane.
 *
 * Each product is formed at int precision, shifted right by 14, bumped
 * by one for rounding, shifted right once more, and then narrowed to
 * short on store.
 *
 * @note a.k.a. packed multiply high w/ round & scale
 * @see Q2F(15,𝑥), F2Q(15,𝑥)
 * @mayalias
 */
void(pmulhrsw)(short a[8], const short b[8], const short c[8]) {
  int lane = 0;
  int product;
  while (lane < 8) {
    product = b[lane] * c[lane];
    product >>= 14;
    product += 1;
    a[lane] = product >> 1;
    ++lane;
  }
}
|
12
libc/intrin/pmulhrsw.h
Normal file
12
libc/intrin/pmulhrsw.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* Portable implementation: a receives the lane-wise Q15 product of b and c. */
void pmulhrsw(short a[8], const short b[8], const short c[8]);

/*
 * Shadows the function with a dispatcher; the inner `pmulhrsw` is the
 * PURE fallback and resolves to the function because a macro is not
 * re-expanded within itself. Requires SSSE3 for the asm path and is
 * flagged INTRIN_COMMUTATIVE.
 */
#define pmulhrsw(A, B, C) \
  INTRIN_SSEVEX_X_X_X_(pmulhrsw, SSSE3, "pmulhrsw", INTRIN_COMMUTATIVE, A, B, C)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMULHRSW_H_ */
|
23
libc/intrin/psraw.h
Normal file
23
libc/intrin/psraw.h
Normal file
|
@ -0,0 +1,23 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/intrin/macros.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Divides shorts by two power.
 *
 * Each lane is shifted right arithmetically, i.e. the sign bit is
 * replicated into the vacated positions and negative values round
 * toward negative infinity.
 *
 * The count is read as an unsigned byte and anything above 15 is
 * treated as 15, which leaves every lane filled with its sign bit.
 * That mirrors what the PSRAW instruction does with oversized counts,
 * so this routine and the instruction agree for every value of 𝑐.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies the shorts to shift
 * @param 𝑐 is the shift count
 * @note c needs to be a literal, asmconstexpr, or linkconstsym
 * @mayalias
 */
static void psraw(short a[8], const short b[8], char c) {
  int i;
  unsigned k = (unsigned char)c;
  if (k > 15) {
    k = 15;
  }
  for (i = 0; i < 8; ++i) {
    /* ~(~x >> k) is an arithmetic shift of negative x that never
       right-shifts a negative value, keeping the result portable */
    a[i] = b[i] < 0 ? ~(~b[i] >> k) : b[i] >> k;
  }
}
|
||||
|
||||
/*
 * Shadows the psraw() function with a dispatcher; the inner `psraw` is
 * the PURE fallback and resolves to the function because a macro is
 * not re-expanded within itself. Requires SSE2 for the asm path, where
 * I is bound as an immediate operand.
 */
#define psraw(A, B, I) INTRIN_SSEVEX_X_X_I_(psraw, SSE2, "psraw", A, B, I)
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */
|
26
libc/intrin/repmovsb.h
Normal file
26
libc/intrin/repmovsb.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Copies bytes forward and advances both cursors past them.
 *
 * Bytes are moved one at a time from the lowest address upward. On
 * return, *dest and *src each point just beyond the last byte that
 * was written and read respectively.
 *
 * @param dest points to the destination cursor, updated in place
 * @param src points to the source cursor, updated in place
 * @param cx is the number of bytes to copy
 */
static void repmovsb(void **dest, const void **src, size_t cx) {
  char *out = (char *)*dest;
  const char *in = (const char *)*src;
  size_t k;
  for (k = 0; k < cx; ++k) {
    out[k] = in[k];
  }
  *dest = out + cx;
  *src = in + cx;
}
|
||||
|
||||
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/*
 * Shadows the repmovsb() function with a `rep movsb` statement
 * expression on GNU-compatible x86 compilers.
 *
 * DI and SI are pointers to the cursors, as with the function. Their
 * targets are loaded into locals, bound to the "D", "S" and "c"
 * operands (each both input and output via matching constraints), and
 * written back through DI and SI afterwards.
 *
 * The two "m" operands are typed as arrays of Cx chars at Di and Si,
 * which is how the asm declares the memory it writes and reads; no
 * "memory" clobber is used.
 */
#define repmovsb(DI, SI, CX)                                         \
  ({                                                                 \
    void *Di = *(DI);                                                \
    const void *Si = *(SI);                                          \
    size_t Cx = (CX);                                                \
    asm("rep movsb"                                                  \
        : "=D"(Di), "=S"(Si), "=c"(Cx), "=m"(*(char(*)[Cx])Di)       \
        : "0"(Di), "1"(Si), "2"(Cx), "m"(*(const char(*)[Cx])Si));   \
    *(DI) = Di, *(SI) = Si;                                          \
  })
#endif
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_REPMOVSB_H_ */
|
131
libc/intrin/vpalignr.c
Normal file
131
libc/intrin/vpalignr.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/palignr.h"
|
||||
|
||||
/**
 * Shifts and concatenates xmm registers.
 *
 * palignr() wants its offset spelled as a literal, so this routine
 * switches on the runtime value and invokes palignr() with the
 * matching constant. Offsets 0 through 31 each get their own case;
 * every other value is handled as 32.
 *
 * @param i may be a non-literal
 * @see palignr()
 * @mayalias
 */
void pvalignr(void *p, const void *prev, const void *next, size_t i) {
/* one switch arm: forward to palignr() with N as a literal offset */
#define PVALIGNR_CASE_(N)      \
  case N:                      \
    palignr(p, prev, next, N); \
    break
  switch (i) {
    PVALIGNR_CASE_(0);
    PVALIGNR_CASE_(1);
    PVALIGNR_CASE_(2);
    PVALIGNR_CASE_(3);
    PVALIGNR_CASE_(4);
    PVALIGNR_CASE_(5);
    PVALIGNR_CASE_(6);
    PVALIGNR_CASE_(7);
    PVALIGNR_CASE_(8);
    PVALIGNR_CASE_(9);
    PVALIGNR_CASE_(10);
    PVALIGNR_CASE_(11);
    PVALIGNR_CASE_(12);
    PVALIGNR_CASE_(13);
    PVALIGNR_CASE_(14);
    PVALIGNR_CASE_(15);
    PVALIGNR_CASE_(16);
    PVALIGNR_CASE_(17);
    PVALIGNR_CASE_(18);
    PVALIGNR_CASE_(19);
    PVALIGNR_CASE_(20);
    PVALIGNR_CASE_(21);
    PVALIGNR_CASE_(22);
    PVALIGNR_CASE_(23);
    PVALIGNR_CASE_(24);
    PVALIGNR_CASE_(25);
    PVALIGNR_CASE_(26);
    PVALIGNR_CASE_(27);
    PVALIGNR_CASE_(28);
    PVALIGNR_CASE_(29);
    PVALIGNR_CASE_(30);
    PVALIGNR_CASE_(31);
    default:
      palignr(p, prev, next, 32);
      break;
  }
#undef PVALIGNR_CASE_
}
|
Loading…
Add table
Add a link
Reference in a new issue