Get rid of some legacy code

This commit is contained in:
Justine Tunney 2024-08-24 17:53:30 -07:00
parent 37ca1badaf
commit 38cc4b3c68
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
27 changed files with 123 additions and 600 deletions

View file

@ -45,6 +45,12 @@ $(DSP_SCALE_A).pkg: \
$(DSP_SCALE_A_OBJS) \ $(DSP_SCALE_A_OBJS) \
$(foreach x,$(DSP_SCALE_A_DIRECTDEPS),$($(x)_A).pkg) $(foreach x,$(DSP_SCALE_A_DIRECTDEPS),$($(x)_A).pkg)
ifeq ($(ARCH),x86_64)
o/$(MODE)/dsp/scale/cdecimate2xuint8x8.o: private \
CFLAGS += \
-mssse3
endif
o/$(MODE)/dsp/scale/cdecimate2xuint8x8.o \ o/$(MODE)/dsp/scale/cdecimate2xuint8x8.o \
o/$(MODE)/dsp/scale/gyarados.o \ o/$(MODE)/dsp/scale/gyarados.o \
o/$(MODE)/dsp/scale/magikarp.o \ o/$(MODE)/dsp/scale/magikarp.o \

View file

@ -1,34 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pandn.h"
/**
 * Computes the bitwise and-not of two 128-bit values.
 *
 * Each 64-bit half of the result is the complement of the matching
 * half of 𝑏 masked with the matching half of 𝑐, i.e. `~b & c`.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies the vector that gets complemented
 * @param 𝑐 [r/o] supplies the vector that gets masked
 * @mayalias
 */
void(pandn)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
  /* halves are processed low-to-high, one store per half */
  a[0] = c[0] & ~b[0];
  a[1] = c[1] & ~b[1];
}

View file

@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PANDN_H_
#define COSMOPOLITAN_LIBC_INTRIN_PANDN_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each a pair of 64-bit words. */
void pandn(uint64_t[2], const uint64_t[2], const uint64_t[2]);
/* Function-like macro of the same name; plain calls `pandn(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "pandn"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define pandn(A, B, C) \
INTRIN_SSEVEX_X_X_X_(pandn, SSE2, "pandn", INTRIN_NONCOMMUTATIVE, A, B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PANDN_H_ */

View file

@ -1,38 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pcmpgtb.h"
#include "libc/str/str.h"
/**
 * Tests sixteen signed bytes for 𝑏 > 𝑐, lane by lane.
 *
 * A lane of the result is all ones (-1) where the predicate holds and
 * zero where it doesn't. For unsigned ordering, xor both operands with
 * 0x80 beforehand.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies left-hand operands
 * @param 𝑐 [r/o] supplies right-hand operands
 * @mayalias
 */
void(pcmpgtb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
  int8_t mask[16];
  int lane = 16;
  /* build the answer in scratch space so inputs may overlap the output */
  while (lane--) {
    mask[lane] = b[lane] > c[lane] ? -1 : 0;
  }
  __builtin_memcpy(a, mask, sizeof(mask));
}

View file

@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each sixteen signed bytes. */
void pcmpgtb(int8_t[16], const int8_t[16], const int8_t[16]);
/* Function-like macro of the same name; plain calls `pcmpgtb(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "pcmpgtb"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define pcmpgtb(A, B, C) \
INTRIN_SSEVEX_X_X_X_(pcmpgtb, SSE2, "pcmpgtb", INTRIN_NONCOMMUTATIVE, A, B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_ */

View file

@ -1,36 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pcmpgtw.h"
#include "libc/str/str.h"
/**
 * Tests eight signed 16-bit words for 𝑏 > 𝑐, lane by lane.
 *
 * A lane of the result is all ones (-1) where the predicate holds and
 * zero where it doesn't.
 *
 * @param 𝑎 [w/o] receives result
 * @param 𝑏 [r/o] supplies left-hand operands
 * @param 𝑐 [r/o] supplies right-hand operands
 * @mayalias
 */
void(pcmpgtw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
  int16_t mask[8];
  int lane = 8;
  /* build the answer in scratch space so inputs may overlap the output */
  while (lane--) {
    mask[lane] = b[lane] > c[lane] ? -1 : 0;
  }
  __builtin_memcpy(a, mask, sizeof(mask));
}

View file

@ -1,12 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each eight signed 16-bit words. */
void pcmpgtw(int16_t[8], const int16_t[8], const int16_t[8]);
/* Function-like macro of the same name; plain calls `pcmpgtw(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "pcmpgtw"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define pcmpgtw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(pcmpgtw, SSE2, "pcmpgtw", INTRIN_NONCOMMUTATIVE, A, B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_ */

View file

@ -1,34 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/pmovmskb.h"
/**
 * Gathers the top bit of each of sixteen bytes into a 16-bit mask.
 *
 * Bit 𝑖 of the return value is set exactly when byte 𝑖 of the input
 * has its 0x80 bit set; bits 16 and above are always clear.
 *
 * @param 𝑝 is byte vector to crunch
 * @return mask whose low sixteen bits mirror the bytes' sign bits
 * @see pcmpeqb(), bsf(), etc.
 */
uint32_t(pmovmskb)(const uint8_t p[16]) {
  uint32_t mask = 0;
  /* walk from the highest lane down, shifting earlier bits upward */
  for (int lane = 15; lane >= 0; --lane) {
    mask = (mask << 1) | (uint32_t)(p[lane] >> 7);
  }
  return mask;
}

View file

@ -1,27 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_
#define COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: takes sixteen bytes and returns a
   32-bit mask. */
uint32_t pmovmskb(const uint8_t[16]);
/* On x86-64 with a GNU-compatible compiler, calls are intercepted by the
   statement-expression macro below. When IsModeDbg() is false and
   X86_HAVE(SSE2) holds, it reinterprets A as a __intrin_xmm_t and issues
   inline asm: `pmovmskb`, or `vpmovmskb` when X86_NEED(AVX) is true.
   Otherwise it calls pmovmskb(A); a macro name is not re-expanded inside its
   own expansion, so that inner call reaches the real function.
   NOTE(review): IsModeDbg, X86_HAVE, X86_NEED and __intrin_xmm_t are not
   defined in this header -- confirm their definitions where they live. */
#if defined(__x86_64__) && defined(__GNUC__)
#define pmovmskb(A) \
({ \
uint32_t Mask; \
if (!IsModeDbg() && X86_HAVE(SSE2)) { \
const __intrin_xmm_t *Xmm = (const __intrin_xmm_t *)(A); \
if (!X86_NEED(AVX)) { \
asm("pmovmskb\t%1,%0" : "=r"(Mask) : "x"(*Xmm)); \
} else { \
asm("vpmovmskb\t%1,%0" : "=r"(Mask) : "x"(*Xmm)); \
} \
} else { \
Mask = pmovmskb(A); \
} \
Mask; \
})
#endif
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_ */

View file

@ -4,11 +4,8 @@
COSMOPOLITAN_C_START_ COSMOPOLITAN_C_START_
void psraw(int16_t[8], const int16_t[8], unsigned char) libcesque; void psraw(int16_t[8], const int16_t[8], unsigned char) libcesque;
void psrawv(int16_t[8], const int16_t[8], const uint64_t[2]) libcesque;
#define psraw(A, B, I) INTRIN_SSEVEX_X_I_(psraw, SSE2, "psraw", A, B, I) #define psraw(A, B, I) INTRIN_SSEVEX_X_I_(psraw, SSE2, "psraw", A, B, I)
#define psrawv(A, B, C) \
INTRIN_SSEVEX_X_X_X_(psrawv, SSE2, "psraw", INTRIN_NONCOMMUTATIVE, A, B, C)
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */ #endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */

View file

@ -1,34 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/psraw.h"
/**
 * Shifts eight signed 16-bit lanes right by a vector-supplied count.
 *
 * The count is read from the first quadword of 𝑐; anything above 15 is
 * treated as 15. The second quadword of 𝑐 is not read.
 *
 * @param 𝑎 receives the shifted lanes
 * @param 𝑏 supplies the lanes to shift
 * @param 𝑐 supplies the shift count in its first element
 * @note arithmetic shift right will sign extend negatives
 * @mayalias
 */
void(psrawv)(int16_t a[8], const int16_t b[8], const uint64_t c[2]) {
  /* latch the count once, before any lane of 𝑎 is written */
  uint64_t count = c[0];
  unsigned char shift = 15;
  if (count <= 15) {
    shift = count;
  }
  for (int lane = 0; lane != 8; ++lane) {
    a[lane] = b[lane] >> shift;
  }
}

View file

@ -1,46 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/punpckhbw.h"
/**
 * Interleaves the upper eight bytes of two 16-byte vectors.
 *
 * Output byte 2𝑖 comes from 𝑏[8+𝑖] and output byte 2𝑖+1 comes from
 * 𝑐[8+𝑖], for 𝑖 in 0..7.
 *
 * @param 𝑎 [w/o] receives the interleaved high halves of 𝑏 and 𝑐
 * @param 𝑏 [r/o] supplies sixteen bytes (even output positions)
 * @param 𝑐 [r/o] supplies sixteen bytes (odd output positions)
 * @mayalias
 */
void(punpckhbw)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
  /* ascending order: each source byte is consumed before it can be
     overwritten when the output overlaps an input */
  for (int pair = 0; pair < 8; ++pair) {
    a[2 * pair + 0] = b[8 + pair];
    a[2 * pair + 1] = c[8 + pair];
  }
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKHBW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKHBW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each sixteen bytes. */
void punpckhbw(uint8_t[16], const uint8_t[16], const uint8_t[16]);
/* Function-like macro of the same name; plain calls `punpckhbw(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "punpckhbw"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define punpckhbw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(punpckhbw, SSE2, "punpckhbw", INTRIN_NONCOMMUTATIVE, A, \
B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKHBW_H_ */

View file

@ -1,49 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/punpckhwd.h"
#include "libc/str/str.h"
/**
 * Interleaves the upper four words of two 8-word vectors.
 *
 *     index  0  1  2  3  4  5  6  7
 *       B    aa bb cc dd EE FF GG HH
 *       C    ii jj kk ll MM NN OO PP
 *       A    EE MM FF NN GG OO HH PP
 *
 * @param 𝑎 [w/o] receives the interleaved high halves of 𝑏 and 𝑐
 * @param 𝑏 [r/o] supplies eight words (even output positions)
 * @param 𝑐 [r/o] supplies eight words (odd output positions)
 * @mayalias
 */
void(punpckhwd)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
  /* ascending order: each source word is consumed before it can be
     overwritten when the output overlaps an input */
  for (int pair = 0; pair < 4; ++pair) {
    a[2 * pair + 0] = b[4 + pair];
    a[2 * pair + 1] = c[4 + pair];
  }
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKHWD_H_
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKHWD_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each eight 16-bit words. */
void punpckhwd(uint16_t[8], const uint16_t[8], const uint16_t[8]);
/* Function-like macro of the same name; plain calls `punpckhwd(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "punpckhwd"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define punpckhwd(A, B, C) \
INTRIN_SSEVEX_X_X_X_(punpckhwd, SSE2, "punpckhwd", INTRIN_NONCOMMUTATIVE, A, \
B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKHWD_H_ */

View file

@ -1,56 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/punpcklbw.h"
/**
 * Interleaves the lower eight bytes of two 16-byte vectors.
 *
 * Output byte 2𝑖 comes from 𝑏[𝑖] and output byte 2𝑖+1 comes from 𝑐[𝑖],
 * for 𝑖 in 0..7. The upper eight bytes of both inputs are ignored.
 *
 *     B    A B C D E F G H ...
 *     C    Q R S T U V W X ...
 *     A    A Q B R C S D T E U F V G W H X
 *
 * @param 𝑎 [w/o] receives the interleaved low halves of 𝑏 and 𝑐
 * @param 𝑏 [r/o] supplies sixteen bytes (even output positions)
 * @param 𝑐 [r/o] supplies sixteen bytes (odd output positions)
 * @mayalias
 */
void(punpcklbw)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
  /* descending order: each source byte is consumed before it can be
     overwritten when the output overlaps an input */
  for (int pair = 7; pair >= 0; --pair) {
    a[2 * pair + 1] = c[pair];
    a[2 * pair + 0] = b[pair];
  }
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKLBW_H_
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKLBW_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each sixteen bytes. */
void punpcklbw(uint8_t[16], const uint8_t[16], const uint8_t[16]);
/* Function-like macro of the same name; plain calls `punpcklbw(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "punpcklbw"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define punpcklbw(A, B, C) \
INTRIN_SSEVEX_X_X_X_(punpcklbw, SSE2, "punpcklbw", INTRIN_NONCOMMUTATIVE, A, \
B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKLBW_H_ */

View file

@ -1,48 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/punpcklwd.h"
/**
 * Interleaves the lower four words of two 8-word vectors.
 *
 *     index  0  1  2  3  4  5  6  7
 *       B    AA BB CC DD ee ff gg hh
 *       C    II JJ KK LL mm nn oo pp
 *       A    AA II BB JJ CC KK DD LL
 *
 * @param 𝑎 [w/o] receives the interleaved low halves of 𝑏 and 𝑐
 * @param 𝑏 [r/o] supplies eight words (even output positions)
 * @param 𝑐 [r/o] supplies eight words (odd output positions)
 * @mayalias
 */
void(punpcklwd)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
  /* descending order: each source word is consumed before it can be
     overwritten when the output overlaps an input */
  for (int pair = 3; pair >= 0; --pair) {
    a[2 * pair + 1] = c[pair];
    a[2 * pair + 0] = b[pair];
  }
}

View file

@ -1,13 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKLWD_H_
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKLWD_H_
#include "libc/intrin/macros.h"
COSMOPOLITAN_C_START_
/* Prototype of the out-of-line routine: (result, first input, second input),
   each eight 16-bit words. */
void punpcklwd(uint16_t[8], const uint16_t[8], const uint16_t[8]);
/* Function-like macro of the same name; plain calls `punpcklwd(a, b, c)` go
   through INTRIN_SSEVEX_X_X_X_ with the SSE2 feature tag, the "punpcklwd"
   mnemonic and the INTRIN_NONCOMMUTATIVE flag.
   NOTE(review): INTRIN_SSEVEX_X_X_X_ is not defined in this header --
   presumably it comes from libc/intrin/macros.h; confirm its semantics there. */
#define punpcklwd(A, B, C) \
INTRIN_SSEVEX_X_X_X_(punpcklwd, SSE2, "punpcklwd", INTRIN_NONCOMMUTATIVE, A, \
B, C)
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKLWD_H_ */

View file

@ -17,8 +17,6 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/intrin/bsf.h" #include "libc/intrin/bsf.h"
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/macros.h" #include "libc/macros.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/str/thompike.h" #include "libc/str/thompike.h"

View file

@ -18,35 +18,55 @@
*/ */
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/fmt/conv.h" #include "libc/fmt/conv.h"
#include "libc/intrin/packsswb.h"
#include "libc/intrin/pandn.h"
#include "libc/intrin/pcmpgtw.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/str/utf16.h" #include "libc/str/utf16.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/emmintrin.internal.h"
static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127}; #if !IsModeDbg()
#if defined(__x86_64__)
/* 10x speedup for ascii */
static axdx_t tprecode16to8_sse2(char *dst, size_t dstsize, const char16_t *src, static axdx_t tprecode16to8_sse2(char *dst, size_t dstsize, const char16_t *src,
axdx_t r) { axdx_t r) {
int16_t v1[8], v2[8], v3[8], vz[8]; __m128i v1, v2, v3, vz;
memset(vz, 0, 16); vz = _mm_setzero_si128();
while (r.ax + 8 < dstsize) { while (r.ax + 8 < dstsize) {
memcpy(v1, src + r.dx, 16); v1 = _mm_loadu_si128((__m128i *)(src + r.dx));
pcmpgtw(v2, v1, vz); v2 = _mm_cmpgt_epi16(v1, vz);
pcmpgtw(v3, v1, kDel16); v3 = _mm_cmpgt_epi16(v1, _mm_set1_epi16(0x7F));
pandn((void *)v2, (void *)v3, (void *)v2); v2 = _mm_andnot_si128(v3, v2);
if (pmovmskb((void *)v2) != 0xFFFF) if (_mm_movemask_epi8(v2) != 0xFFFF)
break; break;
packsswb((void *)v1, v1, v1); v1 = _mm_packs_epi16(v1, v1);
memcpy(dst + r.ax, v1, 8); _mm_storel_epi64((__m128i *)(dst + r.ax), v1);
r.ax += 8; r.ax += 8;
r.dx += 8; r.dx += 8;
} }
return r; return r;
} }
#elif defined(__aarch64__)
/**
 * Copies runs of ASCII from UTF-16 to UTF-8, eight code units at a time.
 *
 * Each iteration loads eight 16-bit units from `src + r.dx`. If every
 * unit is in the range 1..0x7F, they're narrowed to bytes and stored at
 * `dst + r.ax`, and both indices advance by eight. The loop stops at
 * the first group holding a NUL or a non-ASCII unit, or once fewer than
 * nine output bytes remain (`r.ax + 8 < dstsize` fails).
 *
 * @param dst is the output buffer
 * @param dstsize is the capacity of dst in bytes
 * @param src is the UTF-16 input
 * @param r holds the current output index (ax) and input index (dx)
 * @return r advanced past whatever was copied
 */
static axdx_t tprecode16to8_neon(char *dst, size_t dstsize, const char16_t *src,
                                 axdx_t r) {
  uint16x8_t v1, v2, v3;
  while (r.ax + 8 < dstsize) {
    v1 = vld1q_u16((const uint16_t *)(src + r.dx));
    v2 = vcgtq_u16(v1, vdupq_n_u16(0));     // lanes that aren't NUL
    v3 = vcgtq_u16(v1, vdupq_n_u16(0x7F));  // lanes beyond ASCII
    v2 = vbicq_u16(v2, v3);                 // 0xFFFF where 0 < lane <= 0x7F
    // vaddvq_u16() yields a uint16_t, so its sum could never equal
    // 8 * 0xFFFF and this path never ran; a horizontal minimum of
    // 0xFFFF says the same thing without overflowing
    if (vminvq_u16(v2) != 0xFFFF)
      break;
    vst1_u8((uint8_t *)(dst + r.ax), vqmovn_u16(v1));
    r.ax += 8;
    r.dx += 8;
  }
  return r;
}
#endif
#endif
/** /**
* Transcodes UTF-16 to UTF-8. * Transcodes UTF-16 to UTF-8.
* *
@ -66,10 +86,14 @@ axdx_t tprecode16to8(char *dst, size_t dstsize, const char16_t *src) {
r.ax = 0; r.ax = 0;
r.dx = 0; r.dx = 0;
for (;;) { for (;;) {
#if defined(__x86_64__) && !IsModeDbg() && !IsTiny() #if !IsModeDbg()
if (!((uintptr_t)(src + r.dx) & 15)) { #if defined(__x86_64__)
if (!((uintptr_t)(src + r.dx) & 15))
r = tprecode16to8_sse2(dst, dstsize, src, r); r = tprecode16to8_sse2(dst, dstsize, src, r);
} #elif defined(__aarch64__)
if (!((uintptr_t)(src + r.dx) & 15))
r = tprecode16to8_neon(dst, dstsize, src, r);
#endif
#endif #endif
if (!(x = src[r.dx++])) if (!(x = src[r.dx++]))
break; break;

View file

@ -16,34 +16,61 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/intrin/pcmpgtb.h" #include <arm_neon.h>
#include "libc/intrin/pmovmskb.h" #include <stdint.h>
#include "libc/intrin/punpckhbw.h" #include <string.h>
#include "libc/intrin/punpcklbw.h" #include "libc/dce.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/str/thompike.h" #include "libc/str/thompike.h"
#include "libc/str/utf16.h" #include "libc/str/utf16.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/emmintrin.internal.h"
#if !IsModeDbg()
#if defined(__x86_64__)
// 34x speedup for ascii
static inline axdx_t tprecode8to16_sse2(char16_t *dst, size_t dstsize, static inline axdx_t tprecode8to16_sse2(char16_t *dst, size_t dstsize,
const char *src, axdx_t r) { const char *src, axdx_t r) {
uint8_t v1[16], v2[16], vz[16]; __m128i v1, v2, vz;
memset(vz, 0, 16); vz = _mm_setzero_si128();
while (r.ax + 16 < dstsize) { while (r.ax + 16 < dstsize) {
memcpy(v1, src + r.dx, 16); v1 = _mm_loadu_si128((__m128i *)(src + r.dx));
pcmpgtb((int8_t *)v2, (int8_t *)v1, (int8_t *)vz); v2 = _mm_cmpgt_epi8(v1, vz);
if (pmovmskb(v2) != 0xFFFF) if (_mm_movemask_epi8(v2) != 0xFFFF)
break; break;
punpcklbw(v2, v1, vz); __m128i lo = _mm_unpacklo_epi8(v1, vz);
punpckhbw(v1, v1, vz); __m128i hi = _mm_unpackhi_epi8(v1, vz);
memcpy(dst + r.ax + 0, v2, 16); _mm_storeu_si128((__m128i *)(dst + r.ax), lo);
memcpy(dst + r.ax + 8, v1, 16); _mm_storeu_si128((__m128i *)(dst + r.ax + 8), hi);
r.ax += 16; r.ax += 16;
r.dx += 16; r.dx += 16;
} }
return r; return r;
} }
#elif defined(__aarch64__)
/**
 * Widens runs of ASCII from UTF-8 to UTF-16, sixteen bytes at a time.
 *
 * Each iteration loads sixteen bytes from `src + r.dx`. If every byte
 * is in the range 1..0x7F, they're zero-extended to 16-bit units and
 * stored at `dst + r.ax`, and both indices advance by sixteen. The loop
 * stops at the first group holding a NUL or a byte with its high bit
 * set, or once `r.ax + 16 < dstsize` fails.
 *
 * @param dst is the output buffer
 * @param dstsize is the capacity of dst, compared against r.ax
 * @param src is the UTF-8 input
 * @param r holds the current output index (ax) and input index (dx)
 * @return r advanced past whatever was widened
 */
static inline axdx_t tprecode8to16_neon(char16_t *dst, size_t dstsize,
                                        const char *src, axdx_t r) {
  uint8x16_t v1;
  while (r.ax + 16 < dstsize) {
    v1 = vld1q_u8((const uint8_t *)(src + r.dx));
    // compare as signed so bytes >= 0x80 are negative and fail the test
    // alongside NUL; an unsigned compare would let UTF-8 lead and
    // continuation bytes through as if they were ASCII
    uint8x16_t cmp = vcgtq_s8(vreinterpretq_s8_u8(v1), vdupq_n_s8(0));
    // vaddvq_u8() yields a uint8_t, so its sum could never equal
    // 16 * 0xFF and this path never ran; a horizontal minimum of 0xFF
    // means every lane passed
    if (vminvq_u8(cmp) != 0xFF)
      break;
    uint16x8_t lo = vmovl_u8(vget_low_u8(v1));
    uint16x8_t hi = vmovl_u8(vget_high_u8(v1));
    vst1q_u16((uint16_t *)(dst + r.ax), lo);
    vst1q_u16((uint16_t *)(dst + r.ax + 8), hi);
    r.ax += 16;
    r.dx += 16;
  }
  return r;
}
#endif
#endif
/** /**
* Transcodes UTF-8 to UTF-16. * Transcodes UTF-8 to UTF-16.
* *
@ -64,10 +91,14 @@ axdx_t tprecode8to16(char16_t *dst, size_t dstsize, const char *src) {
r.ax = 0; r.ax = 0;
r.dx = 0; r.dx = 0;
for (;;) { for (;;) {
#if defined(__x86_64__) && !IsModeDbg() #if !IsModeDbg()
if (!((uintptr_t)(src + r.dx) & 15)) { #if defined(__x86_64__)
if (!((uintptr_t)(src + r.dx) & 15))
r = tprecode8to16_sse2(dst, dstsize, src, r); r = tprecode8to16_sse2(dst, dstsize, src, r);
} #elif defined(__aarch64__)
if (!((uintptr_t)(src + r.dx) & 15))
r = tprecode8to16_neon(dst, dstsize, src, r);
#endif
#endif #endif
x = src[r.dx++] & 0377; x = src[r.dx++] & 0377;
if (x >= 0300) { if (x >= 0300) {

View file

@ -17,21 +17,13 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/intrin/bsr.h" #include "libc/intrin/bsr.h"
#include "libc/intrin/packsswb.h"
#include "libc/intrin/pandn.h"
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pcmpgtw.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/intrin/punpckhbw.h"
#include "libc/intrin/punpcklbw.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/serialize.h" #include "libc/serialize.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/str/thompike.h" #include "libc/str/thompike.h"
#include "libc/str/utf16.h" #include "libc/str/utf16.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "third_party/intel/emmintrin.internal.h"
static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127};
/** /**
* Transcodes UTF-16 to UTF-8. * Transcodes UTF-16 to UTF-8.
@ -45,28 +37,27 @@ char *utf16to8(const char16_t *p, size_t n, size_t *z) {
char *r, *q; char *r, *q;
wint_t x, y; wint_t x, y;
const char16_t *e; const char16_t *e;
int16_t v1[8], v2[8], v3[8], vz[8];
if (z) if (z)
*z = 0; *z = 0;
if (n == -1) if (n == -1)
n = p ? strlen16(p) : 0; n = p ? strlen16(p) : 0;
if ((q = r = malloc(n * 4 + 8 + 1))) { if ((q = r = malloc(n * 4 + 8 + 1))) {
for (e = p + n; p < e;) { for (e = p + n; p < e;) {
if (p + 8 < e) { /* 17x ascii */ #if defined(__x86_64__)
bzero(vz, 16); if (p + 8 < e) {
do { do {
memcpy(v1, p, 16); __m128i v1 = _mm_loadu_si128((__m128i *)p);
pcmpgtw(v2, v1, vz); __m128i v2 = _mm_cmpgt_epi16(v1, _mm_setzero_si128());
pcmpgtw(v3, v1, kDel16); __m128i v3 = _mm_cmpgt_epi16(v1, _mm_set1_epi16(127));
pandn((void *)v2, (void *)v3, (void *)v2); v2 = _mm_andnot_si128(v3, v2);
if (pmovmskb((void *)v2) != 0xFFFF) if (_mm_movemask_epi8(v2) != 0xFFFF)
break; break;
packsswb((void *)v1, v1, v1); _mm_storel_epi64((__m128i *)q, _mm_packs_epi16(v1, v1));
memcpy(q, v1, 8);
p += 8; p += 8;
q += 8; q += 8;
} while (p + 8 < e); } while (p + 8 < e);
} }
#endif
x = *p++ & 0xffff; x = *p++ & 0xffff;
if (!IsUcs2(x)) { if (!IsUcs2(x)) {
if (p < e) { if (p < e) {

View file

@ -16,18 +16,12 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/intrin/likely.h"
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/intrin/punpckhbw.h"
#include "libc/intrin/punpckhwd.h"
#include "libc/intrin/punpcklbw.h"
#include "libc/intrin/punpcklwd.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/str/thompike.h" #include "libc/str/thompike.h"
#include "libc/str/utf16.h" #include "libc/str/utf16.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "third_party/intel/emmintrin.internal.h"
/** /**
* Transcodes UTF-8 to UTF-32. * Transcodes UTF-8 to UTF-32.
@ -41,35 +35,35 @@ wchar_t *utf8to32(const char *p, size_t n, size_t *z) {
unsigned m, j; unsigned m, j;
wint_t x, a, b; wint_t x, a, b;
wchar_t *r, *q; wchar_t *r, *q;
uint8_t v1[16], v2[16], v3[16], v4[16], vz[16];
if (z) if (z)
*z = 0; *z = 0;
if (n == -1) if (n == -1)
n = p ? strlen(p) : 0; n = p ? strlen(p) : 0;
if ((q = r = malloc(n * sizeof(wchar_t) + sizeof(wchar_t)))) { if ((q = r = malloc(n * sizeof(wchar_t) + sizeof(wchar_t)))) {
for (i = 0; i < n;) { for (i = 0; i < n;) {
#ifdef __x86_64__
if (!((uintptr_t)(p + i) & 15) && i + 16 < n) { if (!((uintptr_t)(p + i) & 15) && i + 16 < n) {
/* 10x speedup for ascii */
bzero(vz, 16);
do { do {
memcpy(v1, p + i, 16); __m128i v1, v2, v3, v4;
pcmpgtb((int8_t *)v2, (int8_t *)v1, (int8_t *)vz); v1 = _mm_loadu_si128((__m128i *)(p + i));
if (pmovmskb(v2) != 0xFFFF) v2 = _mm_cmpgt_epi8(v1, _mm_setzero_si128());
if (_mm_movemask_epi8(v2) != 0xFFFF)
break; break;
punpcklbw(v3, v1, vz); v3 = _mm_unpacklo_epi8(v1, _mm_setzero_si128());
punpckhbw(v1, v1, vz); v1 = _mm_unpackhi_epi8(v1, _mm_setzero_si128());
punpcklwd((void *)v4, (void *)v3, (void *)vz); v4 = _mm_unpacklo_epi16(v3, _mm_setzero_si128());
punpckhwd((void *)v3, (void *)v3, (void *)vz); v3 = _mm_unpackhi_epi16(v3, _mm_setzero_si128());
punpcklwd((void *)v2, (void *)v1, (void *)vz); v2 = _mm_unpacklo_epi16(v1, _mm_setzero_si128());
punpckhwd((void *)v1, (void *)v1, (void *)vz); v1 = _mm_unpackhi_epi16(v1, _mm_setzero_si128());
memcpy(q + 0, v4, 16); _mm_storeu_si128((__m128i *)(q + 0), v4);
memcpy(q + 4, v3, 16); _mm_storeu_si128((__m128i *)(q + 4), v3);
memcpy(q + 8, v2, 16); _mm_storeu_si128((__m128i *)(q + 8), v2);
memcpy(q + 12, v1, 16); _mm_storeu_si128((__m128i *)(q + 12), v1);
i += 16; i += 16;
q += 16; q += 16;
} while (i + 16 < n); } while (i + 16 < n);
} }
#endif
x = p[i++] & 0xff; x = p[i++] & 0xff;
if (x >= 0300) { if (x >= 0300) {
a = ThomPikeByte(x); a = ThomPikeByte(x);

View file

@ -16,8 +16,6 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "net/http/escape.h" #include "net/http/escape.h"
@ -34,23 +32,12 @@ char *DecodeLatin1(const char *p, size_t n, size_t *z) {
int c; int c;
size_t i; size_t i;
char *r, *q; char *r, *q;
int8_t v1[16], v2[16], vz[16];
if (z) if (z)
*z = 0; *z = 0;
if (n == -1) if (n == -1)
n = p ? strlen(p) : 0; n = p ? strlen(p) : 0;
if ((q = r = malloc(n * 2 + 1))) { if ((q = r = malloc(n * 2 + 1))) {
for (i = 0; i < n;) { for (i = 0; i < n;) {
bzero(vz, 16); /* 3x speedup for ASCII */
while (i + 16 < n) {
memcpy(v1, p + i, 16);
pcmpgtb(v2, v1, vz);
if (pmovmskb((void *)v2) != 0xFFFF)
break;
memcpy(q, v1, 16);
q += 16;
i += 16;
}
c = p[i++] & 0xff; c = p[i++] & 0xff;
if (c < 0200) { if (c < 0200) {
*q++ = c; *q++ = c;

View file

@ -17,8 +17,6 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/stdio/stdio.h" #include "libc/stdio/stdio.h"
#include "libc/str/str.h" #include "libc/str/str.h"

View file

@ -16,8 +16,6 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/intrin/pcmpgtb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/str/thompike.h" #include "libc/str/thompike.h"
@ -40,23 +38,12 @@ char *Underlong(const char *p, size_t n, size_t *z) {
char *r, *q; char *r, *q;
size_t i, j, m; size_t i, j, m;
wint_t x, a, b; wint_t x, a, b;
int8_t v1[16], v2[16], vz[16];
if (z) if (z)
*z = 0; *z = 0;
if (n == -1) if (n == -1)
n = p ? strlen(p) : 0; n = p ? strlen(p) : 0;
if ((q = r = malloc(n * 2 + 1))) { if ((q = r = malloc(n * 2 + 1))) {
for (i = 0; i < n;) { for (i = 0; i < n;) {
bzero(vz, 16); /* 50x speedup for ASCII */
while (i + 16 < n) {
memcpy(v1, p + i, 16);
pcmpgtb(v2, v1, vz);
if (pmovmskb((void *)v2) != 0xFFFF)
break;
memcpy(q, v1, 16);
q += 16;
i += 16;
}
x = p[i++] & 0xff; x = p[i++] & 0xff;
if (x >= 0300) { if (x >= 0300) {
a = ThomPikeByte(x); a = ThomPikeByte(x);