mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-06 22:43:34 +00:00
Get rid of some legacy code
This commit is contained in:
parent
37ca1badaf
commit
38cc4b3c68
27 changed files with 123 additions and 600 deletions
|
@ -45,6 +45,12 @@ $(DSP_SCALE_A).pkg: \
|
|||
$(DSP_SCALE_A_OBJS) \
|
||||
$(foreach x,$(DSP_SCALE_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
ifeq ($(ARCH),x86_64)
|
||||
o/$(MODE)/dsp/scale/cdecimate2xuint8x8.o: private \
|
||||
CFLAGS += \
|
||||
-mssse3
|
||||
endif
|
||||
|
||||
o/$(MODE)/dsp/scale/cdecimate2xuint8x8.o \
|
||||
o/$(MODE)/dsp/scale/gyarados.o \
|
||||
o/$(MODE)/dsp/scale/magikarp.o \
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pandn.h"
|
||||
|
||||
/**
|
||||
* Computes bitwise AND-NOT of 128-bit integers: 𝑎 = ~𝑏 & 𝑐.
|
||||
*
|
||||
* This is not NAND; only the first input 𝑏 is complemented.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector (the one that is inverted)
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro pandn() from
   libc/intrin/pandn.h is not expanded on this definition. */
void(pandn)(uint64_t a[2], const uint64_t b[2], const uint64_t c[2]) {
|
||||
unsigned i;
|
||||
for (i = 0; i < 2; ++i) { /* one 64-bit half per iteration */
|
||||
a[i] = ~b[i] & c[i]; /* inputs at index i are read before a[i] is stored */
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PANDN_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PANDN_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: stores ~b & c into the first argument. */
void pandn(uint64_t[2], const uint64_t[2], const uint64_t[2]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "pandn" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define pandn(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pandn, SSE2, "pandn", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PANDN_H_ */
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Compares signed 8-bit integers w/ greater than predicate.
|
||||
*
|
||||
* Each result byte is -1 (all bits set) where 𝑏[i] > 𝑐[i], otherwise 0.
|
||||
*
|
||||
* Note that operands can be xor'd with 0x80 for unsigned compares.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro pcmpgtb() from
   libc/intrin/pcmpgtb.h is not expanded on this definition. */
void(pcmpgtb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
|
||||
unsigned i;
|
||||
int8_t r[16]; /* staging buffer so inputs are fully read before a is written */
|
||||
for (i = 0; i < 16; ++i)
|
||||
r[i] = -(b[i] > c[i]); /* comparison yields 1 or 0; negation turns that into -1 or 0 */
|
||||
__builtin_memcpy(a, r, 16); /* publish all sixteen lanes at once */
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: lane i of the first argument becomes -1
   when the second argument's lane is greater than the third's, else 0. */
void pcmpgtb(int8_t[16], const int8_t[16], const int8_t[16]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "pcmpgtb" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define pcmpgtb(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpgtb, SSE2, "pcmpgtb", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTB_H_ */
|
|
@ -1,36 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtw.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Compares signed 16-bit integers w/ greater than predicate.
|
||||
*
|
||||
* Each result word is -1 (all bits set) where 𝑏[i] > 𝑐[i], otherwise 0.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives result
|
||||
* @param 𝑏 [r/o] supplies first input vector
|
||||
* @param 𝑐 [r/o] supplies second input vector
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro pcmpgtw() from
   libc/intrin/pcmpgtw.h is not expanded on this definition. */
void(pcmpgtw)(int16_t a[8], const int16_t b[8], const int16_t c[8]) {
|
||||
unsigned i;
|
||||
int16_t r[8]; /* staging buffer so inputs are fully read before a is written */
|
||||
for (i = 0; i < 8; ++i)
|
||||
r[i] = -(b[i] > c[i]); /* comparison yields 1 or 0; negation turns that into -1 or 0 */
|
||||
__builtin_memcpy(a, r, 16); /* 8 lanes x 2 bytes */
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: lane i of the first argument becomes -1
   when the second argument's lane is greater than the third's, else 0. */
void pcmpgtw(int16_t[8], const int16_t[8], const int16_t[8]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "pcmpgtw" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define pcmpgtw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(pcmpgtw, SSE2, "pcmpgtw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PCMPGTW_H_ */
|
|
@ -1,34 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
|
||||
/**
|
||||
* Turns result of byte comparison into bitmask.
|
||||
*
|
||||
* Bit i of the result is set when the top bit (0x80) of 𝑝[i] is set.
|
||||
* Only bits 0 through 15 can be set.
|
||||
*
|
||||
* @param 𝑝 is byte vector to crunch
|
||||
* @return 16-bit mask with one bit per input byte
|
||||
* @see pcmpeqb(), bsf(), etc.
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro pmovmskb() from
   libc/intrin/pmovmskb.h is not expanded on this definition. */
uint32_t(pmovmskb)(const uint8_t p[16]) {
|
||||
uint32_t i, m;
|
||||
for (m = i = 0; i < 16; ++i) {
|
||||
if (p[i] & 0x80) /* only the sign bit of each byte matters */
|
||||
m |= 1 << i; /* i is at most 15, so the shift stays within int */
|
||||
}
|
||||
return m;
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: returns a mask whose bit i is the top bit
   of byte i of the argument. */
uint32_t pmovmskb(const uint8_t[16]);
|
||||
|
||||
/* On x86-64 with GNU C, shadow the function with a statement-expression
   macro. When IsModeDbg() is false and X86_HAVE(SSE2) holds, the sixteen
   bytes at A are read through an __intrin_xmm_t pointer and fed to inline
   asm: pmovmskb, or vpmovmskb when X86_NEED(AVX) is true. Otherwise the
   macro calls the function above; the inner pmovmskb(A) is not expanded
   again because a macro is never re-expanded inside its own replacement.
   NOTE(review): IsModeDbg() presumably identifies debug builds; confirm. */
#if defined(__x86_64__) && defined(__GNUC__)
|
||||
#define pmovmskb(A) \
|
||||
({ \
|
||||
uint32_t Mask; \
|
||||
if (!IsModeDbg() && X86_HAVE(SSE2)) { \
|
||||
const __intrin_xmm_t *Xmm = (const __intrin_xmm_t *)(A); \
|
||||
if (!X86_NEED(AVX)) { \
|
||||
asm("pmovmskb\t%1,%0" : "=r"(Mask) : "x"(*Xmm)); \
|
||||
} else { \
|
||||
asm("vpmovmskb\t%1,%0" : "=r"(Mask) : "x"(*Xmm)); \
|
||||
} \
|
||||
} else { \
|
||||
Mask = pmovmskb(A); \
|
||||
} \
|
||||
Mask; \
|
||||
})
|
||||
#endif
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PMOVMSKB_H_ */
|
|
@ -4,11 +4,8 @@
|
|||
COSMOPOLITAN_C_START_
|
||||
|
||||
void psraw(int16_t[8], const int16_t[8], unsigned char) libcesque;
|
||||
void psrawv(int16_t[8], const int16_t[8], const uint64_t[2]) libcesque;
|
||||
|
||||
#define psraw(A, B, I) INTRIN_SSEVEX_X_I_(psraw, SSE2, "psraw", A, B, I)
|
||||
#define psrawv(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(psrawv, SSE2, "psraw", INTRIN_NONCOMMUTATIVE, A, B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PSRAW_H_ */
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/psraw.h"
|
||||
|
||||
/**
|
||||
* Divides shorts by two power.
|
||||
*
|
||||
* Every lane of 𝑏 is shifted right by the count in 𝑐[0]; counts above
|
||||
* 15 are treated as 15. 𝑐[1] is not read.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives the eight shifted lanes
|
||||
* @param 𝑏 [r/o] supplies the eight lanes to shift
|
||||
* @param 𝑐 [r/o] supplies the shift count in its first element
|
||||
* @note arithmetic shift right will sign extend negatives
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro psrawv() from
   libc/intrin/psraw.h is not expanded on this definition. */
void(psrawv)(int16_t a[8], const int16_t b[8], const uint64_t c[2]) {
|
||||
unsigned i;
|
||||
unsigned char k;
|
||||
k = c[0] > 15 ? 15 : c[0]; /* clamp so the shift never exceeds the lane width */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
a[i] = b[i] >> k; /* >> on a negative value is implementation-defined in ISO C; the @note above relies on it being arithmetic */
|
||||
}
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/punpckhbw.h"
|
||||
|
||||
/**
|
||||
* Interleaves high bytes.
|
||||
*
|
||||
* The result is 𝑏[8], 𝑐[8], 𝑏[9], 𝑐[9], … 𝑏[15], 𝑐[15]; the low eight
|
||||
* bytes of both inputs are ignored.
|
||||
*
|
||||
* @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 interleaved
|
||||
* @param 𝑏 [r/o] supplies sixteen bytes (upper eight are used)
|
||||
* @param 𝑐 [r/o] supplies sixteen bytes (upper eight are used)
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro punpckhbw() from
   libc/intrin/punpckhbw.h is not expanded on this definition. */
void(punpckhbw)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
||||
/* Stores run in ascending order on purpose: when a is the same array
   as b or c, each upper-half source byte has already been read by the
   time the store to that same index happens. Do not reorder. */
a[0x0] = b[0x8];
|
||||
a[0x1] = c[0x8];
|
||||
a[0x2] = b[0x9];
|
||||
a[0x3] = c[0x9];
|
||||
a[0x4] = b[0xa];
|
||||
a[0x5] = c[0xa];
|
||||
a[0x6] = b[0xb];
|
||||
a[0x7] = c[0xb];
|
||||
a[0x8] = b[0xc];
|
||||
a[0x9] = c[0xc];
|
||||
a[0xa] = b[0xd];
|
||||
a[0xb] = c[0xd];
|
||||
a[0xc] = b[0xe];
|
||||
a[0xd] = c[0xe];
|
||||
a[0xe] = b[0xf];
|
||||
a[0xf] = c[0xf];
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKHBW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKHBW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: interleaves bytes 8..15 of the second and
   third arguments into the first. */
void punpckhbw(uint8_t[16], const uint8_t[16], const uint8_t[16]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "punpckhbw" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define punpckhbw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(punpckhbw, SSE2, "punpckhbw", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKHBW_H_ */
|
|
@ -1,49 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/punpckhwd.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
/**
|
||||
* Interleaves high words.
|
||||
*
|
||||
* 0 1 2 3 4 5 6 7
|
||||
* B aa bb cc dd EE FF GG HH
|
||||
* C ii jj kk ll MM NN OO PP
|
||||
* └┤ └┤ └┤ └┤
|
||||
* ┌────────┘ │ │ │
|
||||
* │ ┌─────┘ │ │
|
||||
* │ │ ┌──┘ │
|
||||
* ┌───┤ ┌───┤ ┌───┤ ┌───┤
|
||||
* → A EE MM FF NN GG OO HH PP
|
||||
*
|
||||
* @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 interleaved
|
||||
* @param 𝑏 [r/o] supplies eight words (upper four are used)
|
||||
* @param 𝑐 [r/o] supplies eight words (upper four are used)
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro punpckhwd() from
   libc/intrin/punpckhwd.h is not expanded on this definition. */
void(punpckhwd)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
|
||||
/* Stores run in ascending order on purpose: when a is the same array
   as b or c, each upper-half source word has already been read by the
   time the store to that same index happens. Do not reorder. */
a[0] = b[4];
|
||||
a[1] = c[4];
|
||||
a[2] = b[5];
|
||||
a[3] = c[5];
|
||||
a[4] = b[6];
|
||||
a[5] = c[6];
|
||||
a[6] = b[7];
|
||||
a[7] = c[7];
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKHWD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKHWD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: interleaves words 4..7 of the second and
   third arguments into the first. */
void punpckhwd(uint16_t[8], const uint16_t[8], const uint16_t[8]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "punpckhwd" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define punpckhwd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(punpckhwd, SSE2, "punpckhwd", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKHWD_H_ */
|
|
@ -1,56 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/punpcklbw.h"
|
||||
|
||||
/**
|
||||
* Interleaves low bytes.
|
||||
*
|
||||
* 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
* B A B C D E F G H i j k l m n o p
|
||||
* C Q R S T U V W X y z α σ π μ τ ε
|
||||
* │ │ │ │ │ │ │ │
|
||||
* │ │ │ └─────┐
|
||||
* │ │ └───┐ │ etc...
|
||||
* │ └─┐ │ │
|
||||
* ├─┐ ├─┐ ├─┐ ├─┐
|
||||
* → A A Q B R C S D T E U F V G W H X
|
||||
*
|
||||
* @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 interleaved
|
||||
* @param 𝑏 [r/o] supplies sixteen bytes (lower eight are used)
|
||||
* @param 𝑐 [r/o] supplies sixteen bytes (lower eight are used)
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro punpcklbw() from
   libc/intrin/punpcklbw.h is not expanded on this definition. */
void(punpcklbw)(uint8_t a[16], const uint8_t b[16], const uint8_t c[16]) {
|
||||
/* Stores run in descending order on purpose: when a is the same array
   as b or c, each lower-half source byte has already been read by the
   time the store to that same index happens. Do not reorder. */
a[0xf] = c[7];
|
||||
a[0xe] = b[7];
|
||||
a[0xd] = c[6];
|
||||
a[0xc] = b[6];
|
||||
a[0xb] = c[5];
|
||||
a[0xa] = b[5];
|
||||
a[0x9] = c[4];
|
||||
a[0x8] = b[4];
|
||||
a[0x7] = c[3];
|
||||
a[0x6] = b[3];
|
||||
a[0x5] = c[2];
|
||||
a[0x4] = b[2];
|
||||
a[0x3] = c[1];
|
||||
a[0x2] = b[1];
|
||||
a[0x1] = c[0];
|
||||
a[0x0] = b[0];
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKLBW_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKLBW_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: interleaves bytes 0..7 of the second and
   third arguments into the first. */
void punpcklbw(uint8_t[16], const uint8_t[16], const uint8_t[16]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "punpcklbw" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define punpcklbw(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(punpcklbw, SSE2, "punpcklbw", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKLBW_H_ */
|
|
@ -1,48 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/punpcklwd.h"
|
||||
|
||||
/**
|
||||
* Interleaves low words.
|
||||
*
|
||||
* 0 1 2 3 4 5 6 7
|
||||
* B AA BB CC DD ee ff gg hh
|
||||
* C II JJ KK LL mm nn oo pp
|
||||
* ├┘ ├┘ ├┘ ├┘
|
||||
* │ │ │ └────────┐
|
||||
* │ │ └─────┐ │
|
||||
* │ └──┐ │ │
|
||||
* ├───┐ ├───┐ ├───┐ ├───┐
|
||||
* → A AA II BB JJ CC KK DD LL
|
||||
*
|
||||
* @param 𝑎 [w/o] receives reduced 𝑏 and 𝑐 interleaved
|
||||
* @param 𝑏 [r/o] supplies eight words (lower four are used)
|
||||
* @param 𝑐 [r/o] supplies eight words (lower four are used)
|
||||
* @mayalias
|
||||
*/
|
||||
/* The name is parenthesized so the function-like macro punpcklwd() from
   libc/intrin/punpcklwd.h is not expanded on this definition. */
void(punpcklwd)(uint16_t a[8], const uint16_t b[8], const uint16_t c[8]) {
|
||||
/* Stores run in descending order on purpose: when a is the same array
   as b or c, each lower-half source word has already been read by the
   time the store to that same index happens. Do not reorder. */
a[7] = c[3];
|
||||
a[6] = b[3];
|
||||
a[5] = c[2];
|
||||
a[4] = b[2];
|
||||
a[3] = c[1];
|
||||
a[2] = b[1];
|
||||
a[1] = c[0];
|
||||
a[0] = b[0];
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTRIN_PUNPCKLWD_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_PUNPCKLWD_H_
|
||||
#include "libc/intrin/macros.h"
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Out-of-line implementation: interleaves words 0..3 of the second and
   third arguments into the first. */
void punpcklwd(uint16_t[8], const uint16_t[8], const uint16_t[8]);
|
||||
|
||||
/* Function-like macro that shadows the function of the same name and
   forwards to INTRIN_SSEVEX_X_X_X_ (libc/intrin/macros.h) with the SSE2
   tag and the "punpcklwd" mnemonic.
   NOTE(review): presumably this emits the instruction inline when SSE2
   is usable and otherwise calls the function above; confirm in macros.h. */
#define punpcklwd(A, B, C) \
|
||||
INTRIN_SSEVEX_X_X_X_(punpcklwd, SSE2, "punpcklwd", INTRIN_NONCOMMUTATIVE, A, \
|
||||
B, C)
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_PUNPCKLWD_H_ */
|
|
@ -17,8 +17,6 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/bsf.h"
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
|
|
|
@ -18,35 +18,55 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/fmt/conv.h"
|
||||
#include "libc/intrin/packsswb.h"
|
||||
#include "libc/intrin/pandn.h"
|
||||
#include "libc/intrin/pcmpgtw.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/utf16.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127};
|
||||
#if !IsModeDbg()
|
||||
#if defined(__x86_64__)
|
||||
|
||||
/* 10x speedup for ascii */
|
||||
static axdx_t tprecode16to8_sse2(char *dst, size_t dstsize, const char16_t *src,
|
||||
axdx_t r) {
|
||||
int16_t v1[8], v2[8], v3[8], vz[8];
|
||||
memset(vz, 0, 16);
|
||||
__m128i v1, v2, v3, vz;
|
||||
vz = _mm_setzero_si128();
|
||||
while (r.ax + 8 < dstsize) {
|
||||
memcpy(v1, src + r.dx, 16);
|
||||
pcmpgtw(v2, v1, vz);
|
||||
pcmpgtw(v3, v1, kDel16);
|
||||
pandn((void *)v2, (void *)v3, (void *)v2);
|
||||
if (pmovmskb((void *)v2) != 0xFFFF)
|
||||
v1 = _mm_loadu_si128((__m128i *)(src + r.dx));
|
||||
v2 = _mm_cmpgt_epi16(v1, vz);
|
||||
v3 = _mm_cmpgt_epi16(v1, _mm_set1_epi16(0x7F));
|
||||
v2 = _mm_andnot_si128(v3, v2);
|
||||
if (_mm_movemask_epi8(v2) != 0xFFFF)
|
||||
break;
|
||||
packsswb((void *)v1, v1, v1);
|
||||
memcpy(dst + r.ax, v1, 8);
|
||||
v1 = _mm_packs_epi16(v1, v1);
|
||||
_mm_storel_epi64((__m128i *)(dst + r.ax), v1);
|
||||
r.ax += 8;
|
||||
r.dx += 8;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
/**
 * Copies leading runs of ASCII from UTF-16 to UTF-8, eight units a pass.
 *
 * Each pass loads eight code units from `src + r.dx`. If every one of
 * them lies in 1..127 they are narrowed to bytes and stored at
 * `dst + r.ax`. The loop stops at the first group holding a NUL or a
 * unit above 0x7F, or once `r.ax + 8 < dstsize` no longer holds; the
 * remainder is left for the caller to transcode.
 *
 * @param dst receives output bytes
 * @param dstsize is the bound that `r.ax + 8` must stay below
 * @param src supplies UTF-16 input; sixteen bytes are read per pass
 * @param r carries the output index (ax) and the input index (dx)
 * @return r advanced by eight in both fields for each group copied
 */
static axdx_t tprecode16to8_neon(char *dst, size_t dstsize, const char16_t *src,
                                 axdx_t r) {
  uint16x8_t v1, v2, v3;
  while (r.ax + 8 < dstsize) {
    v1 = vld1q_u16((const uint16_t *)(src + r.dx));
    v2 = vcgtq_u16(v1, vdupq_n_u16(0));     // lanes that are not NUL
    v3 = vcgtq_u16(v1, vdupq_n_u16(0x7F));  // lanes above the ASCII range
    v2 = vbicq_u16(v2, v3);                 // nonzero AND NOT above 0x7F
    // Every lane must be all-ones. Checking the minimum lane avoids
    // vaddvq_u16(), whose 16-bit result can never equal 8 * 0xFFFF and
    // therefore made this test bail out on every group.
    if (vminvq_u16(v2) != 0xFFFF)
      break;
    vst1_u8((uint8_t *)(dst + r.ax), vqmovn_u16(v1));
    r.ax += 8;
    r.dx += 8;
  }
  return r;
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Transcodes UTF-16 to UTF-8.
|
||||
*
|
||||
|
@ -66,10 +86,14 @@ axdx_t tprecode16to8(char *dst, size_t dstsize, const char16_t *src) {
|
|||
r.ax = 0;
|
||||
r.dx = 0;
|
||||
for (;;) {
|
||||
#if defined(__x86_64__) && !IsModeDbg() && !IsTiny()
|
||||
if (!((uintptr_t)(src + r.dx) & 15)) {
|
||||
#if !IsModeDbg()
|
||||
#if defined(__x86_64__)
|
||||
if (!((uintptr_t)(src + r.dx) & 15))
|
||||
r = tprecode16to8_sse2(dst, dstsize, src, r);
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
if (!((uintptr_t)(src + r.dx) & 15))
|
||||
r = tprecode16to8_neon(dst, dstsize, src, r);
|
||||
#endif
|
||||
#endif
|
||||
if (!(x = src[r.dx++]))
|
||||
break;
|
||||
|
|
|
@ -16,34 +16,61 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/intrin/punpckhbw.h"
|
||||
#include "libc/intrin/punpcklbw.h"
|
||||
#include <arm_neon.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "libc/dce.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
#include "libc/str/utf16.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
#if !IsModeDbg()
|
||||
#if defined(__x86_64__)
|
||||
|
||||
// 34x speedup for ascii
|
||||
static inline axdx_t tprecode8to16_sse2(char16_t *dst, size_t dstsize,
|
||||
const char *src, axdx_t r) {
|
||||
uint8_t v1[16], v2[16], vz[16];
|
||||
memset(vz, 0, 16);
|
||||
__m128i v1, v2, vz;
|
||||
vz = _mm_setzero_si128();
|
||||
while (r.ax + 16 < dstsize) {
|
||||
memcpy(v1, src + r.dx, 16);
|
||||
pcmpgtb((int8_t *)v2, (int8_t *)v1, (int8_t *)vz);
|
||||
if (pmovmskb(v2) != 0xFFFF)
|
||||
v1 = _mm_loadu_si128((__m128i *)(src + r.dx));
|
||||
v2 = _mm_cmpgt_epi8(v1, vz);
|
||||
if (_mm_movemask_epi8(v2) != 0xFFFF)
|
||||
break;
|
||||
punpcklbw(v2, v1, vz);
|
||||
punpckhbw(v1, v1, vz);
|
||||
memcpy(dst + r.ax + 0, v2, 16);
|
||||
memcpy(dst + r.ax + 8, v1, 16);
|
||||
__m128i lo = _mm_unpacklo_epi8(v1, vz);
|
||||
__m128i hi = _mm_unpackhi_epi8(v1, vz);
|
||||
_mm_storeu_si128((__m128i *)(dst + r.ax), lo);
|
||||
_mm_storeu_si128((__m128i *)(dst + r.ax + 8), hi);
|
||||
r.ax += 16;
|
||||
r.dx += 16;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
/**
 * Widens leading runs of ASCII from UTF-8 to UTF-16, sixteen bytes a pass.
 *
 * Each pass loads sixteen bytes from `src + r.dx`. If every one of them
 * lies in 1..127 they are zero-extended to sixteen char16_t values and
 * stored at `dst + r.ax`. The loop stops at the first group holding a
 * NUL or a byte with its high bit set, or once `r.ax + 16 < dstsize` no
 * longer holds; the remainder is left for the caller to transcode.
 *
 * @param dst receives UTF-16 output
 * @param dstsize is the bound that `r.ax + 16` must stay below
 * @param src supplies UTF-8 input; sixteen bytes are read per pass
 * @param r carries the output index (ax) and the input index (dx)
 * @return r advanced by sixteen in both fields for each group copied
 */
static inline axdx_t tprecode8to16_neon(char16_t *dst, size_t dstsize,
                                        const char *src, axdx_t r) {
  uint8x16_t v1;
  while (r.ax + 16 < dstsize) {
    v1 = vld1q_u8((const uint8_t *)(src + r.dx));
    // Signed compare: bytes >= 0x80 are negative and fail, as does NUL.
    // An unsigned "> 0" test would let UTF-8 lead and continuation bytes
    // through to be zero-extended as if they were ASCII.
    uint8x16_t cmp = vcgtq_s8(vreinterpretq_s8_u8(v1), vdupq_n_s8(0));
    // Every lane must be all-ones. Checking the minimum lane avoids
    // vaddvq_u8(), whose 8-bit result can never equal 16 * 0xFF and
    // therefore made this test bail out on every group.
    if (vminvq_u8(cmp) != 0xFF)
      break;
    uint16x8_t lo = vmovl_u8(vget_low_u8(v1));
    uint16x8_t hi = vmovl_u8(vget_high_u8(v1));
    vst1q_u16((uint16_t *)(dst + r.ax), lo);
    vst1q_u16((uint16_t *)(dst + r.ax + 8), hi);
    r.ax += 16;
    r.dx += 16;
  }
  return r;
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Transcodes UTF-8 to UTF-16.
|
||||
*
|
||||
|
@ -64,10 +91,14 @@ axdx_t tprecode8to16(char16_t *dst, size_t dstsize, const char *src) {
|
|||
r.ax = 0;
|
||||
r.dx = 0;
|
||||
for (;;) {
|
||||
#if defined(__x86_64__) && !IsModeDbg()
|
||||
if (!((uintptr_t)(src + r.dx) & 15)) {
|
||||
#if !IsModeDbg()
|
||||
#if defined(__x86_64__)
|
||||
if (!((uintptr_t)(src + r.dx) & 15))
|
||||
r = tprecode8to16_sse2(dst, dstsize, src, r);
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
if (!((uintptr_t)(src + r.dx) & 15))
|
||||
r = tprecode8to16_neon(dst, dstsize, src, r);
|
||||
#endif
|
||||
#endif
|
||||
x = src[r.dx++] & 0377;
|
||||
if (x >= 0300) {
|
||||
|
|
|
@ -17,21 +17,13 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/bsr.h"
|
||||
#include "libc/intrin/packsswb.h"
|
||||
#include "libc/intrin/pandn.h"
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pcmpgtw.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/intrin/punpckhbw.h"
|
||||
#include "libc/intrin/punpcklbw.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/serialize.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
#include "libc/str/utf16.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127};
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
/**
|
||||
* Transcodes UTF-16 to UTF-8.
|
||||
|
@ -45,28 +37,27 @@ char *utf16to8(const char16_t *p, size_t n, size_t *z) {
|
|||
char *r, *q;
|
||||
wint_t x, y;
|
||||
const char16_t *e;
|
||||
int16_t v1[8], v2[8], v3[8], vz[8];
|
||||
if (z)
|
||||
*z = 0;
|
||||
if (n == -1)
|
||||
n = p ? strlen16(p) : 0;
|
||||
if ((q = r = malloc(n * 4 + 8 + 1))) {
|
||||
for (e = p + n; p < e;) {
|
||||
if (p + 8 < e) { /* 17x ascii */
|
||||
bzero(vz, 16);
|
||||
#if defined(__x86_64__)
|
||||
if (p + 8 < e) {
|
||||
do {
|
||||
memcpy(v1, p, 16);
|
||||
pcmpgtw(v2, v1, vz);
|
||||
pcmpgtw(v3, v1, kDel16);
|
||||
pandn((void *)v2, (void *)v3, (void *)v2);
|
||||
if (pmovmskb((void *)v2) != 0xFFFF)
|
||||
__m128i v1 = _mm_loadu_si128((__m128i *)p);
|
||||
__m128i v2 = _mm_cmpgt_epi16(v1, _mm_setzero_si128());
|
||||
__m128i v3 = _mm_cmpgt_epi16(v1, _mm_set1_epi16(127));
|
||||
v2 = _mm_andnot_si128(v3, v2);
|
||||
if (_mm_movemask_epi8(v2) != 0xFFFF)
|
||||
break;
|
||||
packsswb((void *)v1, v1, v1);
|
||||
memcpy(q, v1, 8);
|
||||
_mm_storel_epi64((__m128i *)q, _mm_packs_epi16(v1, v1));
|
||||
p += 8;
|
||||
q += 8;
|
||||
} while (p + 8 < e);
|
||||
}
|
||||
#endif
|
||||
x = *p++ & 0xffff;
|
||||
if (!IsUcs2(x)) {
|
||||
if (p < e) {
|
||||
|
|
|
@ -16,18 +16,12 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/intrin/punpckhbw.h"
|
||||
#include "libc/intrin/punpckhwd.h"
|
||||
#include "libc/intrin/punpcklbw.h"
|
||||
#include "libc/intrin/punpcklwd.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
#include "libc/str/utf16.h"
|
||||
#include "libc/x/x.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
/**
|
||||
* Transcodes UTF-8 to UTF-32.
|
||||
|
@ -41,35 +35,35 @@ wchar_t *utf8to32(const char *p, size_t n, size_t *z) {
|
|||
unsigned m, j;
|
||||
wint_t x, a, b;
|
||||
wchar_t *r, *q;
|
||||
uint8_t v1[16], v2[16], v3[16], v4[16], vz[16];
|
||||
if (z)
|
||||
*z = 0;
|
||||
if (n == -1)
|
||||
n = p ? strlen(p) : 0;
|
||||
if ((q = r = malloc(n * sizeof(wchar_t) + sizeof(wchar_t)))) {
|
||||
for (i = 0; i < n;) {
|
||||
#ifdef __x86_64__
|
||||
if (!((uintptr_t)(p + i) & 15) && i + 16 < n) {
|
||||
/* 10x speedup for ascii */
|
||||
bzero(vz, 16);
|
||||
do {
|
||||
memcpy(v1, p + i, 16);
|
||||
pcmpgtb((int8_t *)v2, (int8_t *)v1, (int8_t *)vz);
|
||||
if (pmovmskb(v2) != 0xFFFF)
|
||||
__m128i v1, v2, v3, v4;
|
||||
v1 = _mm_loadu_si128((__m128i *)(p + i));
|
||||
v2 = _mm_cmpgt_epi8(v1, _mm_setzero_si128());
|
||||
if (_mm_movemask_epi8(v2) != 0xFFFF)
|
||||
break;
|
||||
punpcklbw(v3, v1, vz);
|
||||
punpckhbw(v1, v1, vz);
|
||||
punpcklwd((void *)v4, (void *)v3, (void *)vz);
|
||||
punpckhwd((void *)v3, (void *)v3, (void *)vz);
|
||||
punpcklwd((void *)v2, (void *)v1, (void *)vz);
|
||||
punpckhwd((void *)v1, (void *)v1, (void *)vz);
|
||||
memcpy(q + 0, v4, 16);
|
||||
memcpy(q + 4, v3, 16);
|
||||
memcpy(q + 8, v2, 16);
|
||||
memcpy(q + 12, v1, 16);
|
||||
v3 = _mm_unpacklo_epi8(v1, _mm_setzero_si128());
|
||||
v1 = _mm_unpackhi_epi8(v1, _mm_setzero_si128());
|
||||
v4 = _mm_unpacklo_epi16(v3, _mm_setzero_si128());
|
||||
v3 = _mm_unpackhi_epi16(v3, _mm_setzero_si128());
|
||||
v2 = _mm_unpacklo_epi16(v1, _mm_setzero_si128());
|
||||
v1 = _mm_unpackhi_epi16(v1, _mm_setzero_si128());
|
||||
_mm_storeu_si128((__m128i *)(q + 0), v4);
|
||||
_mm_storeu_si128((__m128i *)(q + 4), v3);
|
||||
_mm_storeu_si128((__m128i *)(q + 8), v2);
|
||||
_mm_storeu_si128((__m128i *)(q + 12), v1);
|
||||
i += 16;
|
||||
q += 16;
|
||||
} while (i + 16 < n);
|
||||
}
|
||||
#endif
|
||||
x = p[i++] & 0xff;
|
||||
if (x >= 0300) {
|
||||
a = ThomPikeByte(x);
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "net/http/escape.h"
|
||||
|
@ -34,23 +32,12 @@ char *DecodeLatin1(const char *p, size_t n, size_t *z) {
|
|||
int c;
|
||||
size_t i;
|
||||
char *r, *q;
|
||||
int8_t v1[16], v2[16], vz[16];
|
||||
if (z)
|
||||
*z = 0;
|
||||
if (n == -1)
|
||||
n = p ? strlen(p) : 0;
|
||||
if ((q = r = malloc(n * 2 + 1))) {
|
||||
for (i = 0; i < n;) {
|
||||
bzero(vz, 16); /* 3x speedup for ASCII */
|
||||
while (i + 16 < n) {
|
||||
memcpy(v1, p + i, 16);
|
||||
pcmpgtb(v2, v1, vz);
|
||||
if (pmovmskb((void *)v2) != 0xFFFF)
|
||||
break;
|
||||
memcpy(q, v1, 16);
|
||||
q += 16;
|
||||
i += 16;
|
||||
}
|
||||
c = p[i++] & 0xff;
|
||||
if (c < 0200) {
|
||||
*q++ = c;
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/pcmpgtb.h"
|
||||
#include "libc/intrin/pmovmskb.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/thompike.h"
|
||||
|
@ -40,23 +38,12 @@ char *Underlong(const char *p, size_t n, size_t *z) {
|
|||
char *r, *q;
|
||||
size_t i, j, m;
|
||||
wint_t x, a, b;
|
||||
int8_t v1[16], v2[16], vz[16];
|
||||
if (z)
|
||||
*z = 0;
|
||||
if (n == -1)
|
||||
n = p ? strlen(p) : 0;
|
||||
if ((q = r = malloc(n * 2 + 1))) {
|
||||
for (i = 0; i < n;) {
|
||||
bzero(vz, 16); /* 50x speedup for ASCII */
|
||||
while (i + 16 < n) {
|
||||
memcpy(v1, p + i, 16);
|
||||
pcmpgtb(v2, v1, vz);
|
||||
if (pmovmskb((void *)v2) != 0xFFFF)
|
||||
break;
|
||||
memcpy(q, v1, 16);
|
||||
q += 16;
|
||||
i += 16;
|
||||
}
|
||||
x = p[i++] & 0xff;
|
||||
if (x >= 0300) {
|
||||
a = ThomPikeByte(x);
|
||||
|
|
Loading…
Reference in a new issue