mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Make memchr() and memccpy() faster
This commit is contained in:
parent
fef24d622a
commit
e4d6eb382a
3 changed files with 95 additions and 24 deletions
|
@ -19,10 +19,10 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "third_party/aarch64/arm_neon.internal.h"
|
||||
#include "third_party/intel/immintrin.internal.h"
|
||||
#ifndef __aarch64__
|
||||
|
||||
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
static inline const unsigned char *memchr_pure(const unsigned char *s,
|
||||
unsigned char c, size_t n) {
|
||||
size_t i;
|
||||
|
@ -35,22 +35,27 @@ static inline const unsigned char *memchr_pure(const unsigned char *s,
|
|||
}
|
||||
|
||||
#if defined(__x86_64__) && !defined(__chibicc__)
|
||||
static __vex const unsigned char *memchr_sse(const unsigned char *s,
|
||||
unsigned char c, size_t n) {
|
||||
size_t i;
|
||||
unsigned m;
|
||||
xmm_t v, t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
|
||||
for (; n >= 16; n -= 16, s += 16) {
|
||||
v = *(const xmm_t *)s;
|
||||
m = __builtin_ia32_pmovmskb128(v == t);
|
||||
if (m) {
|
||||
m = __builtin_ctzll(m);
|
||||
return s + m;
|
||||
}
|
||||
static const char *memchr_sse(const char *s, char c, size_t n) {
|
||||
const char *e = s + n;
|
||||
__m128i t = _mm_set1_epi8(c);
|
||||
unsigned m, k = (uintptr_t)s & 15;
|
||||
m = _mm_movemask_epi8(
|
||||
_mm_cmpeq_epi8(_mm_load_si128((const __m128i *)((uintptr_t)s & -16)), t));
|
||||
m >>= k;
|
||||
if (m) {
|
||||
s += __builtin_ctz(m);
|
||||
if (s < e)
|
||||
return s;
|
||||
return 0;
|
||||
}
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (s[i] == c) {
|
||||
return s + i;
|
||||
for (s += 16 - k; s < e; s += 16) {
|
||||
m = _mm_movemask_epi8(
|
||||
_mm_cmpeq_epi8(_mm_load_si128((const __m128i *)s), t));
|
||||
if (m) {
|
||||
s += __builtin_ctz(m);
|
||||
if (s < e)
|
||||
return s;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -45,13 +45,14 @@
|
|||
* @asyncsignalsafe
|
||||
*/
|
||||
void *memccpy(void *dst, const void *src, int c, size_t n) {
|
||||
char *d;
|
||||
size_t i;
|
||||
const char *s;
|
||||
for (d = dst, s = src, i = 0; i < n; ++i) {
|
||||
if (((d[i] = s[i]) & 255) == (c & 255)) {
|
||||
return d + i + 1;
|
||||
}
|
||||
const char *p;
|
||||
// this memchr() call is only correct if your memchr() implementation
|
||||
// offers the same readahead safety guarantees as cosmopolitan's does
|
||||
if ((p = memchr(src, c, n))) {
|
||||
size_t m = p + 1 - (const char *)src;
|
||||
memmove(dst, src, m);
|
||||
return (char *)dst + m;
|
||||
}
|
||||
memmove(dst, src, n);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -16,10 +16,18 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/intrin/safemacros.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/sysconf.h"
|
||||
#include "libc/stdio/rand.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/testlib/benchmark.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
|
||||
|
@ -50,6 +58,40 @@ TEST(memccpy, testZeroLength_doesNothing) {
|
|||
EXPECT_EQ(NULL, memccpy(buf, "hi", '\0', 0));
|
||||
}
|
||||
|
||||
TEST(memccpy, fuzz) {
|
||||
int pagesz = sysconf(_SC_PAGESIZE);
|
||||
char *map1 = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
npassert(map1 != MAP_FAILED);
|
||||
npassert(!mprotect(map1 + pagesz, pagesz, PROT_NONE));
|
||||
char *map2 = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
npassert(map2 != MAP_FAILED);
|
||||
npassert(!mprotect(map2 + pagesz, pagesz, PROT_NONE));
|
||||
char *map3 = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
npassert(map3 != MAP_FAILED);
|
||||
npassert(!mprotect(map3 + pagesz, pagesz, PROT_NONE));
|
||||
for (int dsize = 1; dsize < 128; ++dsize) {
|
||||
char *volatile dst1 = map1 + pagesz - dsize;
|
||||
char *volatile dst2 = map1 + pagesz - dsize;
|
||||
for (int i = 0; i < dsize; ++i)
|
||||
dst1[i] = dst2[i] = rand();
|
||||
for (int ssize = 1; ssize < dsize * 2; ++ssize) {
|
||||
char *volatile src = map3 + pagesz - (ssize + 1);
|
||||
for (int i = 0; i < ssize; ++i)
|
||||
src[i] = max(rand() & 255, 1);
|
||||
src[ssize] = 0;
|
||||
ASSERT_EQ(memccpy_pure(dst1, src, 0, dsize),
|
||||
memccpy(dst2, src, 0, dsize));
|
||||
ASSERT_EQ(0, memcmp(dst1, dst2, dsize));
|
||||
}
|
||||
}
|
||||
npassert(!munmap(map3, pagesz * 2));
|
||||
npassert(!munmap(map2, pagesz * 2));
|
||||
npassert(!munmap(map1, pagesz * 2));
|
||||
}
|
||||
|
||||
TEST(memccpy, memcpy) {
|
||||
unsigned n, n1, n2;
|
||||
char *b1, *b2, *b3, *e1, *e2;
|
||||
|
@ -78,3 +120,26 @@ TEST(memccpy, memcpy) {
|
|||
free(b1);
|
||||
}
|
||||
}
|
||||
|
||||
#define N 4096
|
||||
|
||||
// Benchmarks memccpy() and then memccpy_pure() on the same workload.
// For each n in 1, 2, 4, ..., N the source holds n random nonzero bytes
// followed by a NUL, and the copy searches for NUL with a limit of N.
// NOTE(review): BENCHMARK, X and V are defined outside this view;
// presumably 100 is the iteration count and n the work per run -
// confirm against libc/testlib/benchmark.h.
BENCH(memccpy, bench) {
|
||||
char dst[N];
|
||||
// one extra byte so the terminator at src[n] fits when n == N
char src[N + 1];
|
||||
|
||||
printf("\n");
|
||||
for (int n = 1; n <= N; n *= 2) {
|
||||
for (int i = 0; i < n; ++i)
|
||||
// clamp to at least 1 so the only zero byte is the one at src[n]
src[i] = max(rand() & 255, 1);
|
||||
src[n] = 0;
|
||||
BENCHMARK(100, n, X(memccpy(dst, src, 0, V(N))));
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
// same workload shape, run through memccpy_pure() for comparison
for (int n = 1; n <= N; n *= 2) {
|
||||
for (int i = 0; i < n; ++i)
|
||||
src[i] = max(rand() & 255, 1);
|
||||
src[n] = 0;
|
||||
BENCHMARK(100, n, X(memccpy_pure(dst, src, 0, V(N))));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue