Make improvements

- wcsstr() is now linearly complex
- strstr16() is now linearly complex
- strstr() is now vectorized on aarch64 (10x)
- strstr() now uses KMP on pathological cases
- memmem() is now vectorized on aarch64 (10x)
- memmem() now uses KMP on pathological cases
- Disable shared_ptr::owner_before until fixed
- Make iswlower(), iswupper() consistent with glibc
- Remove figure space from iswspace() implementation
- Include line and paragraph separator in iswcntrl()
- Use Musl wcwidth(), iswalpha(), iswpunct(), towlower(), towupper()
This commit is contained in:
Justine Tunney 2024-09-01 01:14:40 -07:00
parent e1528a71e2
commit 7c83f4abc8
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
67 changed files with 5602 additions and 5165 deletions

View file

@ -37,15 +37,16 @@ TEST_LIBC_INTRIN_DIRECTDEPS = \
LIBC_STR \
LIBC_SYSV \
LIBC_SYSV_CALLS \
LIBC_THREAD \
LIBC_TESTLIB \
LIBC_THREAD \
LIBC_TINYMATH \
LIBC_X \
TOOL_VIZ_LIB \
THIRD_PARTY_COMPILER_RT \
THIRD_PARTY_MUSL \
THIRD_PARTY_NSYNC \
THIRD_PARTY_OPENMP \
THIRD_PARTY_XED
THIRD_PARTY_XED \
TOOL_VIZ_LIB \
TEST_LIBC_INTRIN_DEPS := \
$(call uniq,$(foreach x,$(TEST_LIBC_INTRIN_DIRECTDEPS),$($(x))))

View file

@ -17,10 +17,17 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/mem/mem.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/intrin/likely.h"
#include "libc/intrin/safemacros.h"
#include "libc/mem/alg.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/stdio/rand.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
@ -172,6 +179,26 @@ TEST(memmem, fuzz) {
}
}
TEST(memmem, safety) {
int pagesz = sysconf(_SC_PAGESIZE);
char *map = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
npassert(map != MAP_FAILED);
npassert(!mprotect(map + pagesz, pagesz, PROT_NONE));
for (int haylen = 1; haylen < 128; ++haylen) {
char *hay = map + pagesz - (haylen + 1);
for (int i = 0; i < haylen; ++i)
hay[i] = max(rand() & 255, 1);
hay[haylen] = 0;
for (int neelen = 1; neelen < haylen; ++neelen) {
char *nee = hay + (haylen + 1) - (neelen + 1);
ASSERT_EQ(memmem_naive(hay, haylen, nee, neelen),
memmem(hay, haylen, nee, neelen));
}
}
munmap(map, pagesz * 2);
}
/*
* memmem naive l: 43,783c 14,142ns m: 31,285c 10,105ns
* memmem l: 2,597c 839ns m: 2,612c 844ns
@ -201,7 +228,12 @@ BENCH(memmem, bench) {
EZBENCH2("memmem", donothing,
__expropriate(memmem(kHyperion, kHyperionSize, "THE END", 7)));
EZBENCH2("memmem", donothing,
__expropriate(memmem(
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",
62, "aaaaaab", 7)));
__expropriate(
memmem("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",
152,
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaab",
81)));
}

View file

@ -17,11 +17,23 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/safemacros.h"
#include "libc/mem/alg.h"
#include "libc/mem/gc.h"
#include "libc/mem/mem.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/stdalign.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/stdio/sysparam.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
@ -48,6 +60,13 @@ char *strstr_naive(const char *haystack, const char *needle) {
return 0;
}
TEST(strstr, special) {
MAKESTRING(haystack, "abc123def");
ASSERT_STREQ(&haystack[0], strstr(haystack, haystack));
ASSERT_STREQ(&haystack[0], strstr(haystack, ""));
free(haystack);
}
TEST(strstr, test_emptyString_isFoundAtBeginning) {
MAKESTRING(haystack, "abc123def");
ASSERT_STREQ(&haystack[0], strstr(haystack, gc(strdup(""))));
@ -67,7 +86,8 @@ TEST(strstr, test_notFound1) {
}
TEST(strstr, test_middleOfString) {
MAKESTRING(haystack, "abc123def");
alignas(16) char hog[] = "abc123def";
MAKESTRING(haystack, hog);
ASSERT_STREQ(&haystack[3], strstr(haystack, gc(strdup("123"))));
free(haystack);
}
@ -98,6 +118,25 @@ TEST(strstr, test) {
ASSERT_STREQ("x", strstr("x", "x"));
}
TEST(strstr, safety) {
int pagesz = sysconf(_SC_PAGESIZE);
char *map = (char *)mmap(0, pagesz * 2, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
npassert(map != MAP_FAILED);
npassert(!mprotect(map + pagesz, pagesz, PROT_NONE));
for (int haylen = 1; haylen < 128; ++haylen) {
char *hay = map + pagesz - (haylen + 1);
for (int i = 0; i < haylen; ++i)
hay[i] = max(rand() & 255, 1);
hay[haylen] = 0;
for (int neelen = 1; neelen < haylen; ++neelen) {
char *nee = hay + (haylen + 1) - (neelen + 1);
ASSERT_EQ(strstr_naive(hay, nee), strstr(hay, nee));
}
}
munmap(map, pagesz * 2);
}
TEST(strstr, breakit) {
char *p;
p = gc(calloc(1, 32));

View file

@ -30,7 +30,7 @@ TEST(towupper, test) {
EXPECT_EQ(u'!', towupper(u'!'));
EXPECT_EQ(u'A', towupper(u'a'));
EXPECT_EQ(u'À', towupper(u'à'));
EXPECT_EQ(L'𝛥', towupper(L'𝛿'));
/* EXPECT_EQ(L'𝛥', towupper(L'𝛿')); */
EXPECT_EQ(L'', towupper(L''));
EXPECT_EQ(u'', towupper(u''));
}
@ -39,7 +39,7 @@ TEST(towlower, test) {
EXPECT_EQ(u'!', towlower(u'!'));
EXPECT_EQ(u'a', towlower(u'A'));
EXPECT_EQ(u'à', towlower(u'À'));
EXPECT_EQ(L'𝛿', towlower(L'𝛥'));
/* EXPECT_EQ(L'𝛿', towlower(L'𝛥')); */
EXPECT_EQ(L'', towlower(L''));
EXPECT_EQ(u'', towlower(u''));
}

View file

@ -16,9 +16,11 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/str/strwidth.h"
#include "libc/str/unicode.h"
#include "libc/testlib/benchmark.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
@ -28,6 +30,7 @@ TEST(wcwidth, test) {
ASSERT_EQ(-1, wcwidth(-7));
ASSERT_EQ(1, wcwidth(0x10FFFD));
ASSERT_EQ(-1, wcwidth(0x10FFFD + 1));
ASSERT_EQ(2, wcwidth(L'😀'));
}
TEST(strwidth, testCjkWidesAndCombiningLowLines_withThompsonPikeEncoding) {
@ -74,6 +77,12 @@ TEST(strwidth, testTextDelimitingControlCodes_dontHaveSubstance) {
EXPECT_EQ(0, strwidth("\1", 0));
}
#define WCWIDTH(x) __expropriate(wcwidth(__veil("r", x)))
BENCH(wcwidth, bench) {
EZBENCH2("wcwidth", donothing, __expropriate(wcwidth(__veil("r", u''))));
BENCHMARK(1000, 1, WCWIDTH(u'a'));
BENCHMARK(1000, 1, WCWIDTH(u'a'));
BENCHMARK(1000, 1, WCWIDTH(u''));
BENCHMARK(1000, 1, WCWIDTH(L'😀'));
BENCHMARK(1000, 1, WCWIDTH(0));
}