mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-06-27 06:48:31 +00:00
Do some string library work
This commit is contained in:
parent
83d41e4588
commit
35203c0551
42 changed files with 1381 additions and 136 deletions
|
@ -16,9 +16,9 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/bits.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/intrin/bits.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
|
@ -87,7 +87,7 @@ TEST(sscanf, testStringBuffer_gothicUtf8ToUtf8_roundTrips) {
|
|||
EXPECT_STREQ("𐌴𐌵𐌶𐌷", s2);
|
||||
}
|
||||
|
||||
TEST(sscanf, testStringBuffer_gothicUtf8ToUtf16) {
|
||||
TEST(sscanf, testStringBuffer_gothicUtf8to16) {
|
||||
char16_t s1[64], s2[64];
|
||||
ASSERT_EQ(2, sscanf("𐌰𐌱𐌲𐌳 𐌴𐌵𐌶𐌷", "%63hs %63hs", s1, s2));
|
||||
EXPECT_STREQ(u"𐌰𐌱𐌲𐌳", s1);
|
||||
|
|
|
@ -32,7 +32,7 @@ TEST(strsak32, test) {
|
|||
|
||||
BENCH(strsak32, bench) {
|
||||
size_t wcslen_(const wchar_t *) asm("wcslen");
|
||||
wchar_t *p = gc(utf8toutf32(kHyperion, kHyperionSize, 0));
|
||||
wchar_t *p = gc(utf8to32(kHyperion, kHyperionSize, 0));
|
||||
EZBENCH_N("wcslen", kHyperionSize, wcslen_(p));
|
||||
for (int i = 128; i >= 2; i /= 2) {
|
||||
p[i - 0] = 0;
|
||||
|
|
|
@ -167,6 +167,22 @@ TEST(rawmemchr, fuzz) {
|
|||
free(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* strchr 0 l: 10c 3ns m: 38c 12ns
|
||||
* strchr 5 l: 13c 4ns m: 42c 14ns
|
||||
* strchr 8 l: 14c 5ns m: 44c 14ns
|
||||
* strchr 17 l: 13c 4ns m: 45c 15ns
|
||||
* strchr 34 l: 16c 5ns m: 48c 16ns
|
||||
* strchr z l: 369c 119ns m: 408c 132ns
|
||||
* rawmemchr z l: 236c 76ns m: 391c 126ns
|
||||
* memchr z l: 357c 115ns m: 423c 137ns
|
||||
* strchr Z l: 1,872c 605ns m: 1,911c 617ns
|
||||
* rawmemchr \0 l: 1,184c 382ns m: 1,880c 607ns
|
||||
* strlen l: 1,174c 379ns m: 1,237c 400ns
|
||||
* memchr Z l: 1,866c 603ns m: 1,945c 628ns
|
||||
* strchrnul z l: 365c 118ns m: 408c 132ns
|
||||
* strchrnul Z l: 1,871c 604ns m: 1,914c 618ns
|
||||
*/
|
||||
BENCH(strchr, bench2) {
|
||||
char *strchr_(const char *, int) asm("strchr");
|
||||
char *strchrnul_(const char *, int) asm("strchrnul");
|
||||
|
|
|
@ -51,6 +51,6 @@ TEST(utf16to32, testAstralPlanesGothic) {
|
|||
/* Times utf16to32 over a UTF-16 copy of kHyperion; each result is freed. */
/* NOTE(review): the suite is named utf16to8 although every EZBENCH2 below */
/* calls utf16to32 - confirm whether the suite name should be utf16to32. */
BENCH(utf16to8, bench) {
|
||||
size_t n;
|
||||
char16_t *h;
|
||||
/* NOTE(review): the next two statement pairs differ only in the */
/* utf8toutf16/utf8to16 spelling and the benchmark label; they look like the */
/* before and after sides of one rename shown together - confirm which is live. */
h = gc(utf8toutf16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16toutf8", donothing, free(utf16to32(h, n, 0)));
|
||||
h = gc(utf8to16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16to32", donothing, free(utf16to32(h, n, 0)));
|
||||
}
|
||||
|
|
|
@ -23,21 +23,21 @@
|
|||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
TEST(utf16toutf8, test) {
|
||||
EXPECT_STREQ("hello☻♥", gc(utf16toutf8(u"hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥h", gc(utf16toutf8(u"hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥hi", gc(utf16toutf8(u"hello☻♥hello☻♥hi", -1, 0)));
|
||||
TEST(utf16to8, test) {
|
||||
EXPECT_STREQ("hello☻♥", gc(utf16to8(u"hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥h", gc(utf16to8(u"hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥hi", gc(utf16to8(u"hello☻♥hello☻♥hi", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥",
|
||||
gc(utf16toutf8(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--h", gc(utf16toutf8(u"hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--hi", gc(utf16toutf8(u"hello--hello--hi", -1, 0)));
|
||||
gc(utf16to8(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--h", gc(utf16to8(u"hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--hi", gc(utf16to8(u"hello--hello--hi", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--hello--hello--hello--",
|
||||
gc(utf16toutf8(u"hello--hello--hello--hello--hello--", -1, 0)));
|
||||
gc(utf16to8(u"hello--hello--hello--hello--hello--", -1, 0)));
|
||||
}
|
||||
|
||||
BENCH(utf16toutf8, bench) {
|
||||
BENCH(utf16to8, bench) {
|
||||
size_t n;
|
||||
char16_t *h;
|
||||
h = gc(utf8toutf16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16toutf8", donothing, free(utf16toutf8(h, n, 0)));
|
||||
h = gc(utf8to16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16to8", donothing, free(utf16to8(h, n, 0)));
|
||||
}
|
|
@ -23,19 +23,18 @@
|
|||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
TEST(utf8toutf16, test) {
|
||||
EXPECT_STREQ(u"hello☻♥", gc(utf8toutf16("hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥h", gc(utf8toutf16("hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥hi", gc(utf8toutf16("hello☻♥hello☻♥hi", -1, 0)));
|
||||
TEST(utf8to16, test) {
|
||||
EXPECT_STREQ(u"hello☻♥", gc(utf8to16("hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥h", gc(utf8to16("hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥hi", gc(utf8to16("hello☻♥hello☻♥hi", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥",
|
||||
gc(utf8toutf16("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--h", gc(utf8toutf16("hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--hi", gc(utf8toutf16("hello--hello--hi", -1, 0)));
|
||||
gc(utf8to16("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--h", gc(utf8to16("hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--hi", gc(utf8to16("hello--hello--hi", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--hello--hello--hello--",
|
||||
gc(utf8toutf16("hello--hello--hello--hello--hello--", -1, 0)));
|
||||
gc(utf8to16("hello--hello--hello--hello--hello--", -1, 0)));
|
||||
}
|
||||
|
||||
BENCH(utf8toutf16, bench) {
|
||||
EZBENCH2("utf8toutf16", donothing,
|
||||
free(utf8toutf16(kHyperion, kHyperionSize, 0)));
|
||||
BENCH(utf8to16, bench) {
|
||||
EZBENCH2("utf8to16", donothing, free(utf8to16(kHyperion, kHyperionSize, 0)));
|
||||
}
|
107
test/libc/x/utf8to32_test.c
Normal file
107
test/libc/x/utf8to32_test.c
Normal file
|
@ -0,0 +1,107 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/mem/shuffle.internal.h"
|
||||
#include "libc/runtime/gc.internal.h"
|
||||
#include "libc/stdio/rand.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/hyperion.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/testlib/viewables.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
/* Smoke test for utf8to32: a null pointer with length 0, an empty string, */
/* and a short ASCII word. The first two are expected to yield an empty */
/* wide string. */
/* NOTE(review): a length of -1 presumably means "measure up to the NUL" and */
/* gc() presumably releases the result later - confirm against their docs. */
TEST(utf8to32, test) {
|
||||
EXPECT_STREQ(L"", gc(utf8to32(0, 0, 0)));
|
||||
EXPECT_STREQ(L"", gc(utf8to32("", -1, 0)));
|
||||
EXPECT_STREQ(L"hello", gc(utf8to32("hello", -1, 0)));
|
||||
}
|
||||
|
||||
/* Converts "hi" with an explicit length of 2 and inspects the output one */
/* element at a time. The final assertion expects a zero at index 2, i.e. */
/* one element past the two that were requested. */
TEST(utf8to32, poke) {
|
||||
wchar_t *p = gc(utf8to32("hi", 2, 0));
|
||||
ASSERT_EQ('h', p[0]);
|
||||
ASSERT_EQ('i', p[1]);
|
||||
ASSERT_EQ(0, p[2]);
|
||||
}
|
||||
|
||||
/* Mirror of the utf8to32 poke test in the opposite direction: converts the */
/* wide string L"hi" with an explicit length of 2 and checks each output */
/* byte, expecting a zero byte at index 2. */
TEST(utf32to8, poke) {
|
||||
char *p = gc(utf32to8(L"hi", 2, 0));
|
||||
ASSERT_EQ('h', p[0]);
|
||||
ASSERT_EQ('i', p[1]);
|
||||
ASSERT_EQ(0, p[2]);
|
||||
}
|
||||
|
||||
/* Round-trips pure ASCII inputs of 16 and 32 characters through utf8to32. */
/* NOTE(review): these lengths presumably target a block-at-a-time fast */
/* path in the implementation - confirm against utf8to32's source. */
TEST(utf8to32, testLargeAscii) {
|
||||
EXPECT_STREQ(L"hellohellohelloz", gc(utf8to32("hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohellozhellohellohelloz",
|
||||
gc(utf8to32("hellohellohellozhellohellohelloz", -1, 0)));
|
||||
}
|
||||
|
||||
/* Same 16- and 32-character ASCII inputs as the utf8to32 variant, but */
/* converted from wide strings to UTF-8 with utf32to8. */
TEST(utf32to8, testLargeAscii) {
|
||||
EXPECT_STREQ("hellohellohelloz", gc(utf32to8(L"hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ("hellohellohellozhellohellohelloz",
|
||||
gc(utf32to8(L"hellohellohellozhellohellohelloz", -1, 0)));
|
||||
}
|
||||
|
||||
/* Exercises utf8to32 on inputs that mix ASCII with non-ASCII characters. */
/* The first case places the non-ASCII character after 15 ASCII characters, */
/* the second after 16, and the third input is entirely non-ASCII. */
TEST(utf8to32, testLargeThompsonPikeEncoded) {
|
||||
EXPECT_STREQ(L"hellohellohello𝑧hellohellohelloz",
|
||||
gc(utf8to32("hellohellohello𝑧hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohelloh𝑧ellohellohelloz",
|
||||
gc(utf8to32("hellohellohelloh𝑧ellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(
|
||||
L"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
gc(utf8to32(
|
||||
"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
-1, 0)));
|
||||
}
|
||||
|
||||
/* Reverse direction of the utf8to32 test of the same name: the same three */
/* mixed ASCII / non-ASCII strings are supplied as wide literals and the */
/* utf32to8 output is compared against the UTF-8 literals. */
TEST(utf32to8, testLargeThompsonPikeEncoded) {
|
||||
EXPECT_STREQ("hellohellohello𝑧hellohellohelloz",
|
||||
gc(utf32to8(L"hellohellohello𝑧hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ("hellohellohelloh𝑧ellohellohelloz",
|
||||
gc(utf32to8(L"hellohellohelloh𝑧ellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(
|
||||
"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
gc(utf32to8(
|
||||
L"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
-1, 0)));
|
||||
}
|
||||
|
||||
char *GenerateBranchyUtf8Text(size_t *out_n) {
|
||||
char *p;
|
||||
size_t n;
|
||||
wchar_t *q = gc(utf8to32(kViewables, kViewablesSize, &n));
|
||||
shuffle(lemur64, q, n);
|
||||
p = utf32to8(q, n, &n);
|
||||
if (out_n) *out_n = n;
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* utf8to32 l: 5,806c 1,875ns m: 5,863c 1,894ns
|
||||
* utf32to8 l: 104,671c 33,808ns m: 103,803c 33,528ns
|
||||
* utf8to32 [branchy] l: 746,846c 241,227ns m: 747,312c 241,377ns
|
||||
*/
|
||||
/* Runs three benchmarks, in this order: */
/* 1. utf8to32 over kHyperion; */
/* 2. utf32to8 over the wide copy of kHyperion made just before it; */
/* 3. utf8to32 over the text returned by GenerateBranchyUtf8Text(). */
/* Every timed expression frees the buffer it allocates. */
BENCH(utf8to32, bench) {
|
||||
EZBENCH2("utf8to32", donothing, free(utf8to32(kHyperion, kHyperionSize, 0)));
|
||||
size_t n;
|
||||
wchar_t *h = gc(utf8to32(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf32to8", donothing, free(utf32to8(h, n, 0)));
|
||||
/* n is overwritten here with the size reported for the branchy text. */
char *p = gc(GenerateBranchyUtf8Text(&n));
|
||||
EZBENCH2("utf8to32 [branchy]", donothing, free(utf8to32(p, n, 0)));
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/gc.internal.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/hyperion.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
/* Smoke test for utf8toutf32: a null pointer with length 0, an empty */
/* string, and a short ASCII word. The first two are expected to yield an */
/* empty wide string. */
/* NOTE(review): a length of -1 presumably means "measure up to the NUL" - */
/* confirm against the function's documentation. */
TEST(utf8toutf32, test) {
|
||||
EXPECT_STREQ(L"", gc(utf8toutf32(0, 0, 0)));
|
||||
EXPECT_STREQ(L"", gc(utf8toutf32("", -1, 0)));
|
||||
EXPECT_STREQ(L"hello", gc(utf8toutf32("hello", -1, 0)));
|
||||
}
|
||||
|
||||
/* Converts pure ASCII inputs of 16 and 32 characters with utf8toutf32 and */
/* compares the result against the matching wide literals. */
TEST(utf8toutf32, testLargeAscii) {
|
||||
EXPECT_STREQ(L"hellohellohelloz", gc(utf8toutf32("hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohellozhellohellohelloz",
|
||||
gc(utf8toutf32("hellohellohellozhellohellohelloz", -1, 0)));
|
||||
}
|
||||
|
||||
/* Exercises utf8toutf32 on inputs that mix ASCII with non-ASCII characters. */
/* The first case places the non-ASCII character after 15 ASCII characters, */
/* the second after 16, and the third input is entirely non-ASCII. */
TEST(utf8toutf32, testLargeThompsonPikeEncoded) {
|
||||
EXPECT_STREQ(L"hellohellohello𝑧hellohellohelloz",
|
||||
gc(utf8toutf32("hellohellohello𝑧hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohelloh𝑧ellohellohelloz",
|
||||
gc(utf8toutf32("hellohellohelloh𝑧ellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(
|
||||
L"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
gc(utf8toutf32(
|
||||
"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
-1, 0)));
|
||||
}
|
||||
|
||||
/* Times utf8toutf32 over kHyperion; the timed expression frees each result. */
BENCH(utf8toutf32, bench) {
|
||||
EZBENCH2("utf8toutf32", donothing,
|
||||
free(utf8toutf32(kHyperion, kHyperionSize, 0)));
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue