mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-06 19:28:29 +00:00
Do some string library work
This commit is contained in:
parent
83d41e4588
commit
35203c0551
42 changed files with 1381 additions and 136 deletions
|
@ -51,6 +51,6 @@ TEST(utf16to32, testAstralPlanesGothic) {
|
|||
BENCH(utf16to8, bench) {
|
||||
size_t n;
|
||||
char16_t *h;
|
||||
h = gc(utf8toutf16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16toutf8", donothing, free(utf16to32(h, n, 0)));
|
||||
h = gc(utf8to16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16to32", donothing, free(utf16to32(h, n, 0)));
|
||||
}
|
||||
|
|
|
@ -23,21 +23,21 @@
|
|||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
TEST(utf16toutf8, test) {
|
||||
EXPECT_STREQ("hello☻♥", gc(utf16toutf8(u"hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥h", gc(utf16toutf8(u"hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥hi", gc(utf16toutf8(u"hello☻♥hello☻♥hi", -1, 0)));
|
||||
TEST(utf16to8, test) {
|
||||
EXPECT_STREQ("hello☻♥", gc(utf16to8(u"hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥h", gc(utf16to8(u"hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥hi", gc(utf16to8(u"hello☻♥hello☻♥hi", -1, 0)));
|
||||
EXPECT_STREQ("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥",
|
||||
gc(utf16toutf8(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--h", gc(utf16toutf8(u"hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--hi", gc(utf16toutf8(u"hello--hello--hi", -1, 0)));
|
||||
gc(utf16to8(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--h", gc(utf16to8(u"hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--hi", gc(utf16to8(u"hello--hello--hi", -1, 0)));
|
||||
EXPECT_STREQ("hello--hello--hello--hello--hello--",
|
||||
gc(utf16toutf8(u"hello--hello--hello--hello--hello--", -1, 0)));
|
||||
gc(utf16to8(u"hello--hello--hello--hello--hello--", -1, 0)));
|
||||
}
|
||||
|
||||
BENCH(utf16toutf8, bench) {
|
||||
BENCH(utf16to8, bench) {
|
||||
size_t n;
|
||||
char16_t *h;
|
||||
h = gc(utf8toutf16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16toutf8", donothing, free(utf16toutf8(h, n, 0)));
|
||||
h = gc(utf8to16(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf16to8", donothing, free(utf16to8(h, n, 0)));
|
||||
}
|
|
@ -23,19 +23,18 @@
|
|||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
TEST(utf8toutf16, test) {
|
||||
EXPECT_STREQ(u"hello☻♥", gc(utf8toutf16("hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥h", gc(utf8toutf16("hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥hi", gc(utf8toutf16("hello☻♥hello☻♥hi", -1, 0)));
|
||||
TEST(utf8to16, test) {
|
||||
EXPECT_STREQ(u"hello☻♥", gc(utf8to16("hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥h", gc(utf8to16("hello☻♥hello☻♥h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥hi", gc(utf8to16("hello☻♥hello☻♥hi", -1, 0)));
|
||||
EXPECT_STREQ(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥",
|
||||
gc(utf8toutf16("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--h", gc(utf8toutf16("hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--hi", gc(utf8toutf16("hello--hello--hi", -1, 0)));
|
||||
gc(utf8to16("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--h", gc(utf8to16("hello--hello--h", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--hi", gc(utf8to16("hello--hello--hi", -1, 0)));
|
||||
EXPECT_STREQ(u"hello--hello--hello--hello--hello--",
|
||||
gc(utf8toutf16("hello--hello--hello--hello--hello--", -1, 0)));
|
||||
gc(utf8to16("hello--hello--hello--hello--hello--", -1, 0)));
|
||||
}
|
||||
|
||||
BENCH(utf8toutf16, bench) {
|
||||
EZBENCH2("utf8toutf16", donothing,
|
||||
free(utf8toutf16(kHyperion, kHyperionSize, 0)));
|
||||
BENCH(utf8to16, bench) {
|
||||
EZBENCH2("utf8to16", donothing, free(utf8to16(kHyperion, kHyperionSize, 0)));
|
||||
}
|
107
test/libc/x/utf8to32_test.c
Normal file
107
test/libc/x/utf8to32_test.c
Normal file
|
@ -0,0 +1,107 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/mem/shuffle.internal.h"
|
||||
#include "libc/runtime/gc.internal.h"
|
||||
#include "libc/stdio/rand.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/hyperion.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/testlib/viewables.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
/* Basic utf8to32 checks: a null input with zero length and an empty
   string both yield an empty wide string, and plain ASCII "hello" is
   reproduced as L"hello". The -1 length presumably means "measure up
   to the NUL terminator" -- TODO confirm against utf8to32's contract. */
TEST(utf8to32, test) {
|
||||
EXPECT_STREQ(L"", gc(utf8to32(0, 0, 0)));
|
||||
EXPECT_STREQ(L"", gc(utf8to32("", -1, 0)));
|
||||
EXPECT_STREQ(L"hello", gc(utf8to32("hello", -1, 0)));
|
||||
}
|
||||
|
||||
/* Converts "hi" with an explicit length of 2 and inspects the result
   element by element, including that index 2 holds a zero terminator. */
TEST(utf8to32, poke) {
|
||||
wchar_t *p = gc(utf8to32("hi", 2, 0));
|
||||
ASSERT_EQ('h', p[0]);
|
||||
ASSERT_EQ('i', p[1]);
|
||||
ASSERT_EQ(0, p[2]);
|
||||
}
|
||||
|
||||
/* Opposite direction of the utf8to32 poke test: converts L"hi" with an
   explicit length of 2 and checks each output byte, including the zero
   terminator at index 2. */
TEST(utf32to8, poke) {
|
||||
char *p = gc(utf32to8(L"hi", 2, 0));
|
||||
ASSERT_EQ('h', p[0]);
|
||||
ASSERT_EQ('i', p[1]);
|
||||
ASSERT_EQ(0, p[2]);
|
||||
}
|
||||
|
||||
/* Decodes pure-ASCII inputs of 16 and 32 characters.
   NOTE(review): these lengths look chosen to exercise a 16-byte-wide
   fast path in utf8to32 -- confirm against the implementation. */
TEST(utf8to32, testLargeAscii) {
|
||||
EXPECT_STREQ(L"hellohellohelloz", gc(utf8to32("hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohellozhellohellohelloz",
|
||||
gc(utf8to32("hellohellohellozhellohellohelloz", -1, 0)));
|
||||
}
|
||||
|
||||
/* Encodes pure-ASCII wide strings of 16 and 32 characters and expects
   the identical narrow strings back.
   NOTE(review): lengths presumably target a vectorized ASCII path in
   utf32to8 -- confirm against the implementation. */
TEST(utf32to8, testLargeAscii) {
|
||||
EXPECT_STREQ("hellohellohelloz", gc(utf32to8(L"hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ("hellohellohellozhellohellohelloz",
|
||||
gc(utf32to8(L"hellohellohellozhellohellohelloz", -1, 0)));
|
||||
}
|
||||
|
||||
/* Decodes inputs that mix ASCII with a non-ASCII letter: once with the
   multi-byte character after 15 ASCII bytes, once after 16, and once
   with a string made up entirely of such letters.
   NOTE(review): the two offsets presumably straddle a 16-byte chunk
   boundary inside utf8to32 -- confirm against the implementation. */
TEST(utf8to32, testLargeThompsonPikeEncoded) {
|
||||
EXPECT_STREQ(L"hellohellohello𝑧hellohellohelloz",
|
||||
gc(utf8to32("hellohellohello𝑧hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohelloh𝑧ellohellohelloz",
|
||||
gc(utf8to32("hellohellohelloh𝑧ellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(
|
||||
L"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
gc(utf8to32(
|
||||
"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
-1, 0)));
|
||||
}
|
||||
|
||||
/* Encodes wide strings that mix ASCII with a non-ASCII letter, using
   the same three inputs as the utf8to32 counterpart: the non-ASCII
   letter after 15 ASCII characters, after 16, and a string made up
   entirely of such letters. */
TEST(utf32to8, testLargeThompsonPikeEncoded) {
|
||||
EXPECT_STREQ("hellohellohello𝑧hellohellohelloz",
|
||||
gc(utf32to8(L"hellohellohello𝑧hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ("hellohellohelloh𝑧ellohellohelloz",
|
||||
gc(utf32to8(L"hellohellohelloh𝑧ellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(
|
||||
"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
gc(utf32to8(
|
||||
L"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
-1, 0)));
|
||||
}
|
||||
|
||||
char *GenerateBranchyUtf8Text(size_t *out_n) {
|
||||
char *p;
|
||||
size_t n;
|
||||
wchar_t *q = gc(utf8to32(kViewables, kViewablesSize, &n));
|
||||
shuffle(lemur64, q, n);
|
||||
p = utf32to8(q, n, &n);
|
||||
if (out_n) *out_n = n;
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* utf8to32 l: 5,806c 1,875ns m: 5,863c 1,894ns
|
||||
* utf32to8 l: 104,671c 33,808ns m: 103,803c 33,528ns
|
||||
* utf8to32 [branchy] l: 746,846c 241,227ns m: 747,312c 241,377ns
|
||||
*/
|
||||
BENCH(utf8to32, bench) {
|
||||
EZBENCH2("utf8to32", donothing, free(utf8to32(kHyperion, kHyperionSize, 0)));
|
||||
size_t n;
|
||||
wchar_t *h = gc(utf8to32(kHyperion, kHyperionSize, &n));
|
||||
EZBENCH2("utf32to8", donothing, free(utf32to8(h, n, 0)));
|
||||
char *p = gc(GenerateBranchyUtf8Text(&n));
|
||||
EZBENCH2("utf8to32 [branchy]", donothing, free(utf8to32(p, n, 0)));
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/gc.internal.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/hyperion.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
TEST(utf8toutf32, test) {
|
||||
EXPECT_STREQ(L"", gc(utf8toutf32(0, 0, 0)));
|
||||
EXPECT_STREQ(L"", gc(utf8toutf32("", -1, 0)));
|
||||
EXPECT_STREQ(L"hello", gc(utf8toutf32("hello", -1, 0)));
|
||||
}
|
||||
|
||||
TEST(utf8toutf32, testLargeAscii) {
|
||||
EXPECT_STREQ(L"hellohellohelloz", gc(utf8toutf32("hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohellozhellohellohelloz",
|
||||
gc(utf8toutf32("hellohellohellozhellohellohelloz", -1, 0)));
|
||||
}
|
||||
|
||||
TEST(utf8toutf32, testLargeThompsonPikeEncoded) {
|
||||
EXPECT_STREQ(L"hellohellohello𝑧hellohellohelloz",
|
||||
gc(utf8toutf32("hellohellohello𝑧hellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(L"hellohellohelloh𝑧ellohellohelloz",
|
||||
gc(utf8toutf32("hellohellohelloh𝑧ellohellohelloz", -1, 0)));
|
||||
EXPECT_STREQ(
|
||||
L"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
gc(utf8toutf32(
|
||||
"𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑒𝑙𝑙𝑜𝑧",
|
||||
-1, 0)));
|
||||
}
|
||||
|
||||
BENCH(utf8toutf32, bench) {
|
||||
EZBENCH2("utf8toutf32", donothing,
|
||||
free(utf8toutf32(kHyperion, kHyperionSize, 0)));
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue