cosmopolitan/test/libc/str/isutf8_test.c
Justine Tunney ebf784d4f5
Make improvements
- Introduce ualarm() function
- Make rename() report ENOTEMPTY on Windows
- Always raise EINVAL upon open(O_RDONLY|O_TRUNC)
- Add macro so ./configure will detect SOCK_CLOEXEC
- Fix O_TRUNC without O_CREAT not working on Windows
- Let fcntl(F_SETFL) change O_APPEND status on Windows
- Make sure pwrite() / pread() report ESPIPE on sockets
- Raise ESPIPE on Windows when pwrite() is used on pipe
- Properly compute O_APPEND CreateFile() flags on Windows
- Don't require O_DIRECTORY to open directories on Windows
- Fix more instances of Windows reporting EISDIR and ENOTDIR
- Normalize EFTYPE and EMLINK to ELOOP on NetBSD and FreeBSD
- Make unlink() / rmdir() work on read-only files on Windows
- Validate UTF-8 on Windows paths to fix bug with overlong NUL
- Always print signal name to stderr when crashing due to SIG_DFL
- Fix Windows bug where denormalized paths >260 chars didn't work
- Block signals on BSDs when thread exits before trashing its own stack
2023-08-21 02:34:17 -07:00

67 lines
3.5 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/symbols.internal.h"
#include "libc/str/str.h"
#include "libc/testlib/blocktronics.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
// Fixture hook for this test file: requests the symbol table and discards
// the returned value.
// NOTE(review): presumably called once by the test runner so the table is
// loaded before any test or benchmark executes — confirm against testlib.
void SetUpOnce(void) {
  (void)GetSymbolTable();
}
// Verifies that isutf8() accepts well-formed input.
// NOTE(review): a length of -1 presumably tells isutf8() to treat the input
// as a NUL-terminated string — confirm against the isutf8() documentation.
TEST(isutf8, good) {
// explicit length of 4, so the leading NUL byte is part of the checked data
ASSERT_TRUE(isutf8("\0\1\2\3", 4));
// kHyperion fixture; the benchmark in this file labels it as ascii
EXPECT_TRUE(isutf8(kHyperion, kHyperionSize));
// Gothic letters (four-byte sequences) followed by U+2592 (three-byte)
EXPECT_TRUE(isutf8("𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷▒▒▒▒▒▒▒▒▒▒▒▒", -1));
// CJK ideographs (three-byte sequences) interleaved with ASCII spaces
EXPECT_TRUE(isutf8("天地玄黄 宇宙洪荒 日月盈昃 辰宿列张 寒来暑往 秋收冬藏"
"闰馀成岁 律吕调阳 云腾致雨 露结为霜 金生丽水 玉出昆冈"
"剑号巨阙 珠称夜光 果珍李柰 菜重芥姜 海咸河淡 鳞潜羽翔"
"龙师火帝 鸟官人皇 始制文字 乃服衣裳 推位让国 有虞陶唐",
-1));
}
// Verifies that isutf8() rejects byte sequences that are not well-formed
// UTF-8.
// NOTE(review): a length of -1 presumably tells isutf8() to treat the input
// as a NUL-terminated string — confirm against the isutf8() documentation.
TEST(isutf8, bad) {
// 0xC0 0x80 is a two-byte encoding of U+0000, which must be one byte
ASSERT_FALSE(isutf8("\300\200", -1));  // overlong nul
// 0x80 has the 10xxxxxx continuation pattern but nothing precedes it
ASSERT_FALSE(isutf8("\200\300", -1));  // latin1 c1 control code
// 0xC0 is followed by another 0xC0 rather than a 10xxxxxx byte
ASSERT_FALSE(isutf8("\300\300", -1));  // missing continuation
// 0xFF is an octet that never appears in UTF-8 as defined by RFC 3629
ASSERT_FALSE(isutf8("\377\200\200\200\200", -1));  // thompson-pike varint
}
// Runs isutf8() over exactly-sized heap buffers of every length from 0
// through 31, each filled with the ASCII byte 'a', and expects success.
// NOTE(review): judging by the test name, the point is to let a memory
// checker flag any read past the end of the allocation — confirm.
TEST(isutf8, oob) {
  for (size_t n = 0; n < 32; ++n) {
    char *p = malloc(n);
    // malloc() can fail, and malloc(0) is allowed to return a null pointer;
    // only a null result for a nonzero size counts as a test failure
    ASSERT_TRUE(p || n == 0);
    if (n) {
      // skipped for n == 0 so a null pointer is never handed to memset()
      memset(p, 'a', n);
    }
    ASSERT_TRUE(isutf8(p, n));
    free(p);
  }
}
// Benchmarks isutf8() on four inputs: empty, a five-byte ASCII string, the
// kHyperion fixture, and the kBlocktronics fixture.
// NOTE(review): the second EZBENCH_N argument matches the byte length passed
// to isutf8() in every case, so it is presumably used to scale the reported
// timing by input size — confirm against ezbench.h.
BENCH(isutf8, bench) {
// null pointer with zero length
EZBENCH_N("isutf8", 0, isutf8(0, 0));
EZBENCH_N("isutf8", 5, isutf8("hello", 5));
EZBENCH_N("isutf8 ascii", kHyperionSize, isutf8(kHyperion, kHyperionSize));
EZBENCH_N("isutf8 unicode", kBlocktronicsSize,
isutf8(kBlocktronics, kBlocktronicsSize));
}