diff --git a/libc/intrin/isdebuggerpresent.c b/libc/intrin/isdebuggerpresent.c index 2216dead4..2e7dd2175 100644 --- a/libc/intrin/isdebuggerpresent.c +++ b/libc/intrin/isdebuggerpresent.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" +#include "libc/intrin/promises.internal.h" #include "libc/log/libfatal.internal.h" #include "libc/log/log.h" #include "libc/nexgen32e/vendor.internal.h" @@ -44,6 +45,7 @@ int IsDebuggerPresent(bool force) { if (!force && __getenv(environ, "HEISENDEBUG")) return 0; if (IsWindows()) return IsBeingDebugged(); if (__isworker) return false; + if (!PLEDGED(RPATH)) return false; res = 0; if ((fd = __sysv_open("/proc/self/status", O_RDONLY, 0)) >= 0) { if ((got = __sysv_read(fd, buf, sizeof(buf) - 1)) > 0) { diff --git a/libc/log/backtrace2.c b/libc/log/backtrace2.c index 1b8b13a90..f0a029b45 100644 --- a/libc/log/backtrace2.c +++ b/libc/log/backtrace2.c @@ -31,6 +31,7 @@ #include "libc/fmt/fmt.h" #include "libc/fmt/itoa.h" #include "libc/intrin/kprintf.h" +#include "libc/intrin/promises.internal.h" #include "libc/log/backtrace.internal.h" #include "libc/log/color.internal.h" #include "libc/log/log.h" @@ -65,6 +66,10 @@ static int PrintBacktraceUsingAddr2line(int fd, const struct StackFrame *bp) { char *debugbin, *p1, *p2, *p3, *addr2line; char buf[kBacktraceBufSize], *argv[kBacktraceMaxFrames]; + if (!PLEDGED(STDIO) || !PLEDGED(EXEC) || !PLEDGED(PROC)) { + return -1; + } + if (!(debugbin = FindDebugBinary())) { return -1; } diff --git a/libc/runtime/getsymboltable.c b/libc/runtime/getsymboltable.c index e39dbf8a2..bf1543b4d 100644 --- a/libc/runtime/getsymboltable.c +++ b/libc/runtime/getsymboltable.c @@ -20,6 +20,7 @@ #include "libc/bits/bits.h" #include "libc/bits/weaken.h" #include "libc/calls/strace.internal.h" +#include "libc/intrin/promises.internal.h" #include "libc/intrin/spinlock.h" #include "libc/macros.internal.h" #include 
"libc/runtime/internal.h" @@ -95,7 +96,7 @@ static struct SymbolTable *GetSymbolTableFromZip(struct Zipos *zipos) { static struct SymbolTable *GetSymbolTableFromElf(void) { int e; const char *s; - if ((s = FindDebugBinary())) { + if (PLEDGED(RPATH) && (s = FindDebugBinary())) { return OpenSymbolTable(s); } else { return 0; diff --git a/libc/runtime/stackuse.c b/libc/runtime/stackuse.c index 2727bd471..ae2d85084 100644 --- a/libc/runtime/stackuse.c +++ b/libc/runtime/stackuse.c @@ -53,6 +53,7 @@ static textexit void LogStackUse(void) { bool quote; char *p, *q; size_t n, usage; + if (!PLEDGED(STDIO) || !PLEDGED(WPATH) || !PLEDGED(CPATH)) return; usage = GetStackUsage((char *)GetStackAddr(), GetStackSize()); fd = open(stacklog, O_APPEND | O_CREAT | O_WRONLY, 0644); p = FormatUint64(stacklog, usage); diff --git a/libc/str/isutf8.c b/libc/str/isutf8.c index 9155c5e4a..669744339 100644 --- a/libc/str/isutf8.c +++ b/libc/str/isutf8.c @@ -16,23 +16,111 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/likely.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" +typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + +static const char kUtf8Dispatch[] = { + 0, 0, 1, 1, 1, 1, 1, 1, // 0300 utf8-2 + 1, 1, 1, 1, 1, 1, 1, 1, // 0310 + 1, 1, 1, 1, 1, 1, 1, 1, // 0320 + 1, 1, 1, 1, 1, 1, 1, 1, // 0330 + 2, 3, 3, 3, 3, 3, 3, 3, // 0340 utf8-3 + 3, 3, 3, 3, 3, 3, 3, 3, // 0350 + 4, 5, 5, 5, 5, 0, 0, 0, // 0360 utf8-4 + 0, 0, 0, 0, 0, 0, 0, 0, // 0370 +}; + /** - * Returns true if text data is most likely utf-8. + * Returns true if text is utf-8. * - * This function will return false if a pure ascii string is passed. 
+ * _isutf8 n=0 1 nanoseconds + * _isutf8 n=5 661 ps/byte 1,476 mb/s + * _isutf8 ascii n=22851 26 ps/byte 35 GB/s + * _isutf8 unicode n=3193 543 ps/byte 1,795 mb/s + * + * This function considers all ASCII characters including NUL to be + * valid UTF-8. The conditions for something not being valid are: + * + * - Incorrect sequencing of 0300 (FIRST) and 0200 (CONT) chars + * - Thompson-Pike varint sequence not encodable as UTF-16 + * - Overlong UTF-8 encoding + * + * @param size if -1 implies strlen */ -bool _isutf8(const void *data, size_t size) { - const unsigned char *p, *pe; - for (p = data, pe = p + size; p + 2 <= pe; ++p) { - if (p[0] >= 0300) { - if (p[1] >= 0200 && p[1] < 0300) { +noasan bool _isutf8(const void *data, size_t size) { + long c; + unsigned m; + const char *p, *e; + if (size == -1) size = data ? strlen(data) : 0; + if (IsAsan()) __asan_verify(data, size); + p = data; + e = p + size; + while (p < e) { + if (!((intptr_t)p & 15)) { + for (;;) { + if ((m = __builtin_ia32_pmovmskb128(*(xmm_t *)p >= (xmm_t){0}) ^ + 0xffff)) { + m = __builtin_ctzll(m); + p += m; + break; + } else if ((p += 16) >= e) { + break; + } + } + if (p >= e) { return true; - } else { - return false; } } + if (LIKELY((c = *p++ & 255) < 0200)) continue; + if (UNLIKELY(c < 0300)) return false; + switch (kUtf8Dispatch[c - 0300]) { + case 0: + return false; + case 1: + if (p < e && (*p & 0300) == 0200) { + ++p; + break; + } else { + return false; // missing cont + } + case 2: + if (p < e && (*p & 0377) < 0240) { + return false; // overlong + } + // fallthrough + case 3: + if (p + 2 <= e && // + (p[0] & 0300) == 0200 && // + (p[1] & 0300) == 0200) { // + p += 2; + break; + } else { + return false; // missing cont + } + case 4: + if (p < e && (*p & 0377) < 0220) { + return false; // overlong + } + // fallthrough + case 5: + if (p + 3 <= e && // + (((uint32_t)(p[+2] & 0377) << 030 | // + (uint32_t)(p[+1] & 0377) << 020 | // + (uint32_t)(p[+0] & 0377) << 010 | // + (uint32_t)(p[-1] & 
0377) << 000) & // + 0xC0C0C000) == 0x80808000) { // + p += 3; + break; + } else { + return false; // missing cont + } + default: + unreachable; + } } - return false; + return true; } diff --git a/libc/testlib/testrunner.c b/libc/testlib/testrunner.c index b004ff2b3..4a7415b69 100644 --- a/libc/testlib/testrunner.c +++ b/libc/testlib/testrunner.c @@ -212,8 +212,13 @@ testonly void testlib_runtestcases(testfn_t *start, testfn_t *end, */ const testfn_t *fn; CopySignalHandlers(); - CHECK_NOTNULL(getcwd(g_testlib_olddir, sizeof(g_testlib_olddir))); - if (weaken(testlib_enable_tmp_setup_teardown_once)) SetupTmpDir(); + if (weaken(testlib_enable_tmp_setup_teardown) || + weaken(testlib_enable_tmp_setup_teardown_once)) { + CHECK_NOTNULL(getcwd(g_testlib_olddir, sizeof(g_testlib_olddir))); + } + if (weaken(testlib_enable_tmp_setup_teardown_once)) { + SetupTmpDir(); + } if (weaken(SetUpOnce)) weaken(SetUpOnce)(); for (x = 0, fn = start; fn != end; ++fn) { if (weaken(testlib_enable_tmp_setup_teardown)) SetupTmpDir(); @@ -231,6 +236,10 @@ testonly void testlib_runtestcases(testfn_t *start, testfn_t *end, CheckForSignalHandlers(); CheckForZombies(); } - if (weaken(TearDownOnce)) weaken(TearDownOnce)(); - if (weaken(testlib_enable_tmp_setup_teardown_once)) TearDownTmpDir(); + if (weaken(TearDownOnce)) { + weaken(TearDownOnce)(); + } + if (weaken(testlib_enable_tmp_setup_teardown_once)) { + TearDownTmpDir(); + } } diff --git a/libc/zipos/get.c b/libc/zipos/get.c index 0543e8089..6511d5526 100644 --- a/libc/zipos/get.c +++ b/libc/zipos/get.c @@ -19,6 +19,7 @@ #include "libc/calls/calls.h" #include "libc/calls/strace.internal.h" #include "libc/intrin/cmpxchg.h" +#include "libc/intrin/promises.internal.h" #include "libc/intrin/pthread.h" #include "libc/macros.internal.h" #include "libc/runtime/runtime.h" @@ -63,7 +64,7 @@ struct Zipos *__zipos_get(void) { const char *progpath; static struct Zipos zipos; uint8_t *map, *base, *cdir; - if (!once) { + if (!once && PLEDGED(RPATH)) { 
__zipos_lock(); progpath = GetProgramExecutableName(); if ((fd = open(progpath, O_RDONLY)) != -1) { diff --git a/test/libc/str/isutf8_test.c b/test/libc/str/isutf8_test.c new file mode 100644 index 000000000..b52b88969 --- /dev/null +++ b/test/libc/str/isutf8_test.c @@ -0,0 +1,68 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/symbols.internal.h" +#include "libc/str/str.h" +#include "libc/testlib/blocktronics.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" +#include "libc/testlib/testlib.h" + +__attribute__((__constructor__)) static void init(void) { + GetSymbolTable(); + pledge("stdio", 0); +} + +TEST(isutf8, good) { + ASSERT_TRUE(_isutf8("\0\1\2\3", 4)); + EXPECT_TRUE(_isutf8(kHyperion, kHyperionSize)); + EXPECT_TRUE(_isutf8("𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷▒▒▒▒▒▒▒▒▒▒▒▒", -1)); + EXPECT_TRUE(_isutf8("天地玄黄 宇宙洪荒 日月盈昃 辰宿列张 寒来暑往 秋收冬藏" + "闰馀成岁 律吕调阳 云腾致雨 露结为霜 金生丽水 玉出昆冈" + "剑号巨阙 珠称夜光 果珍李柰 菜重芥姜 海咸河淡 鳞潜羽翔" + "龙师火帝 鸟官人皇 始制文字 乃服衣裳 推位让国 有虞陶唐", + -1)); +} + +TEST(isutf8, bad) { + ASSERT_FALSE(_isutf8("\300\200", -1)); // overlong nul + ASSERT_FALSE(_isutf8("\200\300", -1)); // latin1 c1 control code + ASSERT_FALSE(_isutf8("\300\300", -1)); // missing continuation + ASSERT_FALSE(_isutf8("\377\200\200\200\200", -1)); // thompson-pike varint +} + +TEST(isutf8, oob) { + int n; + char *p; + for (n = 0; n < 32; ++n) { + p = memset(malloc(n), 'a', n); + ASSERT_TRUE(_isutf8(p, n)); + free(p); + } +} + +BENCH(isutf8, bench) { + EZBENCH_N("_isutf8", 0, _isutf8(0, 0)); + EZBENCH_N("_isutf8", 5, _isutf8("hello", 5)); + EZBENCH_N("_isutf8 ascii", kHyperionSize, _isutf8(kHyperion, kHyperionSize)); + EZBENCH_N("_isutf8 unicode", kBlocktronicsSize, + _isutf8(kBlocktronics, kBlocktronicsSize)); +} diff --git a/test/tool/net/encodelua_test.lua b/test/tool/net/encodelua_test.lua index 156dc1a90..a1e48c893 100644 --- a/test/tool/net/encodelua_test.lua +++ b/test/tool/net/encodelua_test.lua @@ -42,8 +42,8 @@ assert(EncodeLua({[{[{[3]=2}]=2}]=2}) == "{[{[{[3]=2}]=2}]=2}") assert(EncodeLua(" [\"new\nline\"] ") == "\" [\\\"new\\nline\\\"] \"") assert(EncodeLua("hello") == [["hello"]]) assert(EncodeLua("\x00") == 
[["\x00"]]) -assert(EncodeLua("→") == [["\xe2\x86\x92"]]) -assert(EncodeLua("𐌰") == [["\xf0\x90\x8c\xb0"]]) +assert(EncodeLua("→") == [["→"]]) +assert(EncodeLua("𐌰") == [["𐌰"]]) assert(EncodeLua("\a") == [["\a"]]) assert(EncodeLua("\b") == [["\b"]]) assert(EncodeLua("\r") == [["\r"]]) diff --git a/third_party/lua/luaencodeluadata.c b/third_party/lua/luaencodeluadata.c index ec1aa6f68..5b4cc9e08 100644 --- a/third_party/lua/luaencodeluadata.c +++ b/third_party/lua/luaencodeluadata.c @@ -25,6 +25,7 @@ #include "libc/mem/mem.h" #include "libc/runtime/stack.h" #include "libc/stdio/append.internal.h" +#include "libc/str/str.h" #include "libc/x/x.h" #include "third_party/double-conversion/wrapper.h" #include "third_party/lua/cosmo.h" @@ -132,7 +133,7 @@ int main(int argc, char *argv[]) { signed char tab[256] = {0}; for (i = 0; i < 256; ++i) { if (i < 0x20) tab[i] = 1; // hex - if (i >= 0x7f) tab[i] = 1; // hex + if (i >= 0x7f) tab[i] = 2; // hex/utf8 } tab['\e'] = 'e'; tab['\a'] = 'a'; @@ -172,36 +173,42 @@ static const char kLuaStrXlat[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,'\\',0,0,0, // 0x50 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, // 0x70 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x80 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xb0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xc0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xd0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xe0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xf0 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0x80 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0x90 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xa0 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xb0 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xc0 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xd0 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xe0 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xf0 }; // clang-format on static int SerializeString(lua_State *L, char **buf, int idx) { - int x; + int c, x; + bool utf8; size_t i, 
n; const char *s; s = lua_tolstring(L, idx, &n); + utf8 = _isutf8(s, n); RETURN_ON_ERROR(appendw(buf, '"')); for (i = 0; i < n; i++) { - switch ((x = kLuaStrXlat[s[i] & 255])) { + switch ((x = kLuaStrXlat[(c = s[i] & 255)])) { case 0: - RETURN_ON_ERROR(appendw(buf, s[i])); - break; - default: - RETURN_ON_ERROR(appendw(buf, READ32LE("\\\x00\x00") | (x << 8))); + EmitByte: + RETURN_ON_ERROR(appendw(buf, c)); break; + case 2: + if (utf8) goto EmitByte; + // fallthrough case 1: RETURN_ON_ERROR( appendw(buf, '\\' | 'x' << 010 | - "0123456789abcdef"[(s[i] & 0xF0) >> 4] << 020 | - "0123456789abcdef"[(s[i] & 0x0F) >> 0] << 030)); + "0123456789abcdef"[(c & 0xF0) >> 4] << 020 | + "0123456789abcdef"[(c & 0x0F) >> 0] << 030)); + break; + default: + RETURN_ON_ERROR(appendw(buf, READ32LE("\\\x00\x00") | (x << 8))); break; } }