Have redbean Lua repl show utf-8 when it's valid

Otherwise EncodeLua() will assume strings are binary and display them
using hex sequences.
This commit is contained in:
Justine Tunney 2022-07-23 06:47:01 -07:00
parent 31e4b0867b
commit ce5cb8a2f8
10 changed files with 217 additions and 35 deletions

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/promises.internal.h"
#include "libc/log/libfatal.internal.h"
#include "libc/log/log.h"
#include "libc/nexgen32e/vendor.internal.h"
@ -44,6 +45,7 @@ int IsDebuggerPresent(bool force) {
if (!force && __getenv(environ, "HEISENDEBUG")) return 0;
if (IsWindows()) return IsBeingDebugged();
if (__isworker) return false;
if (!PLEDGED(RPATH)) return false;
res = 0;
if ((fd = __sysv_open("/proc/self/status", O_RDONLY, 0)) >= 0) {
if ((got = __sysv_read(fd, buf, sizeof(buf) - 1)) > 0) {

View file

@ -31,6 +31,7 @@
#include "libc/fmt/fmt.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/promises.internal.h"
#include "libc/log/backtrace.internal.h"
#include "libc/log/color.internal.h"
#include "libc/log/log.h"
@ -65,6 +66,10 @@ static int PrintBacktraceUsingAddr2line(int fd, const struct StackFrame *bp) {
char *debugbin, *p1, *p2, *p3, *addr2line;
char buf[kBacktraceBufSize], *argv[kBacktraceMaxFrames];
if (!PLEDGED(STDIO) || !PLEDGED(EXEC) || !PLEDGED(EXEC)) {
return -1;
}
if (!(debugbin = FindDebugBinary())) {
return -1;
}

View file

@ -20,6 +20,7 @@
#include "libc/bits/bits.h"
#include "libc/bits/weaken.h"
#include "libc/calls/strace.internal.h"
#include "libc/intrin/promises.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/macros.internal.h"
#include "libc/runtime/internal.h"
@ -95,7 +96,7 @@ static struct SymbolTable *GetSymbolTableFromZip(struct Zipos *zipos) {
static struct SymbolTable *GetSymbolTableFromElf(void) {
int e;
const char *s;
if ((s = FindDebugBinary())) {
if (PLEDGED(RPATH) && (s = FindDebugBinary())) {
return OpenSymbolTable(s);
} else {
return 0;

View file

@ -53,6 +53,7 @@ static textexit void LogStackUse(void) {
bool quote;
char *p, *q;
size_t n, usage;
if (!PLEDGED(STDIO) || !PLEDGED(WPATH) || !PLEDGED(CPATH)) return;
usage = GetStackUsage((char *)GetStackAddr(), GetStackSize());
fd = open(stacklog, O_APPEND | O_CREAT | O_WRONLY, 0644);
p = FormatUint64(stacklog, usage);

View file

@ -16,23 +16,111 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/likely.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/str/str.h"
// Vector of sixteen chars with 16-byte alignment; _isutf8 uses it to
// compare a whole aligned 16-byte chunk against zero in one operation.
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
// Decoder class for each lead byte, indexed by (byte - 0300).
//
//   0  rejected outright
//   1  needs one continuation byte
//   2  lead 0340: next byte must be >= 0240, then handled as class 3
//   3  needs two continuation bytes
//   4  lead 0360: next byte must be >= 0220, then handled as class 5
//   5  needs three continuation bytes
//
static const char kUtf8Dispatch[] = {
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0300..0317
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0320..0337
    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // 0340..0357
    4, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0360..0377
};
/**
* Returns true if text data is most likely utf-8.
* Returns true if text is utf-8.
*
* This function will return false if a pure ascii string is passed.
* _isutf8 n=0 1 nanoseconds
* _isutf8 n=5 661 ps/byte 1,476 mb/s
* _isutf8 ascii n=22851 26 ps/byte 35 GB/s
* _isutf8 unicode n=3193 543 ps/byte 1,795 mb/s
*
* This function considers all ASCII characters including NUL to be
* valid UTF-8. The conditions for something not being valid are:
*
* - Incorrect sequencing of 0300 (FIRST) and 0200 (CONT) chars
* - Thompson-Pike varint sequence not encodable as UTF-16
* - Overlong UTF-8 encoding
*
* @param size if -1 implies strlen
*/
bool _isutf8(const void *data, size_t size) {
const unsigned char *p, *pe;
for (p = data, pe = p + size; p + 2 <= pe; ++p) {
if (p[0] >= 0300) {
if (p[1] >= 0200 && p[1] < 0300) {
noasan bool _isutf8(const void *data, size_t size) {
long c;
unsigned m;
const char *p, *e;
if (size == -1) size = data ? strlen(data) : 0;
if (IsAsan()) __asan_verify(data, size);
p = data;
e = p + size;
while (p < e) {
if (!((intptr_t)p & 15)) {
for (;;) {
if ((m = __builtin_ia32_pmovmskb128(*(xmm_t *)p >= (xmm_t){0}) ^
0xffff)) {
m = __builtin_ctzll(m);
p += m;
break;
} else if ((p += 16) >= e) {
break;
}
}
if (p >= e) {
return true;
} else {
return false;
}
}
if (LIKELY((c = *p++ & 255) < 0200)) continue;
if (UNLIKELY(c < 0300)) return false;
switch (kUtf8Dispatch[c - 0300]) {
case 0:
return false;
case 1:
if (p < e && (*p & 0300) == 0200) {
++p;
break;
} else {
return false; // missing cont
}
case 2:
if (p < e && (*p & 0377) < 0240) {
return false; // overlong
}
// fallthrough
case 3:
if (p + 2 <= e && //
(p[0] & 0300) == 0200 && //
(p[1] & 0300) == 0200) { //
p += 2;
break;
} else {
return false; // missing cont
}
case 4:
if (p < e && (*p & 0377) < 0220) {
return false; // overlong
}
// fallthrough
case 5:
if (p + 3 <= e && //
(((uint32_t)(p[+2] & 0377) << 030 | //
(uint32_t)(p[+1] & 0377) << 020 | //
(uint32_t)(p[+0] & 0377) << 010 | //
(uint32_t)(p[-1] & 0377) << 000) & //
0xC0C0C000) == 0x80808000) { //
p += 3;
break;
} else {
return false; // missing cont
}
default:
unreachable;
}
}
return false;
return true;
}

View file

@ -212,8 +212,13 @@ testonly void testlib_runtestcases(testfn_t *start, testfn_t *end,
*/
const testfn_t *fn;
CopySignalHandlers();
CHECK_NOTNULL(getcwd(g_testlib_olddir, sizeof(g_testlib_olddir)));
if (weaken(testlib_enable_tmp_setup_teardown_once)) SetupTmpDir();
if (weaken(testlib_enable_tmp_setup_teardown) ||
weaken(testlib_enable_tmp_setup_teardown_once)) {
CHECK_NOTNULL(getcwd(g_testlib_olddir, sizeof(g_testlib_olddir)));
}
if (weaken(testlib_enable_tmp_setup_teardown_once)) {
SetupTmpDir();
}
if (weaken(SetUpOnce)) weaken(SetUpOnce)();
for (x = 0, fn = start; fn != end; ++fn) {
if (weaken(testlib_enable_tmp_setup_teardown)) SetupTmpDir();
@ -231,6 +236,10 @@ testonly void testlib_runtestcases(testfn_t *start, testfn_t *end,
CheckForSignalHandlers();
CheckForZombies();
}
if (weaken(TearDownOnce)) weaken(TearDownOnce)();
if (weaken(testlib_enable_tmp_setup_teardown_once)) TearDownTmpDir();
if (weaken(TearDownOnce)) {
weaken(TearDownOnce)();
}
if (weaken(testlib_enable_tmp_setup_teardown_once)) {
TearDownTmpDir();
}
}

View file

@ -19,6 +19,7 @@
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/promises.internal.h"
#include "libc/intrin/pthread.h"
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
@ -63,7 +64,7 @@ struct Zipos *__zipos_get(void) {
const char *progpath;
static struct Zipos zipos;
uint8_t *map, *base, *cdir;
if (!once) {
if (!once && PLEDGED(RPATH)) {
__zipos_lock();
progpath = GetProgramExecutableName();
if ((fd = open(progpath, O_RDONLY)) != -1) {