Improve some unicode functions

This commit is contained in:
Justine Tunney 2021-05-05 07:25:39 -07:00
parent b9187061a7
commit 1b5a5719c3
33 changed files with 8366 additions and 197 deletions

191
tool/decode/scrubdox.c Normal file
View file

@ -0,0 +1,191 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
THIS PROGRAM TURNS TEXT LIKE THIS
+------------------------------------------------------------------------+
| Button | Name | Go to | From 1.2.3 |
| | | | go to |
|------------+-------------+--------------------------------+------------|
| [ < ] | Back | previous section in reading | 1.2.2 |
| | | order | |
|------------+-------------+--------------------------------+------------|
| [ > ] | Forward | next section in reading order | 1.2.4 |
|------------+-------------+--------------------------------+------------|
| [ << ] | FastBack | previous or up-and-previous | 1.1 |
| | | section | |
|------------+-------------+--------------------------------+------------|
| [ Up ] | Up | up section | 1.2 |
|------------+-------------+--------------------------------+------------|
| [ >> ] | FastForward | next or up-and-next section | 1.3 |
|------------+-------------+--------------------------------+------------|
| [Top] | Top | cover (top) of document | |
|------------+-------------+--------------------------------+------------|
| [Contents] | Contents | table of contents | |
|------------+-------------+--------------------------------+------------|
| [Index] | Index | concept index | |
|------------+-------------+--------------------------------+------------|
| [ ? ] | About | this page | |
+------------------------------------------------------------------------+
INTO THIS
Button Name Go to From 1.2.3
go to
[ < ] Back previous section in reading 1.2.2
order
[ > ] Forward next section in reading order 1.2.4
[ << ] FastBack previous or upandprevious 1.1
section
[ Up ] Up up section 1.2
[ >> ] FastForward next or upandnext section 1.3
[Top] Top cover (top) of document
[Contents] Contents table of contents
[Index] Index concept index
[ ? ] About this page
*/
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/runtime/gc.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/x/x.h"
/* Cell classifiers for the table-art grid. Each one accepts the ASCII
   character used in the input and, except for IsSpace, one box-drawing
   glyph as well. The argument is expanded more than once, so pass
   expressions without side effects. */

/* True for a blank cell. */
#define IsSpace(C) ((C) == ' ')
/* True for a vertical stroke: ASCII '|' or the box-drawing vertical. */
#define IsPipe(C) ((C) == '|' || (C) == u'│')
/* True for a junction: ASCII '+' or the box-drawing cross. */
#define IsPlus(C) ((C) == '+' || (C) == u'┼')
/* True for a horizontal stroke: ASCII '-' or the box-drawing horizontal. */
#define IsHyphen(C) ((C) == '-' || (C) == u'─')
/* True for a backtick or the box-drawing up-and-right corner. */
#define IsTick(C) ((C) == '`' || (C) == u'└')
/* Program-wide state shared by main() and DoIt(). */
int n;        /* scratch length used by main(), e.g. strwidth() of the line just read */
int yn;       /* number of input lines read */
int xn;       /* widest input line seen by main(), plus the slack main() adds */
FILE *f;      /* input stream; main() points it at stdin */
bool *V;      /* visited flags for DoIt(), indexed as y * (xn + 1) + x */
char **T;     /* input lines as read; released once L has been built */
char16_t **L; /* the grid: yn + 2 rows, with blank rows at 0 and yn + 1 */
static void DoIt(int y, int x) {
if (V[y * (xn + 1) + x]) return;
V[y * (xn + 1) + x] = 1;
if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsSpace(L[y - 1][x]) && IsHyphen(L[y][x - 1]) &&
IsHyphen(L[y][x]) && IsHyphen(L[y][x + 1]) &&
IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) &&
IsHyphen(L[y][x]) && IsHyphen(L[y][x + 1]) &&
IsSpace(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsSpace(L[y][x - 1]) && IsPipe(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPipe(L[y][x]) &&
IsSpace(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsSpace(L[y - 1][x]) && IsSpace(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsSpace(L[y][x + 1]) && IsSpace(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsSpace(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsSpace(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsPipe(L[y - 1][x]) && IsSpace(L[y][x - 1]) && IsPlus(L[y][x]) &&
IsHyphen(L[y][x + 1]) && IsSpace(L[y + 1][x])) {
L[y][x] = u'';
} else if (IsTick(L[y][x]) && IsPipe(L[y - 1][x]) && IsHyphen(L[y][x + 1]) &&
IsSpace(L[y + 1][x]) && IsSpace(L[y][x - 1])) {
L[y][x] = u'';
} else if (L[y][x] == '-') {
L[y][x] = u'';
} else if (L[y][x] == '|') {
L[y][x] = u'';
} else {
return;
}
DoIt(y - 1, x + 0);
DoIt(y + 1, x + 0);
DoIt(y + 0, x - 1);
DoIt(y + 0, x + 1);
}
/**
 * Builds one row of the grid from a line of input.
 *
 * The row is a blank guard column, then TEXT, then blanks so that the
 * row holds at least COLS + 2 UTF-16 code units. The padding is measured
 * on the converted row itself, so every index in 0..COLS+1 is readable
 * whatever TEXT contains.
 *
 * @param text NUL-terminated line to place in the row
 * @param cols number of scanned columns (the global xn)
 * @return newly allocated NUL-terminated row; release with free()
 */
static char16_t *MakeRow(const char *text, int cols) {
  size_t i, len, need;
  char *guarded;
  char16_t *row;
  guarded = xasprintf(" %s", text);
  row = utf8toutf16(guarded, -1, 0);
  free(guarded);
  for (len = 0; row[len]; ++len) {
  }
  need = (size_t)cols + 2;
  if (len < need) {
    row = xrealloc(row, (need + 1) * sizeof(*row));
    for (i = len; i < need; ++i) row[i] = u' ';
    row[need] = 0;
  }
  return row;
}

/**
 * Reads text from stdin, redraws ASCII table art with box-drawing
 * glyphs, and writes the result to stdout with trailing blanks removed.
 *
 * @return 0 always
 */
int main(int argc, char *argv[]) {
  char *s;
  int y, x;
  size_t len;
  showcrashreports();
  f = stdin;
  /* Slurp every line, tracking the widest one. */
  while ((s = chomp(xgetline(f)))) {
    n = strwidth(s, 0);
    xn = MAX(xn, n);
    T = xrealloc(T, ++yn * sizeof(*T));
    T[yn - 1] = s;
  }
  /* Slack columns kept from the original sizing. NOTE(review): strwidth()
     presumably reports display width, which can be smaller than a row's
     UTF-16 length; the slack keeps such rows inside the scanned range. */
  xn += 1000;
  /* Grid rows 1..yn hold the input; rows 0 and yn + 1 are blank so that
     every scanned cell has an upper and a lower neighbour. */
  L = xmalloc((yn + 2) * sizeof(*L));
  L[0] = MakeRow("", xn);
  for (y = 0; y < yn; ++y) {
    L[y + 1] = MakeRow(T[y], xn);
    free(T[y]);
  }
  L[yn + 1] = MakeRow("", xn);
  free(T);
  /* One flag for every cell of the grid, padding included, so that the
     y * (xn + 1) + x indexing used by DoIt() always lands inside it. */
  V = xcalloc((size_t)(yn + 2) * (xn + 2), 1);
  /* Start a fill at every full junction and at every backtick corner. */
  for (y = 1; y <= yn; ++y) {
    for (x = 1; x <= xn; ++x) {
      if (IsPipe(L[y - 1][x]) && IsHyphen(L[y][x - 1]) && IsPlus(L[y][x]) &&
          IsHyphen(L[y][x + 1]) && IsPipe(L[y + 1][x])) {
        DoIt(y, x);
      }
      if (IsTick(L[y][x]) && IsPipe(L[y - 1][x]) && IsHyphen(L[y][x + 1]) &&
          IsSpace(L[y + 1][x]) && IsSpace(L[y][x - 1])) {
        DoIt(y, x);
      }
    }
  }
  /* Emit every input row (1..yn), minus the guard column and the
     trailing padding. */
  for (y = 1; y <= yn; ++y) {
    s = utf16toutf8(L[y], -1, 0);
    len = strlen(s);
    while (len > 1 && s[len - 1] == ' ') s[--len] = 0;
    puts(s + 1);
    free(s);
  }
  /* L owns yn + 2 rows, not yn. */
  for (y = 0; y < yn + 2; ++y) {
    free(L[y]);
  }
  free(L);
  free(V);
  return 0;
}

View file

@ -1,22 +1,30 @@
;; Constant names grouped under "ansi".  `cosmo-c-constants-regex'
;; appends this list to the other constant lists before building its
;; symbol-bounded regexp.
(defconst cosmo-c-constants-ansi
'("EOF"
"WEOF"
"NDEBUG"
"HUGE_VAL"
"CLK_TCK"))
(defconst cosmo-c-constants-c11
'("__func__"
"__VA_ARGS__"
"__STDC__"
"__STDC_HOSTED__"
"__STDC_VERSION__"
"__TIME__"
"__STDC_ISO_10646__"
"__STDC_MB_MIGHT_NEQ_WC__"
"__STDC_UTF_16__"
"__STDC_UTF_32__"
"__STDC_ANALYZABLE__"
"__STDC_IEC_559_COMPLEX__"
"__STDC_LIB_EXT1__"
"__STDC_NO_ATOMICS__"
"__STDC_NO_COMPLEX__"
"__STDC_NO_THREADS__"
"__STDC_NO_VLA__"
"__STDC_WANT_LIB_EXT1__"))
'("__func__"
"__VA_ARGS__"
"__STDC__"
"__STDC_HOSTED__"
"__STDC_VERSION__"
"__TIME__"
"__STDC_ISO_10646__"
"__STDC_MB_MIGHT_NEQ_WC__"
"__STDC_UTF_16__"
"__STDC_UTF_32__"
"__STDC_ANALYZABLE__"
"__STDC_IEC_559_COMPLEX__"
"__STDC_LIB_EXT1__"
"__STDC_NO_ATOMICS__"
"__STDC_NO_COMPLEX__"
"__STDC_NO_THREADS__"
"__STDC_NO_VLA__"
"__STDC_WANT_LIB_EXT1__"))
(defconst cosmo-c-constants-limits
'("IMAGE_BASE_VIRTUAL"
@ -24,6 +32,7 @@
"IMAGE_BASE_PHYSICAL"
"CHAR_MAX"
"SCHAR_MAX"
"UCHAR_MAX"
"SHRT_MAX"
"INT_MAX"
"LONG_MAX"
@ -40,6 +49,7 @@
"INTPTR_MAX"
"PTRDIFF_MAX"
"SCHAR_MIN"
"UCHAR_MIN"
"SHRT_MIN"
"UINT_MIN"
"INT_MIN"
@ -152,7 +162,8 @@
(defconst cosmo-c-constants-regex
(concat "\\_<"
(regexp-opt (append cosmo-c-constants-c11
(regexp-opt (append cosmo-c-constants-ansi
cosmo-c-constants-c11
cosmo-c-constants-limits
cosmo-c-constants-math))
"\\_>"))

View file

@ -26,6 +26,7 @@
#include "libc/calls/struct/rusage.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/stat.h"
#include "libc/dos.h"
#include "libc/errno.h"
#include "libc/fmt/conv.h"
#include "libc/fmt/itoa.h"
@ -83,11 +84,6 @@
#define HASH_LOAD_FACTOR /* 1. / */ 4
#define DEFAULT_PORT 8080
#define DOS_DATE(YEAR, MONTH_IDX1, DAY_IDX1) \
(((YEAR)-1980) << 9 | (MONTH_IDX1) << 5 | (DAY_IDX1))
#define DOS_TIME(HOUR, MINUTE, SECOND) \
((HOUR) << 11 | (MINUTE) << 5 | (SECOND) >> 1)
#define read(F, P, N) readv(F, &(struct iovec){P, N}, 1)
#define LockInc(P) asm volatile("lock incq\t%0" : "=m"(*(P)))
#define AppendCrlf(P) mempcpy(P, "\r\n", 2)
@ -132,11 +128,15 @@ static const struct ContentTypeExtension {
{"atom", "application/atom+xml"}, //
{"avi", "video/x-msvideo"}, //
{"avif", "image/avif"}, //
{"azw", "application/vnd.amazon.ebook"}, //
{"bmp", "image/bmp"}, //
{"bz2", "application/x-bzip2"}, //
{"c", "text/plain"}, //
{"cc", "text/plain"}, //
{"css", "text/css"}, //
{"csv", "text/csv"}, //
{"doc", "application/msword"}, //
{"epub", "application/epub+zip"}, //
{"gif", "image/gif"}, //
{"gz", "application/gzip"}, //
{"h", "text/plain"}, //
@ -147,11 +147,13 @@ static const struct ContentTypeExtension {
{"jar", "application/java-archive"}, //
{"jpeg", "image/jpeg"}, //
{"jpg", "image/jpeg"}, //
{"js", "application/javascript"}, //
{"js", "text/javascript"}, //
{"json", "application/json"}, //
{"m4a", "audio/mpeg"}, //
{"markdown", "text/plain"}, //
{"md", "text/plain"}, //
{"mid", "audio/midi"}, //
{"midi", "audio/midi"}, //
{"mp2", "audio/mpeg"}, //
{"mp3", "audio/mpeg"}, //
{"mp4", "video/mp4"}, //
@ -192,9 +194,11 @@ static const struct ContentTypeExtension {
{"xml", "application/xml"}, //
{"xsl", "application/xslt+xml"}, //
{"xslt", "application/xslt+xml"}, //
{"xz", "application/x-xz"}, //
{"z", "application/zlib"}, //
{"zip", "application/zip"}, //
{"zst", "application/zstd"}, //
{"zst", "application/zstd"}, //
};
static const char kRegCode[][8] = {
@ -638,7 +642,7 @@ static void UseOutput(void) {
}
static void DropOutput(void) {
free(outbuf.p);
FreeLater(outbuf.p);
outbuf.p = 0;
outbuf.n = 0;
outbuf.c = 0;
@ -806,13 +810,16 @@ static char *DescribeServer(void) {
}
static void ProgramBrand(const char *s) {
char *p;
free(brand);
free(serverheader);
brand = strdup(s);
if (!(serverheader = EncodeHttpHeaderValue(brand, -1, 0))) {
fprintf(stderr, "error: brand isn't latin1 encodable: %`'s", brand);
if (!(p = EncodeHttpHeaderValue(s, -1, 0))) {
fprintf(stderr, "error: brand isn't latin1 encodable: %`'s", s);
exit(1);
}
brand = strdup(s);
serverheader = xasprintf("Server: %s\r\n", p);
free(p);
}
static void ProgramLinger(long sec) {
@ -1194,7 +1201,7 @@ static void ReapZombies(void) {
} while (!terminated);
}
static inline ssize_t WritevAll(int fd, struct iovec *iov, int iovlen) {
static ssize_t WritevAll(int fd, struct iovec *iov, int iovlen) {
ssize_t rc;
size_t wrote;
do {
@ -1286,14 +1293,6 @@ forceinline int GetMode(struct Asset *a) {
return a->file ? a->file->st.st_mode : GetZipCfileMode(zmap + a->cf);
}
/**
 * Tells whether asset A is unchanged relative to the request's
 * If-Modified-Since header.
 *
 * Always false when msg.version is below 10 or the header is absent;
 * otherwise true when a->lastmodified is at or after the parsed header
 * time.
 */
forceinline bool IsNotModified(struct Asset *a) {
  return msg.version >= 10 && HasHeader(kHttpIfModifiedSince) &&
         a->lastmodified >=
             ParseHttpDateTime(HeaderData(kHttpIfModifiedSince),
                               HeaderLength(kHttpIfModifiedSince));
}
static char *FormatUnixHttpDateTime(char *s, int64_t t) {
struct tm tm;
gmtime_r(&t, &tm);
@ -1305,7 +1304,7 @@ forceinline bool IsCompressionMethodSupported(int method) {
return method == kZipCompressionNone || method == kZipCompressionDeflate;
}
static unsigned Hash(const void *p, unsigned long n) {
static inline unsigned Hash(const void *p, unsigned long n) {
unsigned h, i;
for (h = i = 0; i < n; i++) {
h += ((unsigned char *)p)[i];
@ -1468,12 +1467,6 @@ static char *AppendCache(char *p, int64_t seconds) {
return AppendExpires(p, (int64_t)shared->nowish + seconds);
}
/**
 * Writes a "Server: <s>" header line, CRLF included, at P.
 *
 * @param p output cursor
 * @param s header value to copy after the "Server: " prefix
 * @return cursor just past the appended CRLF
 */
static inline char *AppendServer(char *p, const char *s) {
  return AppendCrlf(stpcpy(stpcpy(p, "Server: "), s));
}
static inline char *AppendContentLength(char *p, size_t n) {
p = stpcpy(p, "Content-Length: ");
p += uint64toarray_radix10(n, p);
@ -3059,8 +3052,8 @@ static int LuaIsAcceptablePort(lua_State *L) {
return LuaIsValid(L, IsAcceptablePort);
}
static int LuaCoderImpl(lua_State *L,
char *Coder(const char *, size_t, size_t *)) {
static noinline int LuaCoderImpl(lua_State *L,
char *Coder(const char *, size_t, size_t *)) {
void *p;
size_t n;
p = luaL_checklstring(L, 1, &n);
@ -3070,7 +3063,8 @@ static int LuaCoderImpl(lua_State *L,
return 1;
}
static int LuaCoder(lua_State *L, char *Coder(const char *, size_t, size_t *)) {
static noinline int LuaCoder(lua_State *L,
char *Coder(const char *, size_t, size_t *)) {
return LuaCoderImpl(L, Coder);
}
@ -3220,7 +3214,7 @@ static int LuaCrc32c(lua_State *L) {
return LuaHash(L, crc32c);
}
static int LuaProgramInt(lua_State *L, void Program(long)) {
static noinline int LuaProgramInt(lua_State *L, void Program(long)) {
Program(luaL_checkinteger(L, 1));
return 0;
}
@ -4208,7 +4202,7 @@ static inline int CompareInts(const uint64_t x, uint64_t y) {
return x > y ? 1 : x < y ? -1 : 0;
}
static inline const char *BisectContentType(uint64_t ext) {
static const char *BisectContentType(uint64_t ext) {
int c, m, l, r;
l = 0;
r = ARRAYLEN(kContentTypeExtension) - 1;
@ -4251,6 +4245,14 @@ static const char *GetContentType(struct Asset *a, const char *path, size_t n) {
a->istext ? "text/plain" : "application/octet-stream"));
}
/**
 * Tells whether asset A is unchanged relative to the request's
 * If-Modified-Since header.
 *
 * @param a asset whose lastmodified time is compared
 * @return true when the header is present, msg.version is at least 10,
 *     and a->lastmodified is at or after the parsed header time
 */
static bool IsNotModified(struct Asset *a) {
  if (msg.version >= 10 && HasHeader(kHttpIfModifiedSince)) {
    return a->lastmodified >=
           ParseHttpDateTime(HeaderData(kHttpIfModifiedSince),
                             HeaderLength(kHttpIfModifiedSince));
  }
  return false;
}
static char *ServeAsset(struct Asset *a, const char *path, size_t pathlen) {
char *p;
uint32_t crc;
@ -4392,7 +4394,7 @@ static bool HandleMessage(void) {
}
if (msg.version >= 10) {
p = AppendCrlf(stpcpy(stpcpy(p, "Date: "), shared->currentdate));
if (!branded) p = AppendServer(p, serverheader);
if (!branded) p = stpcpy(p, serverheader);
if (extrahdrs) p = stpcpy(p, extrahdrs);
if (connectionclose) {
p = stpcpy(p, "Connection: close\r\n");

View file

@ -835,6 +835,26 @@ static void OnMouse(char *p) {
}
}
/* Overwrites the first (byn * bxn) >> 6 elements of board with values
   from rand64(), one call per element, in ascending index order. */
static void Rando1(void) {
  long k, words;
  words = (byn * bxn) >> 6;
  k = 0;
  while (k < words) {
    board[k] = rand64();
    ++k;
  }
}
/* Overwrites the first (byn * bxn) >> 6 elements of board, building each
   one from three successive rand() calls: the first result shifted up by
   33 bits in total, the second by 2, and the low two bits of the third.
   NOTE(review): this fills 64 bits only if rand() yields 31 — confirm. */
static void Rando2(void) {
  long k, words;
  words = (byn * bxn) >> 6;
  k = 0;
  while (k < words) {
    board[k] = rand();
    board[k] <<= 31;
    board[k] |= rand();
    board[k] <<= 2;
    board[k] |= rand() & 3;
    ++k;
  }
}
static void ReadKeyboard(void) {
char buf[32], *p = buf;
memset(buf, 0, sizeof(buf));
@ -861,6 +881,12 @@ static void ReadKeyboard(void) {
case CTRL('V'):
OnPageDown();
break;
case CTRL('R'):
Rando1();
break;
case CTRL('G'):
Rando2();
break;
case 'M':
if (mousemode) {
DisableMouse();