mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
6f7d0cb1c3
This makes breaking changes to add underscores to many non-standard function names provided by the c library. MODE=tiny is now tinier and we now use smaller locks that are better for tiny apps in this mode. Some headers have been renamed to be in the same folder as the build package, so it'll be easier to know which build dependency is needed. Certain old misguided interfaces have been removed. Intel intrinsics headers are now listed in libc/isystem (but not in the amalgamation) to help further improve open source compatibility. Header complexity has also been reduced. Lastly, more shell scripts are now available.
176 lines
4.8 KiB
C
176 lines
4.8 KiB
C
#include "libc/intrin/bsf.h"
|
|
#include "third_party/chibicc/chibicc.h"
|
|
|
|
// Slurps contents of file.
//
// `path` names the file to read; the special name "-" reads standard
// input instead. Returns a heap buffer the caller owns (release it
// with free()), holding the file's bytes followed by a NUL terminator.
// The text always ends in a newline: one is appended if missing, and
// a lone trailing backslash is replaced by one. Returns NULL if the
// file can't be opened, a read error occurs, or memory runs out.
char *read_file(const char *path) {
  FILE *fp;
  char *p, *grown;
  size_t buflen, nread, end, n;

  if (!strcmp(path, "-")) {
    fp = stdin;
  } else {
    fp = fopen(path, "r");
    if (!fp) return NULL;
  }

  buflen = 4096;
  nread = 0;
  p = calloc(1, buflen);
  if (!p) goto fail;

  for (;;) {
    // Always keep two spare bytes for the trailing "\n\0" added below.
    end = buflen - 2;
    n = fread(p + nread, 1, end - nread, fp);
    if (n == 0) break;
    nread += n;
    if (nread == end) {
      // Buffer is full: double it. Keep the old pointer until realloc()
      // has succeeded so it can still be freed on failure.
      if (buflen > (size_t)-1 / 2) goto fail;
      grown = realloc(p, buflen * 2);
      if (!grown) goto fail;
      p = grown;
      buflen *= 2;
    }
  }

  // fread() returns 0 for both EOF and errors; tell them apart.
  if (ferror(fp)) goto fail;
  if (fp != stdin) fclose(fp);

  if (nread > 0 && p[nread - 1] == '\\') {
    // A backslash as the very last byte has nothing to splice with.
    p[nread - 1] = '\n';
  } else if (nread == 0 || p[nread - 1] != '\n') {
    p[nread++] = '\n';
  }
  p[nread] = '\0';
  return p;

fail:
  free(p);
  if (fp != stdin) fclose(fp);
  return NULL;
}
|
|
|
|
// Returns `p` advanced past a leading UTF-8 byte order mark, or `p`
// itself if the NUL-terminated text doesn't start with one.
char *skip_bom(char *p) {
  // UTF-8 texts may start with a 3-byte "BOM" marker sequence.
  // If exists, just skip them because they are useless bytes.
  // (It is actually not recommended to add BOM markers to UTF-8
  // texts, but it's not uncommon particularly on Windows.)
  //
  // strncmp() stops comparing at the NUL terminator, so text shorter
  // than the marker is never read past its end.
  if (!strncmp(p, "\357\273\277", 3)) p += 3;
  return p;
}
|
|
|
|
// Normalizes line endings in place: every "\r\n" pair and every lone
// "\r" in the NUL-terminated text at `p` becomes a single "\n". The
// result is never longer than the input and is NUL-terminated.
void canonicalize_newline(char *p) {
  char *q = p;  // write cursor; never runs ahead of the read cursor p
  for (;;) {
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__)  // :'(
    typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
    typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
    if (((uintptr_t)p & 15) == 0) {
      // Fast path, taken whenever the read cursor is 16-byte aligned:
      // move whole 16-byte chunks until one contains NUL, '\r' or '\n'.
      xmm_t chunk;
      unsigned hits, skip;
      xmm_t nul = {0};
      xmm_t cr = {'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
                  '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'};
      xmm_t lf = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
                  '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
      for (;;) {
        chunk = *(const xmm_t *)p;
        // One mask bit per byte that equals NUL, '\r' or '\n'.
        hits = __builtin_ia32_pmovmskb128((chunk == nul) | (chunk == cr) |
                                          (chunk == lf));
        if (hits) break;
        *(xmm_u *)q = chunk;  // q may be unaligned, hence xmm_u
        p += 16;
        q += 16;
      }
      // _bsf() comes from libc/intrin/bsf.h; presumably it yields the
      // index of the lowest set bit, i.e. how many ordinary bytes come
      // before the first interesting one -- TODO confirm.
      skip = _bsf(hits);
      memmove(q, p, skip);
      p += skip;
      q += skip;
    }
#endif
    // Slow path: handle exactly one character (or one "\r\n" pair).
    if (!*p) break;
    if (*p != '\r') {
      *q++ = *p++;
      continue;
    }
    p += (p[1] == '\n') ? 2 : 1;
    *q++ = '\n';
  }
  *q = '\0';
}
|
|
|
|
// Removes backslashes followed by a newline.
//
// Works in place on the NUL-terminated text at `p`. Each removed
// newline is emitted again right after the next real newline (or at
// the end of the text), so the output has as many newlines as the
// input and logical line numbers keep matching the physical ones.
//
// The contents of /* block comments */ are copied verbatim. String
// literals are tracked only so that a "/*" inside one isn't mistaken
// for the start of a comment; escape sequences are consumed as a unit
// so that neither "\"" nor "\\" confuses the end-of-string check.
void remove_backslash_newline(char *p) {
  size_t i = 0;  // read index
  size_t j = 0;  // write index; never runs ahead of i
  // We want to keep the number of newline characters so that
  // the logical line number matches the physical one.
  // This counter maintains the number of newlines we have removed.
  size_t n = 0;
  bool instring = false;
  for (;;) {
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__)  // :'(
    typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
    typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
    if (!((uintptr_t)(p + i) & 15)) {
      // Fast path, taken whenever the read position is 16-byte aligned:
      // move whole 16-byte chunks until one contains a byte the scalar
      // code below cares about (NUL, '/', '"', '\\' or '\n').
      xmm_t v;
      unsigned m;
      xmm_t A = {0};
      xmm_t B = {'/', '/', '/', '/', '/', '/', '/', '/',
                 '/', '/', '/', '/', '/', '/', '/', '/'};
      xmm_t C = {'"', '"', '"', '"', '"', '"', '"', '"',
                 '"', '"', '"', '"', '"', '"', '"', '"'};
      xmm_t D = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
                 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'};
      xmm_t E = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
                 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
      for (;;) {
        v = *(const xmm_t *)(p + i);
        m = __builtin_ia32_pmovmskb128((v == A) | (v == B) | (v == C) |
                                       (v == D) | (v == E));
        if (!m) {
          *(xmm_u *)(p + j) = v;  // p + j may be unaligned, hence xmm_u
          i += 16;
          j += 16;
        } else {
          // m is nonzero here, so counting its trailing zero bits gives
          // the offset of the first special byte within the chunk.
          m = __builtin_ctz(m);
          memmove(p + j, p + i, m);
          i += m;
          j += m;
          break;
        }
      }
    }
#endif
    if (!p[i]) break;
    if (instring) {
      if (p[i] == '"') {
        instring = false;
      } else if (p[i] == '\\' && p[i + 1] != '\n') {
        // Escape sequence: copy the backslash together with the
        // character it escapes, so an escaped quote doesn't end the
        // string and an escaped backslash doesn't hide the real closing
        // quote. Line splices between the two are removed as usual.
        p[j++] = p[i++];
        while (p[i] == '\\' && p[i + 1] == '\n') {
          i += 2;
          n++;
        }
        // A newline is left for the code below so that the pending
        // newlines get flushed.
        if (p[i] && p[i] != '\n') p[j++] = p[i++];
        continue;
      }
    } else if (p[i] == '"') {
      instring = true;
    } else if (p[i] == '/' && p[i + 1] == '*') {
      // Block comment: copy everything up to and including the closing
      // "*/" (or up to the end of the text) without any splicing.
      p[j++] = p[i++];
      p[j++] = p[i++];
      while (p[i]) {
        if (p[i] == '*' && p[i + 1] == '/') {
          p[j++] = p[i++];
          p[j++] = p[i++];
          break;
        }
        p[j++] = p[i++];
      }
      continue;
    }
    if (p[i] == '\\' && p[i + 1] == '\n') {
      i += 2;
      n++;
    } else if (p[i] == '\n') {
      p[j++] = p[i++];
      for (; n > 0; n--) p[j++] = '\n';
      // A string literal can't contain an unspliced newline, so a stray
      // quote (say, inside a // comment) only affects its own line.
      instring = false;
    } else {
      p[j++] = p[i++];
    }
  }
  for (; n > 0; n--) p[j++] = '\n';
  p[j] = '\0';
}
|