mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-07 03:38:31 +00:00
Add x86_64-linux-gnu emulator
I wanted a tiny, scriptable, meltdown-proof way to run userspace programs and visualize how program execution impacts memory. It helps to explain how things like Actually Portable Executable work. It can show you how the GCC-generated code is going about manipulating matrices, and more. I didn't feel fully comfortable with Qemu and Bochs because I'm not smart enough to understand them. I wanted something like gVisor but with much stronger levels of assurance. I wanted a single binary that'll run on all major operating systems, with an embedded GPL barrier ZIP filesystem, that is tiny enough to transpile to JavaScript and run in browsers too. https://justine.storage.googleapis.com/emulator625.mp4
This commit is contained in:
parent
467504308a
commit
f4f4caab0e
1052 changed files with 65667 additions and 7825 deletions
|
@ -1,7 +1,7 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_FMT_BING_H_
|
||||
#define COSMOPOLITAN_LIBC_FMT_BING_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
#ifndef __cplusplus
|
||||
|
||||
int bing(int, int) nosideeffect;
|
||||
int unbing(int) nosideeffect;
|
||||
|
@ -11,6 +11,6 @@ void *unhexbuf(void *, size_t, const char *);
|
|||
void *unhexstr(const char *) mallocesque;
|
||||
short *bingblit(int ys, int xs, unsigned char[ys][xs], int, int);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* __cplusplus */
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_FMT_BING_H_ */
|
||||
|
|
|
@ -39,7 +39,8 @@ LIBC_FMT_A_DIRECTDEPS = \
|
|||
LIBC_TINYMATH \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_NT_KERNELBASE \
|
||||
LIBC_SYSV
|
||||
LIBC_SYSV \
|
||||
THIRD_PARTY_COMPILER_RT
|
||||
|
||||
LIBC_FMT_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(LIBC_FMT_A_DIRECTDEPS),$($(x))))
|
||||
|
|
|
@ -35,20 +35,6 @@ info@paland.com\"");
|
|||
#include "libc/str/internal.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
|
||||
/* 'ntoa' conversion buffer size, this must be big enough to hold one
|
||||
converted numeric number including padded zeros (dynamically created
|
||||
on stack) default: 64 byte */
|
||||
#ifndef PRINTF_NTOA_BUFFER_SIZE
|
||||
#define PRINTF_NTOA_BUFFER_SIZE 64
|
||||
#endif
|
||||
|
||||
/* 'ftoa' conversion buffer size, this must be big enough to hold one
|
||||
converted float number including padded zeros (dynamically created on
|
||||
stack) default: 32 byte */
|
||||
#ifndef PRINTF_FTOA_BUFFER_SIZE
|
||||
#define PRINTF_FTOA_BUFFER_SIZE 64
|
||||
#endif
|
||||
|
||||
#define FLAGS_ZEROPAD (1U << 0U)
|
||||
#define FLAGS_LEFT (1U << 1U)
|
||||
#define FLAGS_PLUS (1U << 2U)
|
||||
|
|
|
@ -59,7 +59,7 @@ int ftoa(int out(int, void *), void *arg, long double value, unsigned long prec,
|
|||
buf[2] = 'n';
|
||||
buf[3] = '\0';
|
||||
len += 3;
|
||||
} else if (isinf(value) || fabsl(value) > 0x7ffffffffffffffful) {
|
||||
} else if (isinf(value) || (value && ilogbl(fabsl(value)) > 63)) {
|
||||
buf[0] = 'f';
|
||||
buf[1] = 'n';
|
||||
buf[2] = 'i';
|
||||
|
@ -90,11 +90,11 @@ int ftoa(int out(int, void *), void *arg, long double value, unsigned long prec,
|
|||
++whole;
|
||||
}
|
||||
} else if (diff < 0.5) {
|
||||
} else if ((frac == 0U) || (frac & 1U)) {
|
||||
} else if (!frac || (frac & 1)) {
|
||||
++frac; /* if halfway, round up if odd OR if last digit is 0 */
|
||||
}
|
||||
|
||||
if (prec == 0U) {
|
||||
if (!prec) {
|
||||
diff = fabsl(value) - whole;
|
||||
if ((!(diff < 0.5) || (diff > 0.5)) && (whole & 1)) {
|
||||
/* exactly 0.5 and ODD, then round up */
|
||||
|
@ -112,7 +112,7 @@ int ftoa(int out(int, void *), void *arg, long double value, unsigned long prec,
|
|||
}
|
||||
}
|
||||
/* add extra 0s */
|
||||
while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0U)) {
|
||||
while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0)) {
|
||||
buf[len++] = '0';
|
||||
}
|
||||
if (len < PRINTF_FTOA_BUFFER_SIZE) {
|
||||
|
|
|
@ -29,9 +29,11 @@
|
|||
#include "libc/fmt/paland.inc"
|
||||
#include "libc/fmt/palandprintf.h"
|
||||
|
||||
uintmax_t __udivmodti4(uintmax_t, uintmax_t, uintmax_t *);
|
||||
|
||||
static int ntoaformat(int out(int, void *), void *arg, char *buf, unsigned len,
|
||||
bool negative, unsigned log2base, unsigned prec,
|
||||
unsigned width, unsigned flags) {
|
||||
unsigned width, unsigned char flags) {
|
||||
unsigned i, idx;
|
||||
idx = 0;
|
||||
|
||||
|
@ -101,9 +103,10 @@ static int ntoaformat(int out(int, void *), void *arg, char *buf, unsigned len,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int ntoa2(int out(int, void *), void *arg, uintmax_t value, bool neg,
|
||||
unsigned log2base, unsigned prec, unsigned width,
|
||||
unsigned flags, const char *alphabet) {
|
||||
int ntoa2(int out(int, void *), void *arg, uintmax_t value, bool neg,
|
||||
unsigned log2base, unsigned prec, unsigned width, unsigned flags,
|
||||
const char *alphabet) {
|
||||
uintmax_t remainder;
|
||||
unsigned len, count, digit;
|
||||
char buf[PRINTF_NTOA_BUFFER_SIZE];
|
||||
len = 0;
|
||||
|
@ -112,12 +115,13 @@ static int ntoa2(int out(int, void *), void *arg, uintmax_t value, bool neg,
|
|||
count = 0;
|
||||
do {
|
||||
assert(len < PRINTF_NTOA_BUFFER_SIZE);
|
||||
if (log2base) {
|
||||
if (!log2base) {
|
||||
value = __udivmodti4(value, 10, &remainder);
|
||||
digit = remainder;
|
||||
} else {
|
||||
digit = value;
|
||||
digit &= (1u << log2base) - 1;
|
||||
value >>= log2base;
|
||||
} else {
|
||||
value = div10(value, &digit);
|
||||
}
|
||||
if ((flags & FLAGS_GROUPING) && count == 3) {
|
||||
buf[len++] = ',';
|
||||
|
@ -132,12 +136,12 @@ static int ntoa2(int out(int, void *), void *arg, uintmax_t value, bool neg,
|
|||
}
|
||||
|
||||
int ntoa(int out(int, void *), void *arg, va_list va, unsigned char signbit,
|
||||
unsigned long log2base, unsigned long precision, unsigned long width,
|
||||
unsigned long flags, const char *alphabet) {
|
||||
bool negative;
|
||||
unsigned long log2base, unsigned long prec, unsigned long width,
|
||||
unsigned char flags, const char *lang) {
|
||||
bool neg;
|
||||
uintmax_t value, sign;
|
||||
|
||||
/* ignore '0' flag when precision is given */
|
||||
/* ignore '0' flag when prec is given */
|
||||
if (flags & FLAGS_PRECISION) {
|
||||
flags &= ~FLAGS_ZEROPAD;
|
||||
}
|
||||
|
@ -153,24 +157,22 @@ int ntoa(int out(int, void *), void *arg, va_list va, unsigned char signbit,
|
|||
value = va_arg(va, uint64_t);
|
||||
}
|
||||
|
||||
negative = false;
|
||||
sign = (uintmax_t)1 << signbit;
|
||||
if (value > (sign | (sign - 1))) erange();
|
||||
neg = 0;
|
||||
sign = 1;
|
||||
sign <<= signbit;
|
||||
value &= sign | (sign - 1);
|
||||
if (flags & FLAGS_ISSIGNED) {
|
||||
if (value != sign) {
|
||||
if (value & sign) {
|
||||
value = ~value + 1;
|
||||
negative = true;
|
||||
value &= sign | (sign - 1);
|
||||
neg = 1;
|
||||
}
|
||||
value &= sign - 1;
|
||||
} else {
|
||||
neg = 1;
|
||||
}
|
||||
} else {
|
||||
value &= sign | (sign - 1);
|
||||
}
|
||||
|
||||
if (ntoa2(out, arg, value, negative, log2base, precision, width, flags,
|
||||
alphabet) == -1) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
return ntoa2(out, arg, value, neg, log2base, prec, width, flags, lang);
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
└─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/weaken.h"
|
||||
#include "libc/conv/conv.h"
|
||||
#include "libc/escape/escape.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
|
@ -47,10 +48,12 @@
|
|||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
|
||||
static unsigned ppatoi(const char **str) {
|
||||
unsigned i;
|
||||
i = 0;
|
||||
while (isdigit(**str)) i = i * 10u + (unsigned)(*((*str)++) - '0');
|
||||
static int ppatoi(const char **str) {
|
||||
int i;
|
||||
for (i = 0; '0' <= **str && **str <= '9'; ++*str) {
|
||||
i *= 10;
|
||||
i += **str - '0';
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
|
@ -104,9 +107,14 @@ static unsigned ppatoi(const char **str) {
|
|||
* @see printf() for wordier documentation
|
||||
*/
|
||||
hidden int palandprintf(void *fn, void *arg, const char *format, va_list va) {
|
||||
void *p;
|
||||
char qchar;
|
||||
long double ldbl;
|
||||
wchar_t charbuf[3];
|
||||
const char *alphabet;
|
||||
int (*out)(int, void *);
|
||||
unsigned flags, width, precision;
|
||||
int lasterr;
|
||||
unsigned char signbit, log2base;
|
||||
int w, rc, flags, width, lasterr, precision;
|
||||
|
||||
lasterr = errno;
|
||||
out = fn ? fn : (int (*)(int, void *))missingno;
|
||||
|
@ -161,7 +169,7 @@ hidden int palandprintf(void *fn, void *arg, const char *format, va_list va) {
|
|||
if (isdigit(*format)) {
|
||||
width = ppatoi(&format);
|
||||
} else if (*format == '*') {
|
||||
const int w = va_arg(va, int);
|
||||
w = va_arg(va, int);
|
||||
if (w < 0) {
|
||||
flags |= FLAGS_LEFT; /* reverse padding */
|
||||
width = -w;
|
||||
|
@ -179,14 +187,16 @@ hidden int palandprintf(void *fn, void *arg, const char *format, va_list va) {
|
|||
if (isdigit(*format)) {
|
||||
precision = ppatoi(&format);
|
||||
} else if (*format == '*') {
|
||||
const int prec = (int)va_arg(va, int);
|
||||
precision = prec > 0 ? prec : 0;
|
||||
precision = va_arg(va, int);
|
||||
format++;
|
||||
}
|
||||
}
|
||||
if (precision < 0) {
|
||||
precision = 0;
|
||||
}
|
||||
|
||||
/* evaluate length field */
|
||||
unsigned char signbit = 31;
|
||||
signbit = 31;
|
||||
switch (*format) {
|
||||
case 'j': /* intmax_t */
|
||||
format++;
|
||||
|
@ -196,8 +206,8 @@ hidden int palandprintf(void *fn, void *arg, const char *format, va_list va) {
|
|||
if (format[1] == 'l') format++;
|
||||
/* fallthrough */
|
||||
case 't': /* ptrdiff_t */
|
||||
case 'Z': /* size_t */
|
||||
case 'z': /* size_t */
|
||||
case 'Z': /* size_t */
|
||||
case 'L': /* long double */
|
||||
format++;
|
||||
signbit = 63;
|
||||
|
@ -216,12 +226,9 @@ hidden int palandprintf(void *fn, void *arg, const char *format, va_list va) {
|
|||
}
|
||||
|
||||
/* evaluate specifier */
|
||||
void *p;
|
||||
const char *alphabet = "0123456789abcdef";
|
||||
unsigned log2base = 0;
|
||||
wchar_t charbuf[3];
|
||||
int rc;
|
||||
char qchar = '"';
|
||||
alphabet = "0123456789abcdef";
|
||||
log2base = 0;
|
||||
qchar = '"';
|
||||
switch (*format++) {
|
||||
case 'p':
|
||||
flags |= FLAGS_ZEROPAD;
|
||||
|
@ -256,23 +263,21 @@ hidden int palandprintf(void *fn, void *arg, const char *format, va_list va) {
|
|||
}
|
||||
|
||||
case 'f':
|
||||
case 'F': {
|
||||
long double value;
|
||||
case 'F':
|
||||
if (signbit == 63) {
|
||||
value = va_arg(va, long double);
|
||||
ldbl = va_arg(va, long double);
|
||||
} else {
|
||||
value = va_arg(va, double);
|
||||
ldbl = va_arg(va, double);
|
||||
}
|
||||
if (weaken(ftoa)(out, arg, value, precision, width, flags) == -1) {
|
||||
if (weaken(ftoa)(out, arg, ldbl, precision, width, flags) == -1) {
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'c':
|
||||
qchar = '\'';
|
||||
p = charbuf;
|
||||
charbuf[0] = (wchar_t)va_arg(va, int); /* @assume little endian */
|
||||
charbuf[0] = va_arg(va, int);
|
||||
charbuf[1] = L'\0';
|
||||
goto showstr;
|
||||
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_FMT_PALANDPRINTF_H_
|
||||
#define COSMOPOLITAN_LIBC_FMT_PALANDPRINTF_H_
|
||||
|
||||
#define PRINTF_NTOA_BUFFER_SIZE 144
|
||||
#define PRINTF_FTOA_BUFFER_SIZE 64
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
|
@ -9,7 +13,7 @@ int ftoa(int(int, void *), void *, long double, unsigned long, unsigned long,
|
|||
int stoa(int(int, void *), void *, void *, unsigned long, unsigned long,
|
||||
unsigned long, unsigned char, unsigned char) hidden;
|
||||
int ntoa(int(int, void *), void *, va_list, unsigned char, unsigned long,
|
||||
unsigned long, unsigned long, unsigned long, const char *) hidden;
|
||||
unsigned long, unsigned long, unsigned char, const char *) hidden;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/fmt/vsscanf.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
|
||||
/**
|
||||
* String decoder.
|
||||
|
@ -28,11 +28,7 @@ int(sscanf)(const char *str, const char *fmt, ...) {
|
|||
int rc;
|
||||
va_list va;
|
||||
va_start(va, fmt);
|
||||
if (IsTiny()) {
|
||||
rc = (vsscanf)(str, fmt, va);
|
||||
} else {
|
||||
rc = __vsscanf(str, fmt, va, __vcscanf);
|
||||
}
|
||||
rc = (vsscanf)(str, fmt, va);
|
||||
va_end(va);
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/safemacros.h"
|
||||
#include "libc/bits/weaken.h"
|
||||
#include "libc/escape/escape.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/fmt/paland.inc"
|
||||
|
@ -176,6 +177,7 @@ int stoa(int out(int, void *), void *arg, void *data, unsigned long flags,
|
|||
if (w <= width && (flags & FLAGS_LEFT)) {
|
||||
if (spacepad(out, arg, width - w) == -1) return -1;
|
||||
}
|
||||
|
||||
if (!(flags & FLAGS_NOQUOTE) && (flags & FLAGS_REPR)) {
|
||||
if (out(qchar, arg) == -1) return -1;
|
||||
}
|
||||
|
|
|
@ -26,5 +26,5 @@
|
|||
char *strerror(int err) {
|
||||
alignas(1) static char buf[512];
|
||||
strerror_r(err, buf, sizeof(buf));
|
||||
return &buf[0];
|
||||
return buf;
|
||||
}
|
||||
|
|
|
@ -27,14 +27,15 @@
|
|||
#include "libc/nt/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
static const char *geterrname(int code) {
|
||||
const char *geterrname(int code) {
|
||||
extern const char kErrnoNames[];
|
||||
const long *e;
|
||||
const char *s;
|
||||
size_t i;
|
||||
for (i = 0, e = &E2BIG; e <= &EXFULL; ++e, ++i) {
|
||||
if (code == *e && (s = indexdoublenulstring(&kErrnoNames[0], i))) {
|
||||
return s;
|
||||
size_t i, n;
|
||||
e = &E2BIG;
|
||||
n = &EXFULL + 1 - e;
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (code == e[i]) {
|
||||
return indexdoublenulstring(kErrnoNames, i);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
@ -46,8 +47,13 @@ static const char *geterrname(int code) {
|
|||
*/
|
||||
int strerror_r(int err, char *buf, size_t size) {
|
||||
const char *s;
|
||||
s = (err == -1 || IsTiny()) ? "?" : firstnonnull(geterrname(err), "?");
|
||||
if (err == -1 || IsTiny()) {
|
||||
s = "?";
|
||||
} else {
|
||||
s = firstnonnull(geterrname(err), "?");
|
||||
}
|
||||
if (!SupportsWindows()) {
|
||||
DebugBreak();
|
||||
snprintf(buf, size, "E%s[%d]", s, err);
|
||||
} else {
|
||||
char16_t buf16[100];
|
||||
|
|
|
@ -17,13 +17,276 @@
|
|||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/weaken.h"
|
||||
#include "libc/conv/conv.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/fmt/vcscanf.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/tpdecodecb.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
|
||||
/**
|
||||
* Linkable callback-driven string / file / stream decoder.
|
||||
* @see libc/fmt/vcscanf.h (for docs and implementation)
|
||||
* String / file / stream decoder.
|
||||
*
|
||||
* This scanf implementation is able to tokenize strings containing
|
||||
* 8-bit through 128-bit integers (with validation), floating point
|
||||
* numbers, etc. It can also be used to convert UTF-8 to UTF-16/32.
|
||||
*
|
||||
* - `%d` parses integer
|
||||
* - `%ms` parses string allocating buffer assigning pointer
|
||||
*
|
||||
* @param callback supplies UTF-8 characters using -1 sentinel
|
||||
* @param fmt is a computer program embedded inside a c string, written
|
||||
* in a domain-specific programming language that, by design, lacks
|
||||
* Turing-completeness
|
||||
* @param va points to the variadic argument state
|
||||
* @see libc/fmt/pflink.h (dynamic memory is not a requirement)
|
||||
*/
|
||||
int(vcscanf)(int callback(void *), void *arg, const char *fmt, va_list ap) {
|
||||
return __vcscanf(callback, arg, fmt, ap);
|
||||
int vcscanf(int callback(void *), void *arg, const char *fmt, va_list va) {
|
||||
struct FreeMe {
|
||||
struct FreeMe *next;
|
||||
void *ptr;
|
||||
} *freeme = NULL;
|
||||
const unsigned char *p = (const unsigned char *)fmt;
|
||||
unsigned i = 0;
|
||||
int items = 0;
|
||||
int c = callback(arg);
|
||||
while (c != -1) {
|
||||
switch (p[i++]) {
|
||||
case '\0':
|
||||
return items;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\r':
|
||||
case '\v':
|
||||
while (isspace(c)) c = callback(arg);
|
||||
break;
|
||||
case '%': {
|
||||
uintmax_t number;
|
||||
void *buf;
|
||||
size_t bufsize;
|
||||
unsigned width = 0;
|
||||
unsigned char bits = 32;
|
||||
unsigned char charbytes = sizeof(char);
|
||||
unsigned char diglet;
|
||||
unsigned char base;
|
||||
unsigned char prefix;
|
||||
bool rawmode = false;
|
||||
bool issigned = false;
|
||||
bool ismalloc = false;
|
||||
bool isneg = false;
|
||||
bool thousands = false;
|
||||
bool discard = false;
|
||||
for (;;) {
|
||||
switch (p[i++]) {
|
||||
case '%': /* %% → % */
|
||||
goto NonDirectiveCharacter;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
width *= 10;
|
||||
width += p[i - 1] - '0';
|
||||
break;
|
||||
case '*':
|
||||
discard = true;
|
||||
break;
|
||||
case 'm':
|
||||
ismalloc = true;
|
||||
break;
|
||||
case 'c':
|
||||
rawmode = true;
|
||||
if (!width) width = 1;
|
||||
/* εpsilon transition */
|
||||
case 's':
|
||||
goto DecodeString;
|
||||
case '\'':
|
||||
thousands = true;
|
||||
break;
|
||||
case 'j': /* 128-bit */
|
||||
bits = sizeof(intmax_t) * 8;
|
||||
break;
|
||||
case 'l': /* long */
|
||||
case 'L': /* loooong */
|
||||
charbytes = sizeof(wchar_t);
|
||||
/* fallthrough */
|
||||
case 't': /* ptrdiff_t */
|
||||
case 'Z': /* size_t */
|
||||
case 'z': /* size_t */
|
||||
bits = 64;
|
||||
break;
|
||||
case 'h': /* short and char */
|
||||
charbytes = sizeof(char16_t);
|
||||
bits >>= 1;
|
||||
break;
|
||||
case 'b': /* binary */
|
||||
base = 2;
|
||||
prefix = 'b';
|
||||
goto ConsumeBasePrefix;
|
||||
case 'p': /* pointer (NexGen32e) */
|
||||
bits = 48;
|
||||
/* fallthrough */
|
||||
case 'x':
|
||||
case 'X': /* hexadecimal */
|
||||
base = 16;
|
||||
prefix = 'x';
|
||||
goto ConsumeBasePrefix;
|
||||
case 'o': /* octal */
|
||||
base = 8;
|
||||
goto DecodeNumber;
|
||||
case 'd': /* decimal */
|
||||
case 'n': /* TODO(jart): flexidecimal */
|
||||
issigned = true;
|
||||
if (c == '+' || (isneg = c == '-')) {
|
||||
c = callback(arg);
|
||||
}
|
||||
/* εpsilon transition */
|
||||
case 'u':
|
||||
base = 10;
|
||||
goto DecodeNumber;
|
||||
default:
|
||||
items = einval();
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
ConsumeBasePrefix:
|
||||
if (c == '0') {
|
||||
c = callback(arg);
|
||||
if (c == prefix || c == prefix + ('a' - 'A')) {
|
||||
c = callback(arg);
|
||||
}
|
||||
}
|
||||
DecodeNumber:
|
||||
if (c != -1) {
|
||||
number = 0;
|
||||
do {
|
||||
diglet = kBase36[(unsigned char)c];
|
||||
if (1 <= diglet && diglet <= base) {
|
||||
number *= base;
|
||||
number += diglet - 1;
|
||||
} else if (thousands && diglet == ',') {
|
||||
/* ignore */
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while ((c = callback(arg)) != -1);
|
||||
if (!discard) {
|
||||
uintmax_t bane = (uintmax_t)1 << (bits - 1);
|
||||
if (!(number & ~((bane - 1) | (issigned ? 0 : bane))) ||
|
||||
(issigned && number == bane /* two's complement bane */)) {
|
||||
++items;
|
||||
} else {
|
||||
items = erange();
|
||||
goto Done;
|
||||
}
|
||||
if (issigned && isneg) {
|
||||
number = ~number + 1;
|
||||
}
|
||||
void *out = va_arg(va, void *);
|
||||
switch (bits) {
|
||||
case sizeof(uintmax_t) * CHAR_BIT:
|
||||
*(uintmax_t *)out = number;
|
||||
break;
|
||||
case 48:
|
||||
case 64:
|
||||
*(uint64_t *)out = (uint64_t)number;
|
||||
break;
|
||||
case 32:
|
||||
*(uint32_t *)out = (uint32_t)number;
|
||||
break;
|
||||
case 16:
|
||||
*(uint16_t *)out = (uint16_t)number;
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
*(uint8_t *)out = (uint8_t)number;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
DecodeString:
|
||||
bufsize = !width ? 32 : rawmode ? width : width + 1;
|
||||
if (discard) {
|
||||
buf = NULL;
|
||||
} else if (ismalloc) {
|
||||
buf = weaken(malloc)(bufsize * charbytes);
|
||||
struct FreeMe *entry;
|
||||
if (buf && (entry = weaken(calloc)(1, sizeof(struct FreeMe)))) {
|
||||
entry->ptr = buf;
|
||||
entry->next = freeme;
|
||||
freeme = entry;
|
||||
}
|
||||
} else {
|
||||
buf = va_arg(va, void *);
|
||||
}
|
||||
if (buf) {
|
||||
size_t j = 0;
|
||||
for (;;) {
|
||||
if (ismalloc && !width && j + 2 + 1 >= bufsize &&
|
||||
!weaken(grow)(&buf, &bufsize, charbytes, 0)) {
|
||||
width = bufsize - 1;
|
||||
}
|
||||
if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) {
|
||||
if (charbytes == 1) {
|
||||
((unsigned char *)buf)[j++] = (unsigned char)c;
|
||||
c = callback(arg);
|
||||
} else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) !=
|
||||
-1) {
|
||||
if (charbytes == sizeof(char16_t)) {
|
||||
j += abs(pututf16(&((char16_t *)buf)[j], bufsize - j - 1, c,
|
||||
false));
|
||||
} else {
|
||||
((wchar_t *)buf)[j++] = (wchar_t)c;
|
||||
}
|
||||
c = callback(arg);
|
||||
}
|
||||
} else {
|
||||
if (!rawmode && j < bufsize) {
|
||||
if (charbytes == sizeof(char)) {
|
||||
((unsigned char *)buf)[j] = '\0';
|
||||
} else if (charbytes == sizeof(char16_t)) {
|
||||
((char16_t *)buf)[j] = u'\0';
|
||||
} else if (charbytes == sizeof(wchar_t)) {
|
||||
((wchar_t *)buf)[j] = L'\0';
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
++items;
|
||||
if (ismalloc) {
|
||||
*va_arg(va, char **) = buf;
|
||||
}
|
||||
} else {
|
||||
do {
|
||||
if (isspace(c)) break;
|
||||
} while ((c = callback(arg)) != -1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
NonDirectiveCharacter:
|
||||
c = (c == p[i - 1]) ? callback(arg) : -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Done:
|
||||
while (freeme) {
|
||||
struct FreeMe *entry = freeme;
|
||||
freeme = entry->next;
|
||||
if (items == -1) {
|
||||
weaken(free_s)((void **)&entry->ptr);
|
||||
}
|
||||
weaken(free_s)((void **)&entry);
|
||||
}
|
||||
return items;
|
||||
}
|
||||
|
|
|
@ -1,297 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=8 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#ifndef COSMOPOLITAN_LIBC_FMT_VCSSCANF_H_
|
||||
#define COSMOPOLITAN_LIBC_FMT_VCSSCANF_H_
|
||||
#include "libc/conv/conv.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/tpdecodecb.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
|
||||
* String / file / stream decoder.
|
||||
*
|
||||
* This scanf implementation is able to tokenize strings containing
|
||||
* 8-bit through 128-bit integers (with validation), floating point
|
||||
* numbers, etc. It can also be used to convert UTF-8 to UTF-16/32.
|
||||
*
|
||||
* - `%d` parses integer
|
||||
* - `%ms` parses string allocating buffer assigning pointer
|
||||
*
|
||||
* @param callback supplies UTF-8 characters using -1 sentinel
|
||||
* @param fmt is a computer program embedded inside a c string, written
|
||||
* in a domain-specific programming language that, by design, lacks
|
||||
* Turing-completeness
|
||||
* @param va points to the variadic argument state
|
||||
* @see libc/fmt/pflink.h (dynamic memory is not a requirement)
|
||||
*/
|
||||
forceinline int __vcscanf(int callback(void *), void *arg, const char *fmt,
|
||||
va_list va) {
|
||||
struct FreeMe {
|
||||
struct FreeMe *next;
|
||||
void *ptr;
|
||||
} *freeme = NULL;
|
||||
const unsigned char *p = (const unsigned char *)fmt;
|
||||
unsigned i = 0;
|
||||
int items = 0;
|
||||
int c = callback(arg);
|
||||
while (c != -1) {
|
||||
switch (p[i++]) {
|
||||
case '\0':
|
||||
return items;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\r':
|
||||
case '\v':
|
||||
while (isspace(c)) c = callback(arg);
|
||||
break;
|
||||
case '%': {
|
||||
uintmax_t number;
|
||||
void *buf;
|
||||
size_t bufsize;
|
||||
unsigned width = 0;
|
||||
unsigned char bits = 32;
|
||||
unsigned char charbytes = sizeof(char);
|
||||
unsigned char diglet;
|
||||
unsigned char base;
|
||||
unsigned char prefix;
|
||||
bool rawmode = false;
|
||||
bool issigned = false;
|
||||
bool ismalloc = false;
|
||||
bool isneg = false;
|
||||
bool thousands = false;
|
||||
bool discard = false;
|
||||
for (;;) {
|
||||
switch (p[i++]) {
|
||||
case '%': /* %% → % */
|
||||
goto NonDirectiveCharacter;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
width *= 10;
|
||||
width += p[i - 1] - '0';
|
||||
break;
|
||||
case '*':
|
||||
discard = true;
|
||||
break;
|
||||
case 'm':
|
||||
ismalloc = true;
|
||||
break;
|
||||
case 'c':
|
||||
rawmode = true;
|
||||
if (!width) width = 1;
|
||||
/* εpsilon transition */
|
||||
case 's':
|
||||
goto DecodeString;
|
||||
case '\'':
|
||||
thousands = true;
|
||||
break;
|
||||
case 'j': /* 128-bit */
|
||||
bits = sizeof(intmax_t) * 8;
|
||||
break;
|
||||
case 'l': /* long */
|
||||
case 'L': /* loooong */
|
||||
charbytes = sizeof(wchar_t);
|
||||
/* fallthrough */
|
||||
case 't': /* ptrdiff_t */
|
||||
case 'Z': /* size_t */
|
||||
case 'z': /* size_t */
|
||||
bits = 64;
|
||||
break;
|
||||
case 'h': /* short and char */
|
||||
charbytes = sizeof(char16_t);
|
||||
bits >>= 1;
|
||||
break;
|
||||
case 'b': /* binary */
|
||||
base = 2;
|
||||
prefix = 'b';
|
||||
goto ConsumeBasePrefix;
|
||||
case 'p': /* pointer (NexGen32e) */
|
||||
bits = 48;
|
||||
/* fallthrough */
|
||||
case 'x':
|
||||
case 'X': /* hexadecimal */
|
||||
base = 16;
|
||||
prefix = 'x';
|
||||
goto ConsumeBasePrefix;
|
||||
case 'o': /* octal */
|
||||
base = 8;
|
||||
goto DecodeNumber;
|
||||
case 'd': /* decimal */
|
||||
case 'n': /* TODO(jart): flexidecimal */
|
||||
issigned = true;
|
||||
if (c == '+' || (isneg = c == '-')) {
|
||||
c = callback(arg);
|
||||
}
|
||||
/* εpsilon transition */
|
||||
case 'u':
|
||||
base = 10;
|
||||
goto DecodeNumber;
|
||||
default:
|
||||
items = einval();
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
ConsumeBasePrefix:
|
||||
if (c == '0') {
|
||||
c = callback(arg);
|
||||
if (c == prefix || c == prefix + ('a' - 'A')) {
|
||||
c = callback(arg);
|
||||
}
|
||||
}
|
||||
DecodeNumber:
|
||||
if (c != -1) {
|
||||
number = 0;
|
||||
do {
|
||||
diglet = kBase36[(unsigned char)c];
|
||||
if (1 <= diglet && diglet <= base) {
|
||||
number *= base;
|
||||
number += diglet - 1;
|
||||
} else if (thousands && diglet == ',') {
|
||||
/* ignore */
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while ((c = callback(arg)) != -1);
|
||||
if (!discard) {
|
||||
uintmax_t bane = (uintmax_t)1 << (bits - 1);
|
||||
if (!(number & ~((bane - 1) | (issigned ? 0 : bane))) ||
|
||||
(issigned && number == bane /* two's complement bane */)) {
|
||||
++items;
|
||||
} else {
|
||||
items = erange();
|
||||
goto Done;
|
||||
}
|
||||
if (issigned && isneg) {
|
||||
number = ~number + 1;
|
||||
}
|
||||
void *out = va_arg(va, void *);
|
||||
switch (bits) {
|
||||
case sizeof(uintmax_t) * CHAR_BIT:
|
||||
*(uintmax_t *)out = number;
|
||||
break;
|
||||
case 48:
|
||||
case 64:
|
||||
*(uint64_t *)out = (uint64_t)number;
|
||||
break;
|
||||
case 32:
|
||||
*(uint32_t *)out = (uint32_t)number;
|
||||
break;
|
||||
case 16:
|
||||
*(uint16_t *)out = (uint16_t)number;
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
*(uint8_t *)out = (uint8_t)number;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
DecodeString:
|
||||
bufsize = !width ? 32 : rawmode ? width : width + 1;
|
||||
if (discard) {
|
||||
buf = NULL;
|
||||
} else if (ismalloc) {
|
||||
buf = weaken(malloc)(bufsize * charbytes);
|
||||
struct FreeMe *entry;
|
||||
if (buf && (entry = weaken(calloc)(1, sizeof(struct FreeMe)))) {
|
||||
entry->ptr = buf;
|
||||
entry->next = freeme;
|
||||
freeme = entry;
|
||||
}
|
||||
} else {
|
||||
buf = va_arg(va, void *);
|
||||
}
|
||||
if (buf) {
|
||||
size_t j = 0;
|
||||
for (;;) {
|
||||
if (ismalloc && !width && j + 2 + 1 >= bufsize &&
|
||||
!weaken(grow)(&buf, &bufsize, charbytes, 0)) {
|
||||
width = bufsize - 1;
|
||||
}
|
||||
if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) {
|
||||
if (charbytes == 1) {
|
||||
((unsigned char *)buf)[j++] = (unsigned char)c;
|
||||
c = callback(arg);
|
||||
} else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) !=
|
||||
-1) {
|
||||
if (charbytes == sizeof(char16_t)) {
|
||||
j += abs(pututf16(&((char16_t *)buf)[j], bufsize - j - 1, c,
|
||||
false));
|
||||
} else {
|
||||
((wchar_t *)buf)[j++] = (wchar_t)c;
|
||||
}
|
||||
c = callback(arg);
|
||||
}
|
||||
} else {
|
||||
if (!rawmode && j < bufsize) {
|
||||
if (charbytes == sizeof(char)) {
|
||||
((unsigned char *)buf)[j] = '\0';
|
||||
} else if (charbytes == sizeof(char16_t)) {
|
||||
((char16_t *)buf)[j] = u'\0';
|
||||
} else if (charbytes == sizeof(wchar_t)) {
|
||||
((wchar_t *)buf)[j] = L'\0';
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
++items;
|
||||
if (ismalloc) {
|
||||
*va_arg(va, char **) = buf;
|
||||
}
|
||||
} else {
|
||||
do {
|
||||
if (isspace(c)) break;
|
||||
} while ((c = callback(arg)) != -1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
NonDirectiveCharacter:
|
||||
c = (c == p[i - 1]) ? callback(arg) : -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Done:
|
||||
while (freeme) {
|
||||
struct FreeMe *entry = freeme;
|
||||
freeme = entry->next;
|
||||
if (items == -1) {
|
||||
weaken(free_s)((void **)&entry->ptr);
|
||||
}
|
||||
weaken(free_s)((void **)&entry);
|
||||
}
|
||||
return items;
|
||||
}
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_FMT_VCSSCANF_H_ */
|
|
@ -18,7 +18,24 @@
|
|||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/fmt/vsscanf.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
|
||||
struct StringScannerState {
|
||||
const unsigned char *s;
|
||||
size_t i;
|
||||
};
|
||||
|
||||
static int vsscanfcb(void *arg) {
|
||||
int res;
|
||||
struct StringScannerState *state;
|
||||
state = arg;
|
||||
if ((res = state->s[state->i])) {
|
||||
state->i++;
|
||||
} else {
|
||||
res = -1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes string.
|
||||
|
@ -29,5 +46,6 @@
|
|||
* a small code size penalty to using both
|
||||
*/
|
||||
int(vsscanf)(const char *str, const char *fmt, va_list va) {
|
||||
return __vsscanf(str, fmt, va, IsTiny() ? vcscanf : __vcscanf);
|
||||
struct StringScannerState state = {(const unsigned char *)str, 0};
|
||||
return vcscanf(vsscanfcb, &state, fmt, va);
|
||||
}
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_INTERNAL_VSSCANF_H_
|
||||
#define COSMOPOLITAN_LIBC_INTERNAL_VSSCANF_H_
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/fmt/vcscanf.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
struct StringScannerState {
|
||||
const unsigned char *s;
|
||||
size_t i;
|
||||
};
|
||||
|
||||
static inline int vsscanfcb(void *arg) {
|
||||
struct StringScannerState *state = arg;
|
||||
int res;
|
||||
if ((res = state->s[state->i])) {
|
||||
state->i++;
|
||||
} else {
|
||||
res = -1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* String decoder builder.
|
||||
*
|
||||
* This macro grants sscanf() and vsscanf() the choice to either link or
|
||||
* inline the full vcscanf() implementation.
|
||||
*
|
||||
* @see libc/fmt/vcscanf.h (for docs and implementation)
|
||||
*/
|
||||
static inline int __vsscanf(const char *str, const char *fmt, va_list ap,
|
||||
int impl(int callback(void *), void *arg,
|
||||
const char *fmt, va_list ap)) {
|
||||
struct StringScannerState state = {(const unsigned char *)str, 0};
|
||||
return impl(vsscanfcb, &state, fmt, ap);
|
||||
}
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTERNAL_VSSCANF_H_ */
|
Loading…
Add table
Add a link
Reference in a new issue