mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
611 lines
19 KiB
C
611 lines
19 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/fmt/conv.h"
|
|
#include "libc/fmt/internal.h"
|
|
#include "libc/limits.h"
|
|
#include "libc/mem/internal.h"
|
|
#include "libc/mem/mem.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/str/tab.internal.h"
|
|
#include "libc/str/tpdecodecb.internal.h"
|
|
#include "libc/str/utf16.h"
|
|
#include "libc/sysv/errfuns.h"
|
|
#include "third_party/gdtoa/gdtoa.h"
|
|
|
|
/*
 * Fetches the next input unit via `callback(arg)` and evaluates to it.
 *
 * `consumed` is incremented for every unit except the -1 sentinel, so it
 * counts how many units have been pulled from the source so far.
 *
 * Expands in terms of `callback`, `arg` and `consumed`, which must be in
 * scope at the point of use.
 */
#define READ                 \
  ({                         \
    int rd_ = callback(arg); \
    if (rd_ != -1) {         \
      ++consumed;            \
    }                        \
    rd_;                     \
  })
|
|
|
|
/* Number of bytes by which the floating point scratch buffer grows. */
#define FP_BUFFER_GROW 48

/*
 * Like READ, but also appends the unit to the NUL-terminated scratch
 * buffer `fpbuf` (length `fpbufcur`, capacity `fpbufsize`), growing the
 * buffer by FP_BUFFER_GROW bytes whenever no room is left for one more
 * byte plus the terminator. The -1 sentinel is never stored.
 *
 * If the buffer cannot be grown, `items` is set to -1 and control jumps
 * to the `Done` label of the enclosing function, which releases `fpbuf`.
 * The old buffer stays owned by `fpbuf` in that case, so nothing leaks.
 *
 * Expands in terms of `fpbuf`, `fpbufcur`, `fpbufsize`, `items` and the
 * `Done` label, plus everything READ needs.
 */
#define BUFFER                                                   \
  ({                                                             \
    int got_ = READ;                                             \
    if (fpbufcur + 1 >= fpbufsize) {                             \
      void *grown_ = realloc(fpbuf, fpbufsize + FP_BUFFER_GROW); \
      if (!grown_) {                                             \
        items = -1;                                              \
        goto Done;                                               \
      }                                                          \
      fpbuf = grown_;                                            \
      fpbufsize += FP_BUFFER_GROW;                               \
    }                                                            \
    if (got_ != -1) {                                            \
      fpbuf[fpbufcur++] = got_;                                  \
      fpbuf[fpbufcur] = '\0';                                    \
    }                                                            \
    got_;                                                        \
  })
|
|
/*
 * Drops the most recently buffered byte from `fpbuf` again.
 *
 * Used when the lookahead `c` turned out not to belong to the number
 * being collected. Nothing happens when `c` is the -1 sentinel, since
 * BUFFER never stores that, or when the buffer is already empty.
 *
 * Expands in terms of `c`, `fpbuf` and `fpbufcur`.
 */
#define UNBUFFER                     \
  do {                               \
    if (c != -1 && fpbufcur > 0) {   \
      fpbuf[--fpbufcur] = '\0';      \
    }                                \
  } while (0)
|
|
|
|
/**
|
|
* String / file / stream decoder.
|
|
*
|
|
* This scanf implementation is able to tokenize strings containing
|
|
* 8-bit through 128-bit integers (with validation), floating point
|
|
* numbers, etc. It can also be used to convert UTF-8 to UTF-16/32.
|
|
*
|
|
* - `%d` parses integer
|
|
* - `%ms` parses string allocating buffer assigning pointer
|
|
*
|
|
* @param callback supplies UTF-8 characters using -1 sentinel
|
|
* @param fmt is a computer program embedded inside a c string, written
|
|
* in a domain-specific programming language that, by design, lacks
|
|
* Turing-completeness
|
|
* @param va points to the variadic argument state
|
|
* @see libc/fmt/pflink.h (dynamic memory is not a requirement)
|
|
*/
|
|
int __vcscanf(int callback(void *), //
|
|
int unget(int, void *), //
|
|
void *arg, //
|
|
const char *fmt, //
|
|
va_list va) {
|
|
struct FreeMe {
|
|
struct FreeMe *next;
|
|
void *ptr;
|
|
} *freeme = NULL;
|
|
unsigned char *fpbuf = NULL;
|
|
size_t fpbufsize;
|
|
size_t fpbufcur;
|
|
const unsigned char *p = (const unsigned char *)fmt;
|
|
int *n_ptr;
|
|
int items = 0;
|
|
int consumed = 0;
|
|
unsigned i = 0;
|
|
int c = READ;
|
|
for (;;) {
|
|
switch (p[i++]) {
|
|
case '\0':
|
|
if (c != -1 && unget) {
|
|
unget(c, arg);
|
|
}
|
|
goto Done;
|
|
case ' ':
|
|
case '\t':
|
|
case '\n':
|
|
case '\r':
|
|
case '\v':
|
|
while (isspace(c)) {
|
|
c = READ;
|
|
}
|
|
break;
|
|
case '%': {
|
|
uint128_t number;
|
|
unsigned char *buf;
|
|
size_t bufsize;
|
|
double fp;
|
|
unsigned width = 0;
|
|
unsigned char bits = 32;
|
|
unsigned char charbytes = sizeof(char);
|
|
unsigned char diglet;
|
|
unsigned char base;
|
|
unsigned char prefix;
|
|
bool rawmode = false;
|
|
bool issigned = false;
|
|
bool ismalloc = false;
|
|
bool isneg = false;
|
|
bool thousands = false;
|
|
bool discard = false;
|
|
for (;;) {
|
|
switch (p[i++]) {
|
|
case '%': // %% → %
|
|
goto NonDirectiveCharacter;
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
width *= 10;
|
|
width += p[i - 1] - '0';
|
|
break;
|
|
case '*':
|
|
discard = true;
|
|
break;
|
|
case 'm':
|
|
ismalloc = true;
|
|
break;
|
|
case 'c':
|
|
rawmode = true;
|
|
if (!width)
|
|
width = 1;
|
|
// fallthrough
|
|
case 's':
|
|
while (isspace(c)) {
|
|
c = READ;
|
|
}
|
|
goto DecodeString;
|
|
case '\'':
|
|
thousands = true;
|
|
break;
|
|
case 'j': // j=64-bit jj=128-bit
|
|
if (bits < 64) {
|
|
bits = 64;
|
|
} else {
|
|
bits = 128;
|
|
}
|
|
break;
|
|
case 'l': // long
|
|
case 'L': // loooong
|
|
charbytes = sizeof(wchar_t);
|
|
// fallthrough
|
|
case 't': // ptrdiff_t
|
|
case 'Z': // size_t
|
|
case 'z': // size_t
|
|
bits = 64;
|
|
break;
|
|
case 'h': // short and char
|
|
charbytes = sizeof(char16_t);
|
|
bits >>= 1;
|
|
break;
|
|
case 'b': // binary
|
|
base = 2;
|
|
prefix = 'b';
|
|
goto ConsumeBasePrefix;
|
|
case 'p': // pointer (NexGen32e)
|
|
bits = 48;
|
|
// fallthrough
|
|
case 'x':
|
|
case 'X': // hexadecimal
|
|
base = 16;
|
|
prefix = 'x';
|
|
goto ConsumeBasePrefix;
|
|
case 'o': // octal
|
|
base = 8;
|
|
goto HandleNumber;
|
|
case 'n':
|
|
goto ReportConsumed;
|
|
case 'd': // decimal
|
|
issigned = true;
|
|
// fallthrough
|
|
case 'u':
|
|
base = 10;
|
|
HandleNumber:
|
|
while (isspace(c)) {
|
|
c = READ;
|
|
}
|
|
if (c == '+' || (isneg = c == '-')) {
|
|
c = READ;
|
|
}
|
|
goto DecodeNumber;
|
|
case 'i': // flexidecimal
|
|
while (isspace(c)) {
|
|
c = READ;
|
|
}
|
|
if (c == '+' || (isneg = c == '-')) {
|
|
c = READ;
|
|
}
|
|
if (c == '0') {
|
|
c = READ;
|
|
if (c == 'x' || c == 'X') {
|
|
c = READ;
|
|
base = 16;
|
|
} else if (c == 'b' || c == 'B') {
|
|
c = READ;
|
|
base = 2;
|
|
} else if ('0' <= c && c <= '7') {
|
|
base = 8;
|
|
} else {
|
|
number = 0;
|
|
goto GotNumber;
|
|
}
|
|
} else {
|
|
base = 10;
|
|
}
|
|
goto DecodeNumber;
|
|
case 'a':
|
|
case 'A':
|
|
case 'e':
|
|
case 'E':
|
|
case 'f':
|
|
case 'F':
|
|
case 'g':
|
|
case 'G': // floating point number
|
|
if (!(charbytes == sizeof(char) ||
|
|
charbytes == sizeof(wchar_t))) {
|
|
items = -1;
|
|
goto Done;
|
|
}
|
|
while (isspace(c)) {
|
|
c = READ;
|
|
}
|
|
fpbufsize = FP_BUFFER_GROW;
|
|
if ((fpbuf = malloc(fpbufsize))) {
|
|
fpbufcur = 0;
|
|
fpbuf[fpbufcur++] = c;
|
|
fpbuf[fpbufcur] = '\0';
|
|
goto ConsumeFloatingPointNumber;
|
|
} else {
|
|
items = -1;
|
|
goto Done;
|
|
}
|
|
default:
|
|
items = einval();
|
|
goto Done;
|
|
}
|
|
}
|
|
ConsumeBasePrefix:
|
|
while (isspace(c)) {
|
|
c = READ;
|
|
}
|
|
if (c == '+' || (isneg = c == '-')) {
|
|
c = READ;
|
|
}
|
|
if (c == '0') {
|
|
c = READ;
|
|
if (c == prefix || c == prefix + ('a' - 'A')) {
|
|
c = READ;
|
|
} else if (c == -1) {
|
|
number = 0;
|
|
goto GotNumber;
|
|
}
|
|
}
|
|
DecodeNumber:
|
|
if (c != -1 && (1 <= kBase36[(unsigned char)c] &&
|
|
kBase36[(unsigned char)c] <= base)) {
|
|
number = 0;
|
|
width = !width ? bits : width;
|
|
do {
|
|
diglet = kBase36[(unsigned char)c];
|
|
if (1 <= diglet && diglet <= base) {
|
|
width -= 1;
|
|
number *= base;
|
|
number += diglet - 1;
|
|
} else if (thousands && diglet == ',') {
|
|
// ignore
|
|
} else {
|
|
break;
|
|
}
|
|
} while ((c = READ) != -1 && width > 0);
|
|
GotNumber:
|
|
if (!discard) {
|
|
uint128_t bane = (uint128_t)1 << (bits - 1);
|
|
if (!(number & ~((bane - 1) | (issigned ? 0 : bane))) ||
|
|
(issigned && number == bane)) {
|
|
++items;
|
|
} else {
|
|
items = erange();
|
|
goto Done;
|
|
}
|
|
if (issigned && isneg) {
|
|
number = ~number + 1;
|
|
}
|
|
void *out = va_arg(va, void *);
|
|
switch (bits) {
|
|
case sizeof(uint128_t) * CHAR_BIT:
|
|
*(uint128_t *)out = number;
|
|
break;
|
|
case 48:
|
|
case 64:
|
|
*(uint64_t *)out = (uint64_t)number;
|
|
break;
|
|
case 32:
|
|
*(uint32_t *)out = (uint32_t)number;
|
|
break;
|
|
case 16:
|
|
*(uint16_t *)out = (uint16_t)number;
|
|
break;
|
|
case 8:
|
|
default:
|
|
*(uint8_t *)out = (uint8_t)number;
|
|
break;
|
|
}
|
|
} else if (!items && c == -1) {
|
|
items = -1;
|
|
goto Done;
|
|
}
|
|
} else if (c == -1 && !items) {
|
|
items = -1;
|
|
goto Done;
|
|
} else {
|
|
if (c != -1 && unget) {
|
|
unget(c, arg);
|
|
}
|
|
goto Done;
|
|
}
|
|
continue;
|
|
ConsumeFloatingPointNumber:
|
|
if (c == '+' || c == '-') {
|
|
c = BUFFER;
|
|
}
|
|
bool hexadecimal = false;
|
|
if (c == '0') {
|
|
c = BUFFER;
|
|
if (c == 'x' || c == 'X') {
|
|
c = BUFFER;
|
|
hexadecimal = true;
|
|
goto BufferFloatingPointNumber;
|
|
} else if (c == -1) {
|
|
goto GotFloatingPointNumber;
|
|
} else {
|
|
goto BufferFloatingPointNumber;
|
|
}
|
|
} else if (c == 'n' || c == 'N') {
|
|
c = BUFFER;
|
|
if (c == 'a' || c == 'A') {
|
|
c = BUFFER;
|
|
if (c == 'n' || c == 'N') {
|
|
c = BUFFER;
|
|
if (c == '(') {
|
|
c = BUFFER;
|
|
do {
|
|
bool isdigit = c >= '0' && c <= '9';
|
|
bool isletter =
|
|
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
|
if (!(c == '_' || isdigit || isletter)) {
|
|
goto Done;
|
|
}
|
|
} while ((c = BUFFER) != -1 && c != ')');
|
|
if (c == ')') {
|
|
c = READ;
|
|
}
|
|
goto GotFloatingPointNumber;
|
|
} else {
|
|
UNBUFFER;
|
|
goto GotFloatingPointNumber;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else if (c == 'i' || c == 'I') {
|
|
c = BUFFER;
|
|
if (c == 'n' || c == 'N') {
|
|
c = BUFFER;
|
|
if (c == 'f' || c == 'F') {
|
|
c = BUFFER;
|
|
if (c == 'i' || c == 'I') {
|
|
c = BUFFER;
|
|
if (c == 'n' || c == 'N') {
|
|
c = BUFFER;
|
|
if (c == 'i' || c == 'I') {
|
|
c = BUFFER;
|
|
if (c == 't' || c == 'T') {
|
|
c = BUFFER;
|
|
if (c == 'y' || c == 'Y') {
|
|
c = BUFFER;
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else {
|
|
UNBUFFER;
|
|
goto GotFloatingPointNumber;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
} else {
|
|
goto Done;
|
|
}
|
|
}
|
|
BufferFloatingPointNumber:
|
|
enum { INTEGER, FRACTIONAL, SIGN, EXPONENT } state = INTEGER;
|
|
do {
|
|
bool isdecdigit = c >= '0' && c <= '9';
|
|
bool ishexdigit = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
|
|
bool ispoint = c == '.' || c == ',';
|
|
bool isdecexp = c == 'e' || c == 'E';
|
|
bool ishexp = c == 'p' || c == 'P';
|
|
bool issign = c == '+' || c == '-';
|
|
|
|
switch (state) {
|
|
case INTEGER:
|
|
case FRACTIONAL:
|
|
if (isdecdigit || (hexadecimal && ishexdigit)) {
|
|
goto Continue;
|
|
} else if (state == INTEGER && ispoint) {
|
|
state = FRACTIONAL;
|
|
goto Continue;
|
|
} else if (isdecexp || (hexadecimal && ishexp)) {
|
|
state = SIGN;
|
|
goto Continue;
|
|
} else {
|
|
goto Break;
|
|
}
|
|
case SIGN:
|
|
if (issign) {
|
|
state = EXPONENT;
|
|
goto Continue;
|
|
}
|
|
state = EXPONENT;
|
|
// fallthrough
|
|
case EXPONENT:
|
|
if (isdecdigit) {
|
|
goto Continue;
|
|
} else {
|
|
goto Break;
|
|
}
|
|
default:
|
|
goto Break;
|
|
}
|
|
Continue:
|
|
continue;
|
|
Break:
|
|
UNBUFFER;
|
|
break;
|
|
} while ((c = BUFFER) != -1);
|
|
GotFloatingPointNumber:
|
|
/* An empty buffer can't be a valid float; don't even bother parsing. */
|
|
bool valid = fpbufcur > 0;
|
|
if (valid) {
|
|
char *ep;
|
|
fp = strtod((char *)fpbuf, &ep);
|
|
/* We should have parsed the whole buffer. */
|
|
valid = ep == (char *)fpbuf + fpbufcur;
|
|
}
|
|
free(fpbuf);
|
|
fpbuf = NULL;
|
|
fpbufcur = fpbufsize = 0;
|
|
if (!valid) {
|
|
goto Done;
|
|
}
|
|
if (!discard) {
|
|
++items;
|
|
void *out = va_arg(va, void *);
|
|
if (charbytes == sizeof(char)) {
|
|
*(float *)out = (float)fp;
|
|
} else {
|
|
*(double *)out = (double)fp;
|
|
}
|
|
}
|
|
continue;
|
|
ReportConsumed:
|
|
n_ptr = va_arg(va, int *);
|
|
if (c != -1) {
|
|
*n_ptr = consumed - 1; // minus lookahead
|
|
} else {
|
|
*n_ptr = consumed;
|
|
}
|
|
continue;
|
|
DecodeString:
|
|
bufsize = !width ? 32 : rawmode ? width : width + 1;
|
|
if (discard) {
|
|
buf = NULL;
|
|
} else if (ismalloc) {
|
|
if ((buf = malloc(bufsize * charbytes))) {
|
|
struct FreeMe *entry;
|
|
if (buf && (entry = calloc(1, sizeof(struct FreeMe)))) {
|
|
entry->ptr = buf;
|
|
entry->next = freeme;
|
|
freeme = entry;
|
|
}
|
|
} else {
|
|
items = -1;
|
|
goto Done;
|
|
}
|
|
} else {
|
|
buf = va_arg(va, void *);
|
|
}
|
|
if (buf) {
|
|
size_t j = 0;
|
|
for (;;) {
|
|
if (ismalloc && !width && j + 2 + 1 >= bufsize &&
|
|
!__grow(&buf, &bufsize, charbytes, 0)) {
|
|
width = bufsize - 1;
|
|
}
|
|
if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) {
|
|
if (charbytes == 1) {
|
|
buf[j++] = (unsigned char)c;
|
|
c = READ;
|
|
} else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) !=
|
|
-1) {
|
|
if (charbytes == sizeof(char16_t)) {
|
|
unsigned w = EncodeUtf16(c);
|
|
do {
|
|
if ((j + 1) * 2 < bufsize) {
|
|
((char16_t *)buf)[j++] = w;
|
|
}
|
|
} while ((w >>= 16));
|
|
} else {
|
|
((wchar_t *)buf)[j++] = (wchar_t)c;
|
|
}
|
|
c = READ;
|
|
}
|
|
} else {
|
|
if (!j && c == -1 && !items) {
|
|
items = -1;
|
|
goto Done;
|
|
} else if (rawmode && j != width) {
|
|
/* The C standard says that %c "matches a sequence of characters
|
|
* of
|
|
* **exactly** the number specified by the field width". If we
|
|
* have fewer characters, what we've just read is invalid. */
|
|
goto Done;
|
|
} else if (!rawmode && j < bufsize) {
|
|
if (charbytes == sizeof(char)) {
|
|
buf[j] = '\0';
|
|
} else if (charbytes == sizeof(char16_t)) {
|
|
((char16_t *)buf)[j] = u'\0';
|
|
} else if (charbytes == sizeof(wchar_t)) {
|
|
((wchar_t *)buf)[j] = L'\0';
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
++items;
|
|
if (ismalloc) {
|
|
*va_arg(va, char **) = (void *)buf;
|
|
}
|
|
buf = NULL;
|
|
} else {
|
|
do {
|
|
if (isspace(c))
|
|
break;
|
|
} while ((c = READ) != -1);
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
NonDirectiveCharacter:
|
|
c = (c == p[i - 1]) ? READ : -1;
|
|
break;
|
|
}
|
|
}
|
|
Done:
|
|
while (freeme) {
|
|
struct FreeMe *entry = freeme;
|
|
freeme = entry->next;
|
|
if (items == -1)
|
|
free(entry->ptr);
|
|
free(entry);
|
|
}
|
|
if (fpbuf)
|
|
free(fpbuf);
|
|
return items;
|
|
}
|