mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 05:42:29 +00:00
Remove printf() linking hack
This commit is contained in:
parent
ba03cd95c5
commit
b881c0ec9e
107 changed files with 1520 additions and 2577 deletions
310
libc/stdio/vcscanf.c
Normal file
310
libc/stdio/vcscanf.c
Normal file
|
@ -0,0 +1,310 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/fmt/conv.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/tab.internal.h"
|
||||
#include "libc/str/tpdecodecb.internal.h"
|
||||
#include "libc/str/utf16.h"
|
||||
#include "libc/sysv/errfuns.h"
|
||||
|
||||
/**
|
||||
* String / file / stream decoder.
|
||||
*
|
||||
* This scanf implementation is able to tokenize strings containing
|
||||
* 8-bit through 128-bit integers (with validation), floating point
|
||||
* numbers, etc. It can also be used to convert UTF-8 to UTF-16/32.
|
||||
*
|
||||
* - `%d` parses integer
|
||||
* - `%ms` parses string allocating buffer assigning pointer
|
||||
*
|
||||
* @param callback supplies UTF-8 characters using -1 sentinel
|
||||
* @param fmt is a computer program embedded inside a c string, written
|
||||
* in a domain-specific programming language that, by design, lacks
|
||||
* Turing-completeness
|
||||
* @param va points to the variadic argument state
|
||||
* @see libc/fmt/pflink.h (dynamic memory is not a requirement)
|
||||
*/
|
||||
int vcscanf(int callback(void *), int unget(int, void *), void *arg,
|
||||
const char *fmt, va_list va) {
|
||||
struct FreeMe {
|
||||
struct FreeMe *next;
|
||||
void *ptr;
|
||||
} *freeme = NULL;
|
||||
const unsigned char *p = (const unsigned char *)fmt;
|
||||
unsigned i = 0;
|
||||
int items = 0;
|
||||
int c = callback(arg);
|
||||
while (c != -1) {
|
||||
switch (p[i++]) {
|
||||
case '\0':
|
||||
if (c != -1 && unget) {
|
||||
unget(c, arg);
|
||||
}
|
||||
goto Done;
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\r':
|
||||
case '\v':
|
||||
while (isspace(c)) {
|
||||
c = callback(arg);
|
||||
}
|
||||
break;
|
||||
case '%': {
|
||||
uint128_t number;
|
||||
void *buf;
|
||||
size_t bufsize;
|
||||
unsigned width = 0;
|
||||
unsigned char bits = 32;
|
||||
unsigned char charbytes = sizeof(char);
|
||||
unsigned char diglet;
|
||||
unsigned char base;
|
||||
unsigned char prefix;
|
||||
bool rawmode = false;
|
||||
bool issigned = false;
|
||||
bool ismalloc = false;
|
||||
bool isneg = false;
|
||||
bool thousands = false;
|
||||
bool discard = false;
|
||||
for (;;) {
|
||||
switch (p[i++]) {
|
||||
case '%': /* %% → % */
|
||||
goto NonDirectiveCharacter;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
width *= 10;
|
||||
width += p[i - 1] - '0';
|
||||
break;
|
||||
case '*':
|
||||
discard = true;
|
||||
break;
|
||||
case 'm':
|
||||
ismalloc = true;
|
||||
break;
|
||||
case 'c':
|
||||
rawmode = true;
|
||||
if (!width) width = 1;
|
||||
/* εpsilon transition */
|
||||
case 's':
|
||||
goto DecodeString;
|
||||
case '\'':
|
||||
thousands = true;
|
||||
break;
|
||||
case 'j': /* j=64-bit jj=128-bit */
|
||||
if (bits < 64) {
|
||||
bits = 64;
|
||||
} else {
|
||||
bits = 128;
|
||||
}
|
||||
break;
|
||||
case 'l': /* long */
|
||||
case 'L': /* loooong */
|
||||
charbytes = sizeof(wchar_t);
|
||||
/* fallthrough */
|
||||
case 't': /* ptrdiff_t */
|
||||
case 'Z': /* size_t */
|
||||
case 'z': /* size_t */
|
||||
bits = 64;
|
||||
break;
|
||||
case 'h': /* short and char */
|
||||
charbytes = sizeof(char16_t);
|
||||
bits >>= 1;
|
||||
break;
|
||||
case 'b': /* binary */
|
||||
base = 2;
|
||||
prefix = 'b';
|
||||
goto ConsumeBasePrefix;
|
||||
case 'p': /* pointer (NexGen32e) */
|
||||
bits = 48;
|
||||
/* fallthrough */
|
||||
case 'x':
|
||||
case 'X': /* hexadecimal */
|
||||
base = 16;
|
||||
prefix = 'x';
|
||||
goto ConsumeBasePrefix;
|
||||
case 'o': /* octal */
|
||||
base = 8;
|
||||
goto DecodeNumber;
|
||||
case 'd': /* decimal */
|
||||
case 'n': /* TODO(jart): flexidecimal */
|
||||
issigned = true;
|
||||
if (c == '+' || (isneg = c == '-')) {
|
||||
c = callback(arg);
|
||||
}
|
||||
/* εpsilon transition */
|
||||
case 'u':
|
||||
base = 10;
|
||||
goto DecodeNumber;
|
||||
default:
|
||||
items = einval();
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
ConsumeBasePrefix:
|
||||
if (c == '0') {
|
||||
c = callback(arg);
|
||||
if (c == prefix || c == prefix + ('a' - 'A')) {
|
||||
c = callback(arg);
|
||||
} else if (c == -1) {
|
||||
c = '0';
|
||||
}
|
||||
}
|
||||
DecodeNumber:
|
||||
if (c != -1) {
|
||||
number = 0;
|
||||
width = !width ? bits : width;
|
||||
do {
|
||||
diglet = kBase36[(unsigned char)c];
|
||||
if (1 <= diglet && diglet <= base) {
|
||||
width -= 1;
|
||||
number *= base;
|
||||
number += diglet - 1;
|
||||
} else if (thousands && diglet == ',') {
|
||||
/* ignore */
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while ((c = callback(arg)) != -1 && width > 0);
|
||||
if (!discard) {
|
||||
uint128_t bane = (uint128_t)1 << (bits - 1);
|
||||
if (!(number & ~((bane - 1) | (issigned ? 0 : bane))) ||
|
||||
(issigned && number == bane /* two's complement bane */)) {
|
||||
++items;
|
||||
} else {
|
||||
items = erange();
|
||||
goto Done;
|
||||
}
|
||||
if (issigned && isneg) {
|
||||
number = ~number + 1;
|
||||
}
|
||||
void *out = va_arg(va, void *);
|
||||
switch (bits) {
|
||||
case sizeof(uint128_t) * CHAR_BIT:
|
||||
*(uint128_t *)out = number;
|
||||
break;
|
||||
case 48:
|
||||
case 64:
|
||||
*(uint64_t *)out = (uint64_t)number;
|
||||
break;
|
||||
case 32:
|
||||
*(uint32_t *)out = (uint32_t)number;
|
||||
break;
|
||||
case 16:
|
||||
*(uint16_t *)out = (uint16_t)number;
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
*(uint8_t *)out = (uint8_t)number;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
DecodeString:
|
||||
bufsize = !width ? 32 : rawmode ? width : width + 1;
|
||||
if (discard) {
|
||||
buf = NULL;
|
||||
} else if (ismalloc) {
|
||||
buf = _weaken(malloc)(bufsize * charbytes);
|
||||
struct FreeMe *entry;
|
||||
if (buf && (entry = _weaken(calloc)(1, sizeof(struct FreeMe)))) {
|
||||
entry->ptr = buf;
|
||||
entry->next = freeme;
|
||||
freeme = entry;
|
||||
}
|
||||
} else {
|
||||
buf = va_arg(va, void *);
|
||||
}
|
||||
if (buf) {
|
||||
size_t j = 0;
|
||||
for (;;) {
|
||||
if (ismalloc && !width && j + 2 + 1 >= bufsize &&
|
||||
!_weaken(__grow)(&buf, &bufsize, charbytes, 0)) {
|
||||
width = bufsize - 1;
|
||||
}
|
||||
if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) {
|
||||
if (charbytes == 1) {
|
||||
((unsigned char *)buf)[j++] = (unsigned char)c;
|
||||
c = callback(arg);
|
||||
} else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) !=
|
||||
-1) {
|
||||
if (charbytes == sizeof(char16_t)) {
|
||||
size_t k = 0;
|
||||
unsigned w = EncodeUtf16(c);
|
||||
do {
|
||||
if ((j + 1) * 2 < bufsize) {
|
||||
((char16_t *)buf)[j++] = w;
|
||||
}
|
||||
} while ((w >>= 16));
|
||||
} else {
|
||||
((wchar_t *)buf)[j++] = (wchar_t)c;
|
||||
}
|
||||
c = callback(arg);
|
||||
}
|
||||
} else {
|
||||
if (!rawmode && j < bufsize) {
|
||||
if (charbytes == sizeof(char)) {
|
||||
((unsigned char *)buf)[j] = '\0';
|
||||
} else if (charbytes == sizeof(char16_t)) {
|
||||
((char16_t *)buf)[j] = u'\0';
|
||||
} else if (charbytes == sizeof(wchar_t)) {
|
||||
((wchar_t *)buf)[j] = L'\0';
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
++items;
|
||||
if (ismalloc) {
|
||||
*va_arg(va, char **) = buf;
|
||||
}
|
||||
} else {
|
||||
do {
|
||||
if (isspace(c)) break;
|
||||
} while ((c = callback(arg)) != -1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
NonDirectiveCharacter:
|
||||
c = (c == p[i - 1]) ? callback(arg) : -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Done:
|
||||
while (freeme && _weaken(free)) {
|
||||
struct FreeMe *entry = freeme;
|
||||
freeme = entry->next;
|
||||
if (items == -1) _weaken(free)(entry->ptr);
|
||||
_weaken(free)(entry);
|
||||
}
|
||||
return items;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue