mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-25 02:30:57 +00:00 
			
		
		
		
	- Fix preadv() and pwritev() for old distros - Introduce _npassert() and _unassert() macros - Prove that file locks work properly on Windows - Support fcntl(F_DUPFD_CLOEXEC) on more systems
		
			
				
	
	
		
			309 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			309 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 | |
| │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
 | |
| ╞══════════════════════════════════════════════════════════════════════════════╡
 | |
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │
 | |
| │                                                                              │
 | |
| │ Permission to use, copy, modify, and/or distribute this software for         │
 | |
| │ any purpose with or without fee is hereby granted, provided that the         │
 | |
| │ above copyright notice and this permission notice appear in all copies.      │
 | |
| │                                                                              │
 | |
| │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
 | |
| │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
 | |
| │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
 | |
| │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
 | |
| │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
 | |
| │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
 | |
| │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 | |
| │ PERFORMANCE OF THIS SOFTWARE.                                                │
 | |
| ╚─────────────────────────────────────────────────────────────────────────────*/
 | |
| #include "libc/fmt/conv.h"
 | |
| #include "libc/fmt/fmt.h"
 | |
| #include "libc/intrin/weaken.h"
 | |
| #include "libc/mem/mem.h"
 | |
| #include "libc/runtime/runtime.h"
 | |
| #include "libc/str/oldutf16.internal.h"
 | |
| #include "libc/str/str.h"
 | |
| #include "libc/str/tab.internal.h"
 | |
| #include "libc/str/tpdecodecb.internal.h"
 | |
| #include "libc/str/utf16.h"
 | |
| #include "libc/sysv/errfuns.h"
 | |
| 
 | |
| /**
 | |
|  * String / file / stream decoder.
 | |
|  *
 | |
|  * This scanf implementation is able to tokenize strings containing
 | |
|  * 8-bit through 128-bit integers (with validation), floating point
 | |
|  * numbers, etc. It can also be used to convert UTF-8 to UTF-16/32.
 | |
|  *
 | |
|  *   - `%d`  parses integer
 | |
|  *   - `%ms` parses string allocating buffer assigning pointer
 | |
|  *
 | |
|  * @param callback supplies UTF-8 characters using -1 sentinel
 | |
|  * @param fmt is a computer program embedded inside a c string, written
 | |
|  *     in a domain-specific programming language that, by design, lacks
 | |
|  *     Turing-completeness
 | |
|  * @param va points to the variadic argument state
 | |
|  * @see libc/fmt/pflink.h (dynamic memory is not a requirement)
 | |
|  */
 | |
| int vcscanf(int callback(void *), int unget(int, void *), void *arg,
 | |
|             const char *fmt, va_list va) {
 | |
|   struct FreeMe {
 | |
|     struct FreeMe *next;
 | |
|     void *ptr;
 | |
|   } *freeme = NULL;
 | |
|   const unsigned char *p = (const unsigned char *)fmt;
 | |
|   unsigned i = 0;
 | |
|   int items = 0;
 | |
|   int c = callback(arg);
 | |
|   while (c != -1) {
 | |
|     switch (p[i++]) {
 | |
|       case '\0':
 | |
|         if (c != -1 && unget) {
 | |
|           unget(c, arg);
 | |
|         }
 | |
|         goto Done;
 | |
|       case ' ':
 | |
|       case '\t':
 | |
|       case '\n':
 | |
|       case '\r':
 | |
|       case '\v':
 | |
|         while (isspace(c)) {
 | |
|           c = callback(arg);
 | |
|         }
 | |
|         break;
 | |
|       case '%': {
 | |
|         uint128_t number;
 | |
|         void *buf;
 | |
|         size_t bufsize;
 | |
|         unsigned width = 0;
 | |
|         unsigned char bits = 32;
 | |
|         unsigned char charbytes = sizeof(char);
 | |
|         unsigned char diglet;
 | |
|         unsigned char base;
 | |
|         unsigned char prefix;
 | |
|         bool rawmode = false;
 | |
|         bool issigned = false;
 | |
|         bool ismalloc = false;
 | |
|         bool isneg = false;
 | |
|         bool thousands = false;
 | |
|         bool discard = false;
 | |
|         for (;;) {
 | |
|           switch (p[i++]) {
 | |
|             case '%': /* %% → % */
 | |
|               goto NonDirectiveCharacter;
 | |
|             case '0':
 | |
|             case '1':
 | |
|             case '2':
 | |
|             case '3':
 | |
|             case '4':
 | |
|             case '5':
 | |
|             case '6':
 | |
|             case '7':
 | |
|             case '8':
 | |
|             case '9':
 | |
|               width *= 10;
 | |
|               width += p[i - 1] - '0';
 | |
|               break;
 | |
|             case '*':
 | |
|               discard = true;
 | |
|               break;
 | |
|             case 'm':
 | |
|               ismalloc = true;
 | |
|               break;
 | |
|             case 'c':
 | |
|               rawmode = true;
 | |
|               if (!width) width = 1;
 | |
|               /* εpsilon transition */
 | |
|             case 's':
 | |
|               goto DecodeString;
 | |
|             case '\'':
 | |
|               thousands = true;
 | |
|               break;
 | |
|             case 'j': /* j=64-bit jj=128-bit */
 | |
|               if (bits < 64) {
 | |
|                 bits = 64;
 | |
|               } else {
 | |
|                 bits = 128;
 | |
|               }
 | |
|               break;
 | |
|             case 'l': /* long */
 | |
|             case 'L': /* loooong */
 | |
|               charbytes = sizeof(wchar_t);
 | |
|               /* fallthrough */
 | |
|             case 't': /* ptrdiff_t */
 | |
|             case 'Z': /* size_t */
 | |
|             case 'z': /* size_t */
 | |
|               bits = 64;
 | |
|               break;
 | |
|             case 'h': /* short and char */
 | |
|               charbytes = sizeof(char16_t);
 | |
|               bits >>= 1;
 | |
|               break;
 | |
|             case 'b': /* binary */
 | |
|               base = 2;
 | |
|               prefix = 'b';
 | |
|               goto ConsumeBasePrefix;
 | |
|             case 'p': /* pointer (NexGen32e) */
 | |
|               bits = 48;
 | |
|               /* fallthrough */
 | |
|             case 'x':
 | |
|             case 'X': /* hexadecimal */
 | |
|               base = 16;
 | |
|               prefix = 'x';
 | |
|               goto ConsumeBasePrefix;
 | |
|             case 'o': /* octal */
 | |
|               base = 8;
 | |
|               goto DecodeNumber;
 | |
|             case 'd': /* decimal */
 | |
|             case 'n': /* TODO(jart): flexidecimal */
 | |
|               issigned = true;
 | |
|               if (c == '+' || (isneg = c == '-')) {
 | |
|                 c = callback(arg);
 | |
|               }
 | |
|               /* εpsilon transition */
 | |
|             case 'u':
 | |
|               base = 10;
 | |
|               goto DecodeNumber;
 | |
|             default:
 | |
|               items = einval();
 | |
|               goto Done;
 | |
|           }
 | |
|         }
 | |
|       ConsumeBasePrefix:
 | |
|         if (c == '0') {
 | |
|           c = callback(arg);
 | |
|           if (c == prefix || c == prefix + ('a' - 'A')) {
 | |
|             c = callback(arg);
 | |
|           }
 | |
|         }
 | |
|       DecodeNumber:
 | |
|         if (c != -1) {
 | |
|           number = 0;
 | |
|           width = !width ? bits : width;
 | |
|           do {
 | |
|             diglet = kBase36[(unsigned char)c];
 | |
|             if (1 <= diglet && diglet <= base) {
 | |
|               width -= 1;
 | |
|               number *= base;
 | |
|               number += diglet - 1;
 | |
|             } else if (thousands && diglet == ',') {
 | |
|               /* ignore */
 | |
|             } else {
 | |
|               break;
 | |
|             }
 | |
|           } while ((c = callback(arg)) != -1 && width > 0);
 | |
|           if (!discard) {
 | |
|             uint128_t bane = (uint128_t)1 << (bits - 1);
 | |
|             if (!(number & ~((bane - 1) | (issigned ? 0 : bane))) ||
 | |
|                 (issigned && number == bane /* two's complement bane */)) {
 | |
|               ++items;
 | |
|             } else {
 | |
|               items = erange();
 | |
|               goto Done;
 | |
|             }
 | |
|             if (issigned && isneg) {
 | |
|               number = ~number + 1;
 | |
|             }
 | |
|             void *out = va_arg(va, void *);
 | |
|             switch (bits) {
 | |
|               case sizeof(uint128_t) * CHAR_BIT:
 | |
|                 *(uint128_t *)out = number;
 | |
|                 break;
 | |
|               case 48:
 | |
|               case 64:
 | |
|                 *(uint64_t *)out = (uint64_t)number;
 | |
|                 break;
 | |
|               case 32:
 | |
|                 *(uint32_t *)out = (uint32_t)number;
 | |
|                 break;
 | |
|               case 16:
 | |
|                 *(uint16_t *)out = (uint16_t)number;
 | |
|                 break;
 | |
|               case 8:
 | |
|               default:
 | |
|                 *(uint8_t *)out = (uint8_t)number;
 | |
|                 break;
 | |
|             }
 | |
|           }
 | |
|         }
 | |
|         continue;
 | |
|       DecodeString:
 | |
|         bufsize = !width ? 32 : rawmode ? width : width + 1;
 | |
|         if (discard) {
 | |
|           buf = NULL;
 | |
|         } else if (ismalloc) {
 | |
|           buf = _weaken(malloc)(bufsize * charbytes);
 | |
|           struct FreeMe *entry;
 | |
|           if (buf && (entry = _weaken(calloc)(1, sizeof(struct FreeMe)))) {
 | |
|             entry->ptr = buf;
 | |
|             entry->next = freeme;
 | |
|             freeme = entry;
 | |
|           }
 | |
|         } else {
 | |
|           buf = va_arg(va, void *);
 | |
|         }
 | |
|         if (buf) {
 | |
|           size_t j = 0;
 | |
|           for (;;) {
 | |
|             if (ismalloc && !width && j + 2 + 1 >= bufsize &&
 | |
|                 !_weaken(__grow)(&buf, &bufsize, charbytes, 0)) {
 | |
|               width = bufsize - 1;
 | |
|             }
 | |
|             if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) {
 | |
|               if (charbytes == 1) {
 | |
|                 ((unsigned char *)buf)[j++] = (unsigned char)c;
 | |
|                 c = callback(arg);
 | |
|               } else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) !=
 | |
|                          -1) {
 | |
|                 if (charbytes == sizeof(char16_t)) {
 | |
|                   size_t k = 0;
 | |
|                   unsigned w = EncodeUtf16(c);
 | |
|                   do {
 | |
|                     if ((j + 1) * 2 < bufsize) {
 | |
|                       ((char16_t *)buf)[j++] = w;
 | |
|                     }
 | |
|                   } while ((w >>= 16));
 | |
|                 } else {
 | |
|                   ((wchar_t *)buf)[j++] = (wchar_t)c;
 | |
|                 }
 | |
|                 c = callback(arg);
 | |
|               }
 | |
|             } else {
 | |
|               if (!rawmode && j < bufsize) {
 | |
|                 if (charbytes == sizeof(char)) {
 | |
|                   ((unsigned char *)buf)[j] = '\0';
 | |
|                 } else if (charbytes == sizeof(char16_t)) {
 | |
|                   ((char16_t *)buf)[j] = u'\0';
 | |
|                 } else if (charbytes == sizeof(wchar_t)) {
 | |
|                   ((wchar_t *)buf)[j] = L'\0';
 | |
|                 }
 | |
|               }
 | |
|               break;
 | |
|             }
 | |
|           }
 | |
|           ++items;
 | |
|           if (ismalloc) {
 | |
|             *va_arg(va, char **) = buf;
 | |
|           }
 | |
|         } else {
 | |
|           do {
 | |
|             if (isspace(c)) break;
 | |
|           } while ((c = callback(arg)) != -1);
 | |
|         }
 | |
|         break;
 | |
|       }
 | |
|       default:
 | |
|       NonDirectiveCharacter:
 | |
|         c = (c == p[i - 1]) ? callback(arg) : -1;
 | |
|         break;
 | |
|     }
 | |
|   }
 | |
| Done:
 | |
|   while (freeme && _weaken(free)) {
 | |
|     struct FreeMe *entry = freeme;
 | |
|     freeme = entry->next;
 | |
|     if (items == -1) _weaken(free)(entry->ptr);
 | |
|     _weaken(free)(entry);
 | |
|   }
 | |
|   return items;
 | |
| }
 |