mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-26 11:10:58 +00:00 
			
		
		
		
	- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
		
	
			
		
			
				
	
	
		
			91 lines
		
	
	
	
		
			3.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			91 lines
		
	
	
	
		
			3.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
 | |
| │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
 | |
| ╞══════════════════════════════════════════════════════════════════════════════╡
 | |
| │ Copyright 2021 Justine Alexandra Roberts Tunney                              │
 | |
| │                                                                              │
 | |
| │ Permission to use, copy, modify, and/or distribute this software for         │
 | |
| │ any purpose with or without fee is hereby granted, provided that the         │
 | |
| │ above copyright notice and this permission notice appear in all copies.      │
 | |
| │                                                                              │
 | |
| │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
 | |
| │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
 | |
| │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
 | |
| │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
 | |
| │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
 | |
| │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
 | |
| │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
 | |
| │ PERFORMANCE OF THIS SOFTWARE.                                                │
 | |
| ╚─────────────────────────────────────────────────────────────────────────────*/
 | |
| #include "libc/bits/bits.h"
 | |
| #include "libc/intrin/packsswb.h"
 | |
| #include "libc/intrin/pandn.h"
 | |
| #include "libc/intrin/pcmpgtb.h"
 | |
| #include "libc/intrin/pcmpgtw.h"
 | |
| #include "libc/intrin/pmovmskb.h"
 | |
| #include "libc/intrin/punpckhbw.h"
 | |
| #include "libc/intrin/punpcklbw.h"
 | |
| #include "libc/mem/mem.h"
 | |
| #include "libc/nexgen32e/bsr.h"
 | |
| #include "libc/str/str.h"
 | |
| #include "libc/str/thompike.h"
 | |
| #include "libc/str/tpenc.h"
 | |
| #include "libc/str/utf16.h"
 | |
| #include "libc/x/x.h"
 | |
| 
 | |
/* Eight lanes of 127 (DEL): the largest code unit the ASCII fast path accepts. */
static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127};

/**
 * Transcodes UTF-16 to UTF-8.
 *
 * A high surrogate (0xD800..0xDBFF) immediately followed by a low
 * surrogate (0xDC00..0xDFFF) is merged into a single code point. Any
 * other surrogate (a lone low surrogate, a high surrogate followed by
 * something else, or a high surrogate at the end of input) is replaced
 * by U+FFFD and only that one unit is consumed.
 *
 * @param p is input value
 * @param n is number of char16_t units in p, or -1 which implies
 *     strlen16(p) (zero if p is NULL)
 * @param z if non-NULL receives output length in bytes, not counting
 *     the terminating NUL; it is set to 0 on failure
 * @return NUL-terminated string obtained from malloc() which the caller
 *     must free(), or NULL if the buffer couldn't be allocated
 */
char *utf16toutf8(const char16_t *p, size_t n, size_t *z) {
  char *r, *q, *t;
  wint_t x, y;
  unsigned w;
  const char16_t *e;
  int16_t v1[8], v2[8], v3[8], vz[8];
  if (z) *z = 0;
  if (n == (size_t)-1) n = p ? strlen16(p) : 0;
  /* refuse sizes for which n * 4 + 8 + 1 would wrap around */
  if (n > ((size_t)-1 - 8 - 1) / 4) return NULL;
  /* +8 is slack for the 8-byte store below, +1 is for the NUL */
  if (!(q = r = malloc(n * 4 + 8 + 1))) return NULL;
  for (e = p + n; p < e;) {
    /*
     * ASCII fast path: narrow eight units at a time while every one of
     * them is in 1..127. The bound is strictly greater than eight so
     * that at least one unit always remains for the scalar code below.
     */
    if (e - p > 8) {
      bzero(vz, 16);
      do {
        memcpy(v1, p, 16);
        pcmpgtw(v2, v1, vz);     /* lanes where unit > 0   */
        pcmpgtw(v3, v1, kDel16); /* lanes where unit > 127 */
        pandn((void *)v2, (void *)v3, (void *)v2); /* > 0 and not > 127 */
        if (pmovmskb((void *)v2) != 0xFFFF) break;
        packsswb((void *)v1, v1, v1);
        memcpy(q, v1, 8);
        p += 8;
        q += 8;
      } while (e - p > 8);
    }
    x = *p++ & 0xffff;
    if (!IsUcs2(x)) {
      /*
       * IsUcs2() rejected x, so it is handled as a surrogate. Merge it
       * only when it is a high surrogate directly followed by a low
       * one; otherwise substitute U+FFFD and leave the next unit to be
       * decoded on its own.
       */
      if ((x & 0xFC00) == 0xD800 && p < e &&
          ((y = *p & 0xffff) & 0xFC00) == 0xDC00) {
        x = MergeUtf16(x, y);
        ++p;
      } else {
        x = 0xFFFD;
      }
    }
    if (x < 0200) {
      *q++ = x;
    } else {
      /*
       * NOTE(review): with the pairing check above x should not exceed
       * 0x10FFFF, whose UTF-8 form is at most four bytes, so the packed
       * encoding is expected to fit in 32-bit w. Confirm against
       * tpenc() and MergeUtf16().
       */
      w = tpenc(x);
      WRITE64LE(q, w);
      /* index of the highest set bit, divided by 8, plus 1 = byte count */
      q += (bsr(w) >> 3) + 1;
    }
  }
  if (z) *z = q - r;
  *q++ = '\0';
  /* shrink to fit; if that fails the original allocation is still valid */
  if ((t = realloc(r, q - r))) r = t;
  return r;
}
 |