mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-29 00:32:29 +00:00
Make numerous improvements
- Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
This commit is contained in:
parent
fa7b4f5bd1
commit
39bf41f4eb
806 changed files with 77494 additions and 63859 deletions
140
third_party/python/Modules/unicodedata_nfdnfkd.c
vendored
Normal file
140
third_party/python/Modules/unicodedata_nfdnfkd.c
vendored
Normal file
|
@ -0,0 +1,140 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Python 3 │
|
||||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "third_party/python/Include/pyerrors.h"
|
||||
#include "third_party/python/Include/pymem.h"
|
||||
#include "third_party/python/Modules/unicodedata.h"
|
||||
#include "third_party/python/Modules/unicodedata_unidata.h"
|
||||
/* clang-format off */
|
||||
|
||||
PyObject *
|
||||
_PyUnicode_NfdNfkd(PyObject *self, PyObject *input, int k)
|
||||
{
|
||||
PyObject *result;
|
||||
Py_UCS4 *output;
|
||||
Py_ssize_t i, o, osize;
|
||||
int kind;
|
||||
void *data;
|
||||
/* Longest decomposition in Unicode 3.2: U+FDFA */
|
||||
Py_UCS4 stack[20];
|
||||
Py_ssize_t space, isize;
|
||||
int index, prefix, count, stackptr;
|
||||
unsigned char prev, cur;
|
||||
stackptr = 0;
|
||||
isize = PyUnicode_GET_LENGTH(input);
|
||||
space = isize;
|
||||
/* Overallocate at most 10 characters. */
|
||||
if (space > 10) {
|
||||
if (space <= PY_SSIZE_T_MAX - 10)
|
||||
space += 10;
|
||||
}
|
||||
else {
|
||||
space *= 2;
|
||||
}
|
||||
osize = space;
|
||||
output = PyMem_NEW(Py_UCS4, space);
|
||||
if (!output) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
i = o = 0;
|
||||
kind = PyUnicode_KIND(input);
|
||||
data = PyUnicode_DATA(input);
|
||||
while (i < isize) {
|
||||
stack[stackptr++] = PyUnicode_READ(kind, data, i++);
|
||||
while(stackptr) {
|
||||
Py_UCS4 code = stack[--stackptr];
|
||||
/* Hangul Decomposition adds three characters in
|
||||
a single step, so we need at least that much room. */
|
||||
if (space < 3) {
|
||||
Py_UCS4 *new_output;
|
||||
osize += 10;
|
||||
space += 10;
|
||||
new_output = PyMem_Realloc(output, osize*sizeof(Py_UCS4));
|
||||
if (new_output == NULL) {
|
||||
PyMem_Free(output);
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
output = new_output;
|
||||
}
|
||||
/* Hangul Decomposition. */
|
||||
if (_Hanghoul_SBase <= code && code < (_Hanghoul_SBase + _Hanghoul_SCount)) {
|
||||
int SIndex = code - _Hanghoul_SBase;
|
||||
int L = _Hanghoul_LBase + SIndex / _Hanghoul_NCount;
|
||||
int V = _Hanghoul_VBase + (SIndex % _Hanghoul_NCount) / _Hanghoul_TCount;
|
||||
int T = _Hanghoul_TBase + SIndex % _Hanghoul_TCount;
|
||||
output[o++] = L;
|
||||
output[o++] = V;
|
||||
space -= 2;
|
||||
if (T != _Hanghoul_TBase) {
|
||||
output[o++] = T;
|
||||
space --;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
/* normalization changes */
|
||||
if (self && UCD_Check(self)) {
|
||||
Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code);
|
||||
if (value != 0) {
|
||||
stack[stackptr++] = value;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* Other decompositions. */
|
||||
_PyUnicode_GetDecompRecord(self, code, &index, &prefix, &count);
|
||||
/* Copy character if it is not decomposable, or has a
|
||||
compatibility decomposition, but we do NFD. */
|
||||
if (!count || (prefix && !k)) {
|
||||
output[o++] = code;
|
||||
space--;
|
||||
continue;
|
||||
}
|
||||
/* Copy decomposition onto the stack, in reverse
|
||||
order. */
|
||||
while(count) {
|
||||
code = _PyUnicode_Bextr(_PyUnicode_Decomp,
|
||||
index + (--count),
|
||||
_PyUnicode_DecompBits);
|
||||
stack[stackptr++] = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
output, o);
|
||||
PyMem_Free(output);
|
||||
if (!result)
|
||||
return NULL;
|
||||
/* result is guaranteed to be ready, as it is compact. */
|
||||
kind = PyUnicode_KIND(result);
|
||||
data = PyUnicode_DATA(result);
|
||||
/* Sort canonically. */
|
||||
i = 0;
|
||||
prev = _PyUnicode_GetRecord(PyUnicode_READ(kind, data, i))->combining;
|
||||
for (i++; i < PyUnicode_GET_LENGTH(result); i++) {
|
||||
cur = _PyUnicode_GetRecord(PyUnicode_READ(kind, data, i))->combining;
|
||||
if (prev == 0 || cur == 0 || prev <= cur) {
|
||||
prev = cur;
|
||||
continue;
|
||||
}
|
||||
/* Non-canonical order. Need to switch *i with previous. */
|
||||
o = i - 1;
|
||||
while (1) {
|
||||
Py_UCS4 tmp = PyUnicode_READ(kind, data, o+1);
|
||||
PyUnicode_WRITE(kind, data, o+1,
|
||||
PyUnicode_READ(kind, data, o));
|
||||
PyUnicode_WRITE(kind, data, o, tmp);
|
||||
o--;
|
||||
if (o < 0)
|
||||
break;
|
||||
prev = _PyUnicode_GetRecord(PyUnicode_READ(kind, data, o))->combining;
|
||||
if (prev == 0 || prev <= cur)
|
||||
break;
|
||||
}
|
||||
prev = _PyUnicode_GetRecord(PyUnicode_READ(kind, data, i))->combining;
|
||||
}
|
||||
return result;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue