mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
e16a7d8f3b
`et` means `expandtab`. ```sh rg 'vi: .* :vi' -l -0 | \ xargs -0 sed -i '' 's/vi: \(.*\) et\(.*\) :vi/vi: \1 xoet\2:vi/' rg 'vi: .* :vi' -l -0 | \ xargs -0 sed -i '' 's/vi: \(.*\)noet\(.*\):vi/vi: \1et\2 :vi/' rg 'vi: .* :vi' -l -0 | \ xargs -0 sed -i '' 's/vi: \(.*\)xoet\(.*\):vi/vi: \1noet\2:vi/' ```
140 lines
5.5 KiB
C
140 lines
5.5 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Python 3 │
|
|
│ https://docs.python.org/3/license.html │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "third_party/python/Include/pyerrors.h"
|
|
#include "third_party/python/Include/pymem.h"
|
|
#include "third_party/python/Modules/bextra.h"
|
|
#include "third_party/python/Modules/unicodedata.h"
|
|
#include "third_party/python/Modules/unicodedata_unidata.h"
|
|
|
|
/* Build the decomposed form of `input` and return it as a new string.
 *
 * self   When non-NULL and UCD_Check(self) holds, the object's
 *        `normalization` callback (reached through PreviousDBVersion)
 *        is consulted for each code point before the regular
 *        decomposition tables; `self` is also forwarded to
 *        _PyUnicode_GetDecompRecord().
 * input  String to decompose.  Its length, kind and data are read
 *        through the PyUnicode_* accessor macros, so it is presumably
 *        expected to be "ready" already -- confirm against callers.
 * k      Zero: code points whose decomposition record carries a prefix
 *        (a compatibility decomposition) are copied unchanged (NFD).
 *        Non-zero: those decompositions are expanded as well (NFKD).
 *
 * Returns the new string, or NULL on failure.  PyErr_NoMemory() is
 * called when the scratch buffer cannot be allocated or grown; a NULL
 * from PyUnicode_FromKindAndData() is passed through unchanged.
 */
PyObject *
_PyUnicode_NfdNfkd(PyObject *self, PyObject *input, int k)
{
    /* Longest decomposition in Unicode 3.2: U+FDFA */
    Py_UCS4 pending[20];
    int depth = 0;
    int rec_index, rec_prefix, rec_count;
    int kind;
    void *data;
    Py_UCS4 *output;
    PyObject *result;
    Py_ssize_t src, dst, pos, total;
    Py_ssize_t capacity, room;
    unsigned char prev, cur;
    Py_ssize_t in_len = PyUnicode_GET_LENGTH(input);

    /* Overallocate at most 10 characters: short inputs get twice their
       length, longer ones get ten extra slots unless that would
       overflow Py_ssize_t. */
    room = in_len;
    if (room <= 10)
        room *= 2;
    else if (room <= PY_SSIZE_T_MAX - 10)
        room += 10;
    capacity = room;

    output = PyMem_NEW(Py_UCS4, room);
    if (output == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    kind = PyUnicode_KIND(input);
    data = PyUnicode_DATA(input);
    dst = 0;

    /* Phase 1: expand every input code point through a small explicit
       stack so that nested decompositions are resolved without
       recursion. */
    for (src = 0; src < in_len; src++) {
        pending[depth] = PyUnicode_READ(kind, data, src);
        depth++;

        while (depth != 0) {
            Py_UCS4 code;

            depth--;
            code = pending[depth];

            /* Hangul Decomposition adds three characters in a single
               step, so we need at least that much room. */
            if (room < 3) {
                Py_UCS4 *grown;

                capacity += 10;
                room += 10;
                grown = PyMem_Realloc(output, capacity*sizeof(Py_UCS4));
                if (grown == NULL) {
                    /* The old buffer is still ours to release. */
                    PyMem_Free(output);
                    PyErr_NoMemory();
                    return NULL;
                }
                output = grown;
            }

            /* Hangul Decomposition: computed arithmetically from the
               syllable index instead of looked up in a table. */
            if (_Hanghoul_SBase <= code &&
                code < (_Hanghoul_SBase + _Hanghoul_SCount)) {
                int s_index = code - _Hanghoul_SBase;
                int lead = _Hanghoul_LBase + s_index / _Hanghoul_NCount;
                int vowel = _Hanghoul_VBase +
                            (s_index % _Hanghoul_NCount) / _Hanghoul_TCount;
                int trail = _Hanghoul_TBase + s_index % _Hanghoul_TCount;

                output[dst] = lead;
                output[dst + 1] = vowel;
                dst += 2;
                room -= 2;
                /* A trailing value equal to the base means there is no
                   third component to emit. */
                if (trail != _Hanghoul_TBase) {
                    output[dst] = trail;
                    dst++;
                    room--;
                }
                continue;
            }

            /* Normalization changes supplied by `self`, if any. */
            if (self && UCD_Check(self)) {
                Py_UCS4 replacement =
                    ((PreviousDBVersion*)self)->normalization(code);
                if (replacement != 0) {
                    /* Re-process the replacement instead of `code`. */
                    pending[depth] = replacement;
                    depth++;
                    continue;
                }
            }

            /* Other decompositions. */
            _PyUnicode_GetDecompRecord(self, code,
                                       &rec_index, &rec_prefix, &rec_count);

            /* Copy character if it is not decomposable, or has a
               compatibility decomposition, but we do NFD. */
            if (rec_count == 0 || (rec_prefix != 0 && k == 0)) {
                output[dst] = code;
                dst++;
                room--;
                continue;
            }

            /* Push the decomposition in reverse order so that its first
               code point is the next one popped. */
            while (rec_count != 0) {
                rec_count--;
                pending[depth] = BitFieldExtract(_PyUnicode_Decomp,
                                                 rec_index + rec_count,
                                                 _PyUnicode_DecompBits);
                depth++;
            }
        }
    }

    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, dst);
    PyMem_Free(output);
    if (result == NULL)
        return NULL;

    /* result is guaranteed to be ready, as it is compact. */
    kind = PyUnicode_KIND(result);
    data = PyUnicode_DATA(result);
    total = PyUnicode_GET_LENGTH(result);

    /* Phase 2: sort canonically.  Each code point whose combining class
       is lower than that of its predecessor is moved left, in place,
       past every predecessor with a higher non-zero combining class.
       Index 0 is read unconditionally, even when the result is empty. */
    prev = _PyUnicode_GetRecord(PyUnicode_READ(kind, data, 0))->combining;
    for (pos = 1; pos < total; pos++) {
        cur = _PyUnicode_GetRecord(PyUnicode_READ(kind, data, pos))->combining;

        if (prev != 0 && cur != 0 && prev > cur) {
            /* Non-canonical order.  Need to switch *pos with previous. */
            Py_ssize_t left = pos - 1;

            do {
                Py_UCS4 moved = PyUnicode_READ(kind, data, left + 1);
                Py_UCS4 shifted = PyUnicode_READ(kind, data, left);

                PyUnicode_WRITE(kind, data, left + 1, shifted);
                PyUnicode_WRITE(kind, data, left, moved);

                left--;
                if (left < 0)
                    break;
                prev = _PyUnicode_GetRecord(
                           PyUnicode_READ(kind, data, left))->combining;
            } while (prev != 0 && prev > cur);

            /* Slot `pos` now holds a different code point; reload its
               combining class for the next comparison. */
            prev = _PyUnicode_GetRecord(
                       PyUnicode_READ(kind, data, pos))->combining;
        }
        else {
            prev = cur;
        }
    }

    return result;
}
|