mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 13:52:28 +00:00
Apply fixes and speedups
This commit is contained in:
parent
7521bf9e73
commit
725f4d79f6
36 changed files with 682 additions and 334 deletions
9
third_party/python/Modules/unicodedata.c
vendored
9
third_party/python/Modules/unicodedata.c
vendored
|
@ -5,6 +5,7 @@
|
|||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/nexgen32e/kompressor.h"
|
||||
#include "third_party/python/Include/floatobject.h"
|
||||
|
@ -404,7 +405,7 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
|
|||
|
||||
/* high byte is number of hex bytes (usually one or two), low byte
|
||||
is prefix code (from _PyUnicode_DecompPrefix) */
|
||||
count = _PyUnicode_Bextr(_PyUnicode_Decomp, index, _PyUnicode_DecompBits) >> 8;
|
||||
count = bextra(_PyUnicode_Decomp, index, _PyUnicode_DecompBits) >> 8;
|
||||
|
||||
/* XXX: could allocate the PyString up front instead
|
||||
(strlen(prefix) + 5 * count + 1 bytes) */
|
||||
|
@ -412,7 +413,7 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
|
|||
/* Based on how index is calculated above and _PyUnicode_Decomp is
|
||||
generated from Tools/unicode/makeunicodedata.py, it should not be
|
||||
possible to overflow _PyUnicode_DecompPrefix. */
|
||||
prefix_index = _PyUnicode_Bextr(_PyUnicode_Decomp, index, _PyUnicode_DecompBits) & 255;
|
||||
prefix_index = bextra(_PyUnicode_Decomp, index, _PyUnicode_DecompBits) & 255;
|
||||
assert(prefix_index < Py_ARRAY_LENGTH(_PyUnicode_DecompPrefix));
|
||||
|
||||
/* copy prefix */
|
||||
|
@ -424,8 +425,8 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
|
|||
decomp[i++] = ' ';
|
||||
assert(i < sizeof(decomp));
|
||||
PyOS_snprintf(decomp + i, sizeof(decomp) - i, "%04X",
|
||||
_PyUnicode_Bextr(_PyUnicode_Decomp, ++index,
|
||||
_PyUnicode_DecompBits));
|
||||
bextra(_PyUnicode_Decomp, ++index,
|
||||
_PyUnicode_DecompBits));
|
||||
i += strlen(decomp + i);
|
||||
}
|
||||
return PyUnicode_FromStringAndSize(decomp, i);
|
||||
|
|
19
third_party/python/Modules/unicodedata.h
vendored
19
third_party/python/Modules/unicodedata.h
vendored
|
@ -96,25 +96,6 @@ void _PyUnicode_FindSyllable(const char *, int *, int *, int, int);
|
|||
int _PyUnicode_GetCode(PyObject *, const char *, int, Py_UCS4 *, int);
|
||||
void _PyUnicode_GetDecompRecord(PyObject *, Py_UCS4, int *, int *, int *);
|
||||
|
||||
/*
 * Returns the i-th b-bit field of p, where p is read as a stream of
 * consecutive b-bit fields packed into unsigned words, with each field
 * starting at the lower-order bits of its word.
 *
 * p  array of packed words; p[j + 1] is also read when the requested
 *    field straddles a word boundary
 * i  zero-based index of the field to extract
 * b  width of each field in bits; asserted to satisfy 0 <= b < w, where
 *    w is the number of bits in an unsigned
 *
 * The result is masked down to its low b bits.
 */
static inline unsigned _PyUnicode_Bextr(const unsigned *p, unsigned i, char b) {
|
||||
size_t j; /* first the absolute bit offset of the field, then its word index */
|
||||
unsigned k, r, w; /* k: bit offset inside the word, r: result, w: bits per word */
|
||||
w = sizeof(unsigned) * CHAR_BIT;
|
||||
assert(0 <= b && b < w);
|
||||
j = i; /* widen to size_t before multiplying */
|
||||
j *= b; /* j is now the bit offset i * b */
|
||||
k = j & (w - 1); /* bit offset within the word; equals j % w when w is a power of two */
|
||||
j /= w; /* j is now the index of the word holding the field's first bit */
|
||||
if (k <= w - b) { /* the whole field lies inside p[j] */
|
||||
return (p[j] >> k) & ((1ul << b) - 1);
|
||||
} else { /* the field continues into p[j + 1] */
|
||||
r = p[j] >> k; /* low part: the top w - k bits of p[j] */
|
||||
r |= p[j + 1] << (w - k); /* high part: bits of p[j + 1] placed above the low part */
|
||||
r &= (1ul << b) - 1; /* drop any bits beyond the field width */
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_H_ */
|
||||
|
|
37
third_party/python/Modules/unicodedata_getcode.c
vendored
37
third_party/python/Modules/unicodedata_getcode.c
vendored
|
@ -4,6 +4,7 @@
|
|||
│ Python 3 │
|
||||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "third_party/python/Include/pyctype.h"
|
||||
#include "third_party/python/Include/pyerrors.h"
|
||||
|
@ -20,7 +21,7 @@
|
|||
#define IS_NAMED_SEQ(cp) ((cp >= _PyUnicode_NamedSequencesStart) && \
|
||||
(cp < _PyUnicode_NamedSequencesEnd))
|
||||
|
||||
static const char * const kHangulSyllables[][3] = {
|
||||
static const char kHangulSyllables[][3][4] = {
|
||||
{ "G", "A", "" },
|
||||
{ "GG", "AE", "G" },
|
||||
{ "N", "YA", "GG" },
|
||||
|
@ -40,15 +41,15 @@ static const char * const kHangulSyllables[][3] = {
|
|||
{ "T", "WI", "M" },
|
||||
{ "P", "YU", "B" },
|
||||
{ "H", "EU", "BS" },
|
||||
{ 0, "YI", "S" },
|
||||
{ 0, "I", "SS" },
|
||||
{ 0, 0, "NG" },
|
||||
{ 0, 0, "J" },
|
||||
{ 0, 0, "C" },
|
||||
{ 0, 0, "K" },
|
||||
{ 0, 0, "T" },
|
||||
{ 0, 0, "P" },
|
||||
{ 0, 0, "H" }
|
||||
{ "", "YI", "S" },
|
||||
{ "", "I", "SS" },
|
||||
{ "", "", "NG" },
|
||||
{ "", "", "J" },
|
||||
{ "", "", "C" },
|
||||
{ "", "", "K" },
|
||||
{ "", "", "T" },
|
||||
{ "", "", "P" },
|
||||
{ "", "", "H" }
|
||||
};
|
||||
|
||||
void
|
||||
|
@ -173,7 +174,7 @@ _PyUnicode_GetCode(PyObject *self, const char *name, int namelen, Py_UCS4 *code,
|
|||
details */
|
||||
h = (unsigned int)_gethash(name, namelen, _PyUnicode_CodeMagic);
|
||||
i = ~h & mask;
|
||||
v = _PyUnicode_Bextr(_PyUnicode_CodeHash, i, _PyUnicode_CodeHashBits);
|
||||
v = bextra(_PyUnicode_CodeHash, i, _PyUnicode_CodeHashBits);
|
||||
if (!v)
|
||||
return 0;
|
||||
if (_cmpname(self, v, name, namelen))
|
||||
|
@ -183,7 +184,7 @@ _PyUnicode_GetCode(PyObject *self, const char *name, int namelen, Py_UCS4 *code,
|
|||
incr = mask;
|
||||
for (;;) {
|
||||
i = (i + incr) & mask;
|
||||
v = _PyUnicode_Bextr(_PyUnicode_CodeHash, i, _PyUnicode_CodeHashBits);
|
||||
v = bextra(_PyUnicode_CodeHash, i, _PyUnicode_CodeHashBits);
|
||||
if (!v)
|
||||
return 0;
|
||||
if (_cmpname(self, v, name, namelen))
|
||||
|
@ -246,10 +247,10 @@ _PyUnicode_GetUcName(PyObject *self, Py_UCS4 code, char *buffer, int buflen,
|
|||
}
|
||||
/* get offset into phrasebook */
|
||||
offset = _PyUnicode_PhrasebookOffset1[(code>>_PyUnicode_PhrasebookShift)];
|
||||
offset = _PyUnicode_Bextr(_PyUnicode_PhrasebookOffset2,
|
||||
(offset << _PyUnicode_PhrasebookShift) +
|
||||
(code & ((1 << _PyUnicode_PhrasebookShift) - 1)),
|
||||
_PyUnicode_PhrasebookOffset2Bits);
|
||||
offset = bextra(_PyUnicode_PhrasebookOffset2,
|
||||
(offset << _PyUnicode_PhrasebookShift) +
|
||||
(code & ((1 << _PyUnicode_PhrasebookShift) - 1)),
|
||||
_PyUnicode_PhrasebookOffset2Bits);
|
||||
if (!offset)
|
||||
return 0;
|
||||
i = 0;
|
||||
|
@ -270,8 +271,8 @@ _PyUnicode_GetUcName(PyObject *self, Py_UCS4 code, char *buffer, int buflen,
|
|||
word has bit 7 set. the last word in a string ends with
|
||||
0x80 */
|
||||
w = (_PyUnicode_Lexicon +
|
||||
_PyUnicode_Bextr(_PyUnicode_LexiconOffset,
|
||||
word, _PyUnicode_LexiconOffsetBits));
|
||||
bextra(_PyUnicode_LexiconOffset, word,
|
||||
_PyUnicode_LexiconOffsetBits));
|
||||
while (*w < 128) {
|
||||
if (i >= buflen)
|
||||
return 0; /* buffer overflow */
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
│ Python 3 │
|
||||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "third_party/python/Modules/unicodedata.h"
|
||||
#include "third_party/python/Modules/unicodedata_unidata.h"
|
||||
/* clang-format off */
|
||||
|
@ -30,7 +31,7 @@ _PyUnicode_GetDecompRecord(PyObject *self,
|
|||
}
|
||||
/* high byte is number of hex bytes (usually one or two), low byte
|
||||
is prefix code (from _PyUnicode_DecompPrefix) */
|
||||
decomp = _PyUnicode_Bextr(_PyUnicode_Decomp, *index, _PyUnicode_DecompBits);
|
||||
decomp = bextra(_PyUnicode_Decomp, *index, _PyUnicode_DecompBits);
|
||||
*count = decomp >> 8;
|
||||
*prefix = decomp & 255;
|
||||
(*index)++;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
│ Python 3 │
|
||||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/likely.h"
|
||||
#include "third_party/python/Include/pyerrors.h"
|
||||
#include "third_party/python/Include/pymem.h"
|
||||
|
@ -114,10 +115,10 @@ _PyUnicode_NfcNfkc(PyObject *self, PyObject *input, int k)
|
|||
}
|
||||
index = f * UNIDATA_TOTAL_LAST + l;
|
||||
index1 = _PyUnicode_CompIndex[index >> _PyUnicode_CompShift];
|
||||
code = _PyUnicode_Bextr(_PyUnicode_CompData,
|
||||
(index1 << _PyUnicode_CompShift)+
|
||||
(index & ((1 << _PyUnicode_CompShift) - 1)),
|
||||
_PyUnicode_CompDataBits);
|
||||
code = bextra(_PyUnicode_CompData,
|
||||
(index1 << _PyUnicode_CompShift)+
|
||||
(index & ((1 << _PyUnicode_CompShift) - 1)),
|
||||
_PyUnicode_CompDataBits);
|
||||
if (code == 0)
|
||||
goto not_combinable;
|
||||
/* Replace the original character. */
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
│ Python 3 │
|
||||
│ https://docs.python.org/3/license.html │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "third_party/python/Include/pyerrors.h"
|
||||
#include "third_party/python/Include/pymem.h"
|
||||
#include "third_party/python/Modules/unicodedata.h"
|
||||
|
@ -96,9 +97,9 @@ _PyUnicode_NfdNfkd(PyObject *self, PyObject *input, int k)
|
|||
/* Copy decomposition onto the stack, in reverse
|
||||
order. */
|
||||
while(count) {
|
||||
code = _PyUnicode_Bextr(_PyUnicode_Decomp,
|
||||
index + (--count),
|
||||
_PyUnicode_DecompBits);
|
||||
code = bextra(_PyUnicode_Decomp,
|
||||
index + (--count),
|
||||
_PyUnicode_DecompBits);
|
||||
stack[stackptr++] = code;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue