/*
 * Mirror of https://github.com/jart/cosmopolitan.git
 * (synced 2025-02-15 02:37:55 +00:00).
 *
 * Commit message shown with this file:
 *
 * - Python static hello world now 1.8mb
 * - Python static fully loaded now 10mb
 * - Python HTTPS client now uses MbedTLS
 * - Python REPL now completes import stmts
 * - Increase stack size for Python for now
 * - Begin synthesizing posixpath and ntpath
 * - Restore Python \N{UNICODE NAME} support
 * - Restore Python NFKD symbol normalization
 * - Add optimized code path for Intel SHA-NI
 * - Get more Python unit tests passing faster
 * - Get Python help() pagination working on NT
 * - Python hashlib now supports MbedTLS PBKDF2
 * - Make memcpy/memmove/memcmp/bcmp/etc. faster
 * - Add Mersenne Twister and Vigna to LIBC_RAND
 * - Provide privileged __printf() for error code
 * - Fix zipos opendir() so that it reports ENOTDIR
 * - Add basic chmod() implementation for Windows NT
 * - Add Cosmo's best functions to Python cosmo module
 * - Pin function trace indent depth to that of caller
 * - Show memory diagram on invalid access in MODE=dbg
 * - Differentiate stack overflow on crash in MODE=dbg
 * - Add stb_truetype and tools for analyzing font files
 * - Upgrade to UNICODE 13 and reduce its binary footprint
 * - COMPILE.COM now logs resource usage of build commands
 * - Start implementing basic poll() support on bare metal
 * - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
 * - Add descriptions to strerror() in non-TINY build modes
 * - Add COUNTBRANCH() macro to help with micro-optimizations
 * - Make error / backtrace / asan / memory code more unbreakable
 * - Add fast perfect C implementation of μ-Law and a-Law audio codecs
 * - Make strtol() functions consistent with other libc implementations
 * - Improve Linenoise implementation (see also github.com/jart/bestline)
 * - COMPILE.COM now suppresses stdout/stderr of successful build commands
 *
 * 120 lines, 3.9 KiB, C
 */
#ifndef COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_H_
#define COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_H_
#include "libc/assert.h"
#include "third_party/python/Include/object.h"
#include "third_party/python/Include/unicodeobject.h"
/*
 * Hangul syllable arithmetic constants.
 *
 * The *Base values are starting code points and the *Count values are
 * range sizes.  NCount and SCount are derived: NCount = VCount * TCount
 * and SCount = LCount * NCount.
 *
 * NOTE(review): the values match the SBase/LBase/VBase/TBase constants
 * of the Unicode Standard's Hangul syllable composition algorithm, where
 * TBase sits one below the first trailing consonant so that index 0
 * means "no trailing consonant" -- confirm against the users of these
 * macros.
 */
#define _Hanghoul_SBase  0xAC00
#define _Hanghoul_LBase  0x1100
#define _Hanghoul_VBase  0x1161
#define _Hanghoul_TBase  0x11A7
#define _Hanghoul_LCount 19
#define _Hanghoul_VCount 21
#define _Hanghoul_TCount 28
#define _Hanghoul_NCount (_Hanghoul_VCount * _Hanghoul_TCount)
#define _Hanghoul_SCount (_Hanghoul_LCount * _Hanghoul_NCount)
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
|
COSMOPOLITAN_C_START_
|
|
/* clang-format off */
|
|
|
|
/*
 * [jart] If it adds 1.2 megs to each binary, then it should have an API.
 * Breaking this up into separate files allows ld to do its job.
 */
/* True when object `o` has exactly the type UCD_Type (no subtype check). */
#define UCD_Check(o) (Py_TYPE(o) == &UCD_Type)

/*
 * Calls the getrecord callback stored in `self`, which is cast to
 * PreviousDBVersion*, passing it `v`.  `self` is parenthesized so that
 * the cast applies to the whole argument expression.
 */
#define get_old_record(self, v) \
  ((((PreviousDBVersion *)(self))->getrecord)(v))
/*
 * Per-character property record; every field is a single read-only byte.
 */
typedef struct {
  const unsigned char category;                  /* index into _PyUnicode_CategoryNames */
  const unsigned char combining;                 /* combining class value 0 - 255 */
  const unsigned char bidirectional;             /* index into _PyUnicode_BidirectionalNames */
  const unsigned char mirrored;                  /* true if mirrored in bidir mode */
  const unsigned char east_asian_width;          /* index into _PyUnicode_EastAsianWidth */
  const unsigned char normalization_quick_check; /* see is_normalized() */
} _PyUnicode_Record;
/*
 * Record returned by PreviousDBVersion.getrecord.
 *
 * NOTE(review): the *_changed names suggest each field describes how a
 * property differs in an older database version -- confirm against
 * merge_old_version, which is not visible from this header.
 */
typedef struct {
  /* sequence of fields should be the same as in merge_old_version */
  const unsigned char bidir_changed;
  const unsigned char category_changed;
  const unsigned char decimal_changed;
  const unsigned char mirrored_changed;
  const unsigned char east_asian_width_changed;
  const double numeric_changed;
} _PyUnicode_ChangeRecord;
typedef struct {
|
|
PyObject_HEAD
|
|
const char *name;
|
|
const _PyUnicode_ChangeRecord *(*getrecord)(Py_UCS4);
|
|
Py_UCS4 (*normalization)(Py_UCS4);
|
|
} PreviousDBVersion;
|
|
|
|
/*
 * Table entry type taken by _PyUnicode_FindNfcIndex().
 *
 * NOTE(review): presumably maps a run of `count` values beginning at
 * `start` onto indices beginning at `index` -- confirm against the
 * implementation of _PyUnicode_FindNfcIndex().
 */
typedef struct {
  int start;
  short count;
  short index;
} _PyUnicode_Reindex;
/*
 * Case-mapping, digit and flag data for a character.
 */
typedef struct {
  /*
     These are either deltas to the character or offsets in
     _PyUnicode_ExtendedCase.
  */
  int upper;
  int lower;
  int title;
  /* Note if more flag space is needed, decimal and digit could be unified. */
  unsigned char decimal;
  unsigned char digit;
  unsigned short flags;
} _PyUnicode_TypeRecord;
/*
|
|
* In Unicode 6.0.0, the sequences contain at most 4 BMP chars,
|
|
* so we are using Py_UCS2 seq[4]. This needs to be updated if longer
|
|
* sequences or sequences with non-BMP chars are added.
|
|
* unicodedata_lookup should be adapted too.
|
|
*/
|
|
typedef struct{
|
|
int seqlen;
|
|
Py_UCS2 seq[4];
|
|
} _PyUnicode_NamedSequence;
|
|
|
|
/* Type object compared against by UCD_Check(); defined outside this header. */
extern PyTypeObject UCD_Type;
int _PyUnicode_IsUnifiedIdeograph(Py_UCS4);
|
|
const _PyUnicode_Record *_PyUnicode_GetRecord(Py_UCS4);
|
|
PyObject *_PyUnicode_NfcNfkc(PyObject *, PyObject *, int);
|
|
PyObject *_PyUnicode_NfdNfkd(PyObject *, PyObject *, int);
|
|
int _PyUnicode_IsNormalized(PyObject *, PyObject *, int, int);
|
|
int _PyUnicode_GetUcName(PyObject *, Py_UCS4, char *, int, int);
|
|
int _PyUnicode_FindNfcIndex(const _PyUnicode_Reindex *, Py_UCS4);
|
|
void _PyUnicode_FindSyllable(const char *, int *, int *, int, int);
|
|
int _PyUnicode_GetCode(PyObject *, const char *, int, Py_UCS4 *, int);
|
|
void _PyUnicode_GetDecompRecord(PyObject *, Py_UCS4, int *, int *, int *);
|
|
|
|
/**
 * Extracts the i-th b-bit field from a packed array of unsigned words.
 *
 * Fields are laid out back to back starting at the least significant
 * bit of p[0], so field i occupies bits [i*b, i*b+b) of the bit stream
 * and may straddle two adjacent words.
 *
 * @param p  packed words; p[(i*b)/w] must be readable, and so must the
 *           following word whenever the field crosses a word boundary
 *           (w being the bit width of unsigned)
 * @param i  zero-based field index
 * @param b  field width in bits; asserted to satisfy 0 <= b < w
 * @return   the field value, zero-extended to unsigned
 */
static inline unsigned _PyUnicode_Bextr(const unsigned *p, unsigned i, char b) {
  size_t j;
  unsigned k, r, w;
  unsigned long m;
  w = sizeof(unsigned) * CHAR_BIT;
  assert(0 <= b && b < w);
  m = (1ul << b) - 1; /* mask selecting the low b bits */
  j = i;              /* widen before multiplying so i*b is done in size_t */
  j *= b;             /* absolute bit offset of the field */
  k = j & (w - 1);    /* bit offset inside its word (w is a power of two) */
  j /= w;             /* index of the word holding the first bit */
  if (k <= w - b) {
    /* the whole field lives in p[j] */
    return (p[j] >> k) & m;
  } else {
    /* low part comes from p[j], high part from p[j + 1]; k > 0 here,
       so the left shift by w - k is always narrower than a word */
    r = p[j] >> k;
    r |= p[j + 1] << (w - k);
    r &= m;
    return r;
  }
}
COSMOPOLITAN_C_END_
|
|
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
|
#endif /* COSMOPOLITAN_THIRD_PARTY_PYTHON_MODULES_UNICODEDATA_H_ */
|