mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
39bf41f4eb
- Python static hello world now 1.8mb - Python static fully loaded now 10mb - Python HTTPS client now uses MbedTLS - Python REPL now completes import stmts - Increase stack size for Python for now - Begin synthesizing posixpath and ntpath - Restore Python \N{UNICODE NAME} support - Restore Python NFKD symbol normalization - Add optimized code path for Intel SHA-NI - Get more Python unit tests passing faster - Get Python help() pagination working on NT - Python hashlib now supports MbedTLS PBKDF2 - Make memcpy/memmove/memcmp/bcmp/etc. faster - Add Mersenne Twister and Vigna to LIBC_RAND - Provide privileged __printf() for error code - Fix zipos opendir() so that it reports ENOTDIR - Add basic chmod() implementation for Windows NT - Add Cosmo's best functions to Python cosmo module - Pin function trace indent depth to that of caller - Show memory diagram on invalid access in MODE=dbg - Differentiate stack overflow on crash in MODE=dbg - Add stb_truetype and tools for analyzing font files - Upgrade to UNICODE 13 and reduce its binary footprint - COMPILE.COM now logs resource usage of build commands - Start implementing basic poll() support on bare metal - Set getauxval(AT_EXECFN) to GetModuleFileName() on NT - Add descriptions to strerror() in non-TINY build modes - Add COUNTBRANCH() macro to help with micro-optimizations - Make error / backtrace / asan / memory code more unbreakable - Add fast perfect C implementation of μ-Law and a-Law audio codecs - Make strtol() functions consistent with other libc implementations - Improve Linenoise implementation (see also github.com/jart/bestline) - COMPILE.COM now suppresses stdout/stderr of successful build commands
43 lines
2.2 KiB
C
43 lines
2.2 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Python 3 │
|
|
│ https://docs.python.org/3/license.html │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "third_party/python/Modules/unicodedata.h"
|
|
/* clang-format off */
|
|
|
|
/**
|
|
* Returns 1 if the input is certainly normalized, 0 if it might not be.
|
|
*/
|
|
int
|
|
_PyUnicode_IsNormalized(PyObject *self, PyObject *input, int nfc, int k)
|
|
{
|
|
int kind;
|
|
void *data;
|
|
Py_ssize_t i, len;
|
|
unsigned char prev_combining = 0, quickcheck_mask;
|
|
/* An older version of the database is requested, quickchecks must be
|
|
disabled. */
|
|
if (self && UCD_Check(self))
|
|
return 0;
|
|
/* The two quickcheck bits at this shift mean 0=Yes, 1=Maybe, 2=No,
|
|
as described in http://unicode.org/reports/tr15/#Annex8. */
|
|
quickcheck_mask = 3 << ((nfc ? 4 : 0) + (k ? 2 : 0));
|
|
i = 0;
|
|
kind = PyUnicode_KIND(input);
|
|
data = PyUnicode_DATA(input);
|
|
len = PyUnicode_GET_LENGTH(input);
|
|
while (i < len) {
|
|
Py_UCS4 ch = PyUnicode_READ(kind, data, i++);
|
|
const _PyUnicode_Record *record = _PyUnicode_GetRecord(ch);
|
|
unsigned char combining = record->combining;
|
|
unsigned char quickcheck = record->normalization_quick_check;
|
|
if (quickcheck & quickcheck_mask)
|
|
return 0; /* this string might need normalization */
|
|
if (combining && prev_combining > combining)
|
|
return 0; /* non-canonical sort order, not normalized */
|
|
prev_combining = combining;
|
|
}
|
|
return 1; /* certainly normalized */
|
|
}
|