cosmopolitan/third_party/python/Objects/stringlib/find_max_char.inc
Justine Tunney 9b29358511 Make whitespace changes
Status lines for Emacs and Vim have been added to Python sources so
they'll be easier to edit using Python's preferred coding style.

Some DNS helper functions have been broken up into multiple files. It's
nice to have one function per file whenever possible, since that way we
don't need -ffunction-sections.  Another reason it's good to have small
source files, is because the build will be enforcing resource limits on
compilation and testing soon.
2021-08-13 03:20:45 -07:00

142 lines
4.6 KiB
C++

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Python 3 │
│ https://docs.python.org/3/license.html │
╚─────────────────────────────────────────────────────────────────────────────*/
/* clang-format off */
/* Finding the optimal width of unicode characters in a buffer */
/* Refuse to compile unless STRINGLIB_IS_UNICODE evaluates to non-zero. */
#if !STRINGLIB_IS_UNICODE
# error "find_max_char.h is specific to Unicode"
#endif
/* Mask with bit 0x80 set in every byte of a C 'long'.  ANDing it with a
   'long' read from a buffer of 1-byte characters yields a non-zero result
   exactly when at least one of those bytes is non-ASCII (>= 0x80).
   Only 4-byte and 8-byte 'long' are supported. */
#if (SIZEOF_LONG == 8)
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
#elif (SIZEOF_LONG == 4)
# define UCS1_ASCII_CHAR_MASK 0x80808080UL
#else
# error C 'long' size should be either 4 or 8!
#endif
#if STRINGLIB_SIZEOF_CHAR == 1
/*
 * Classify a buffer of 1-byte characters.
 *
 * begin: first character to examine.
 * end:   one past the last character to examine.
 *
 * Returns 255 as soon as a byte with bit 0x80 set is found, otherwise 127
 * (which is also the result for an empty range).
 */
Py_LOCAL_INLINE(Py_UCS4)
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
{
    const unsigned char *p = (const unsigned char *) begin;
    /* Same pointer type as p, so every bound check compares like pointers. */
    const unsigned char *stop = (const unsigned char *) end;
    /* Upper bound for the word-at-a-time loop: end as rounded by
       _Py_ALIGN_DOWN to SIZEOF_LONG. */
    const unsigned char *aligned_end =
        (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);

    while (p < stop) {
        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
            /* Help register allocation */
            const unsigned char *wp = p;
            while (wp < aligned_end) {
                /* Test SIZEOF_LONG bytes at once; the load goes through a
                   const-qualified pointer because the buffer is read-only. */
                unsigned long value = *(const unsigned long *) wp;
                if (value & UCS1_ASCII_CHAR_MASK)
                    return 255;
                wp += SIZEOF_LONG;
            }
            p = wp;
            /* The word loop may have consumed the whole buffer. */
            if (p == stop)
                break;
        }
        /* Byte-at-a-time step: covers the unaligned head and the tail that
           is shorter than one 'long'. */
        if (*p++ & 0x80)
            return 255;
    }
    return 127;
}
/* NOTE(review): the mask defined earlier in this file is named
   UCS1_ASCII_CHAR_MASK, so this directive does not remove it -- confirm
   whether ASCII_CHAR_MASK is really the intended name. */
#undef ASCII_CHAR_MASK
#else /* STRINGLIB_SIZEOF_CHAR == 1 */
/* MASK_*: bits that must all be clear for a character to fit in the named
   range (e.g. a value with no MASK_ASCII bit set is <= 0x7f). */
#define MASK_ASCII 0xFFFFFF80
#define MASK_UCS1 0xFFFFFF00
#define MASK_UCS2 0xFFFF0000
/* MAX_CHAR_*: the upper bound find_max_char reports for each range. */
#define MAX_CHAR_ASCII 0x7f
#define MAX_CHAR_UCS1 0xff
#define MAX_CHAR_UCS2 0xffff
#define MAX_CHAR_UCS4 0x10ffff
/*
 * Scan the characters in [begin, end) and report the smallest range bound
 * that covers all of them: 0x7f, 0xff, 0xffff or, for 4-byte characters
 * only, 0x10ffff.
 *
 * The scan keeps a "reject" mask of bits no character has used so far.  When
 * a character uses one of those bits the mask is widened to the next range
 * and the same character(s) are tested again.  Once the widest range for
 * this character size is needed, the function returns immediately.
 */
Py_LOCAL_INLINE(Py_UCS4)
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
{
#if STRINGLIB_SIZEOF_CHAR == 2
    const Py_UCS4 widest_mask = MASK_UCS1;
    const Py_UCS4 widest_result = MAX_CHAR_UCS2;
#elif STRINGLIB_SIZEOF_CHAR == 4
    const Py_UCS4 widest_mask = MASK_UCS2;
    const Py_UCS4 widest_result = MAX_CHAR_UCS4;
#else
#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
#endif
    const Py_ssize_t length = end - begin;
    /* End of the part of the buffer that is handled four characters at a
       time. */
    const STRINGLIB_CHAR *const quad_end =
        begin + _Py_SIZE_ROUND_DOWN(length, 4);
    const STRINGLIB_CHAR *cur = begin;
    Py_UCS4 reject = MASK_ASCII;
    Py_UCS4 result = MAX_CHAR_ASCII;

    /* Groups of four: OR them together so one test covers all of them. */
    for (; cur < quad_end; cur += 4) {
        const STRINGLIB_CHAR merged = cur[0] | cur[1] | cur[2] | cur[3];

        /* Widen until this group passes or the limit is reached. */
        while (merged & reject) {
            if (reject == widest_mask) {
                /* Nothing wider exists for this character size. */
                return widest_result;
            }
            if (reject == MASK_ASCII) {
                result = MAX_CHAR_UCS1;
                reject = MASK_UCS1;
            }
            else {
                /* MASK_UCS2 is excluded by the widest_mask test above. */
                assert(reject == MASK_UCS1);
                result = MAX_CHAR_UCS2;
                reject = MASK_UCS2;
            }
        }
    }

    /* Remaining characters (fewer than four), one at a time. */
    for (; cur < end; cur++) {
        while (*cur & reject) {
            if (reject == widest_mask) {
                /* Nothing wider exists for this character size. */
                return widest_result;
            }
            if (reject == MASK_ASCII) {
                result = MAX_CHAR_UCS1;
                reject = MASK_UCS1;
            }
            else {
                /* MASK_UCS2 is excluded by the widest_mask test above. */
                assert(reject == MASK_UCS1);
                result = MAX_CHAR_UCS2;
                reject = MASK_UCS2;
            }
        }
    }

    return result;
}
/* Remove the MASK_* and MAX_CHAR_* helper macros defined above so they do
   not remain defined past this point. */
#undef MASK_ASCII
#undef MASK_UCS1
#undef MASK_UCS2
#undef MAX_CHAR_ASCII
#undef MAX_CHAR_UCS1
#undef MAX_CHAR_UCS2
#undef MAX_CHAR_UCS4
#endif /* STRINGLIB_SIZEOF_CHAR == 1 */