mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
python-3.6.zip added from Github
README.cosmo contains the necessary links.
This commit is contained in:
parent
75fc601ff5
commit
0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions
40
third_party/python/Objects/stringlib/README.txt
vendored
Normal file
40
third_party/python/Objects/stringlib/README.txt
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
bits shared by the bytesobject and unicodeobject implementations (and
|
||||
possibly other modules, in a not too distant future).
|
||||
|
||||
the stuff in here is included into relevant places; see the individual
|
||||
source files for details.
|
||||
|
||||
--------------------------------------------------------------------
|
||||
the following defines are used by the different modules:
|
||||
|
||||
STRINGLIB_CHAR
|
||||
|
||||
the type used to hold a character (char or Py_UNICODE)
|
||||
|
||||
STRINGLIB_EMPTY
|
||||
|
||||
a PyObject representing the empty string, only to be used if
|
||||
STRINGLIB_MUTABLE is 0
|
||||
|
||||
Py_ssize_t STRINGLIB_LEN(PyObject*)
|
||||
|
||||
returns the length of the given string object (which must be of the
|
||||
right type)
|
||||
|
||||
PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
|
||||
|
||||
creates a new string object
|
||||
|
||||
STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
|
||||
|
||||
returns the pointer to the character data for the given string
|
||||
object (which must be of the right type)
|
||||
|
||||
int STRINGLIB_CHECK_EXACT(PyObject *)
|
||||
|
||||
returns true if the object is an instance of our type, not a subclass
|
||||
|
||||
STRINGLIB_MUTABLE
|
||||
|
||||
must be 0 or 1; it tells the C preprocessor macros in the stringlib code
whether the object being operated on is mutable
|
29
third_party/python/Objects/stringlib/asciilib.h
vendored
Normal file
29
third_party/python/Objects/stringlib/asciilib.h
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* this is sort of a hack.  there's at least one place (formatting
   floats) where some stringlib code takes a different path if it's
   compiled as unicode. */
#define STRINGLIB_IS_UNICODE     1

/* Symbol naming: every STRINGLIB(F) name gets the asciilib_ prefix. */
#define FASTSEARCH               asciilib_fastsearch
#define STRINGLIB(F)             asciilib_##F

/* Object and character types: one-byte characters, values up to 0x7F. */
#define STRINGLIB_OBJECT         PyUnicodeObject
#define STRINGLIB_SIZEOF_CHAR    1
#define STRINGLIB_MAX_CHAR       0x7Fu
#define STRINGLIB_CHAR           Py_UCS1
#define STRINGLIB_TYPE_NAME      "unicode"
#define STRINGLIB_PARSE_CODE     "U"
#define STRINGLIB_EMPTY          unicode_empty

/* Character classification and conversion. */
#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL

/* Object access, construction and type checks. */
#define STRINGLIB_STR            PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
#define STRINGLIB_NEW(STR,LEN)   _PyUnicode_FromASCII((char*)(STR),(LEN))
#define STRINGLIB_CHECK          PyUnicode_Check
#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact

/* Object-to-string conversions. */
#define STRINGLIB_TOSTR          PyObject_Str
#define STRINGLIB_TOASCII        PyObject_ASCII

/* Route the generic grouping helper name to the ASCII-specific one. */
#define _Py_InsertThousandsGrouping _PyUnicode_ascii_InsertThousandsGrouping
|
||||
|
822
third_party/python/Objects/stringlib/codecs.h
vendored
Normal file
822
third_party/python/Objects/stringlib/codecs.h
vendored
Normal file
|
@ -0,0 +1,822 @@
|
|||
/* stringlib: codec implementations */
|
||||
|
||||
#if !STRINGLIB_IS_UNICODE
|
||||
# error "codecs.h is specific to Unicode"
|
||||
#endif
|
||||
|
||||
/* Mask to quickly check whether a C 'long' contains a
|
||||
non-ASCII, UTF8-encoded char. */
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080UL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080UL
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
/* 10xxxxxx */
|
||||
#define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x80 && (ch) < 0xC0)
|
||||
|
||||
Py_LOCAL_INLINE(Py_UCS4)
|
||||
STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
||||
STRINGLIB_CHAR *dest,
|
||||
Py_ssize_t *outpos)
|
||||
{
|
||||
Py_UCS4 ch;
|
||||
const char *s = *inptr;
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
|
||||
STRINGLIB_CHAR *p = dest + *outpos;
|
||||
|
||||
while (s < end) {
|
||||
ch = (unsigned char)*s;
|
||||
|
||||
if (ch < 0x80) {
|
||||
/* Fast path for runs of ASCII characters. Given that common UTF-8
|
||||
input will consist of an overwhelming majority of ASCII
|
||||
characters, we try to optimize for this case by checking
|
||||
as many characters as a C 'long' can contain.
|
||||
First, check if we can do an aligned read, as most CPUs have
|
||||
a penalty for unaligned reads.
|
||||
*/
|
||||
if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
|
||||
/* Help register allocation */
|
||||
const char *_s = s;
|
||||
STRINGLIB_CHAR *_p = p;
|
||||
while (_s < aligned_end) {
|
||||
/* Read a whole long at a time (either 4 or 8 bytes),
|
||||
and do a fast unrolled copy if it only contains ASCII
|
||||
characters. */
|
||||
unsigned long value = *(unsigned long *) _s;
|
||||
if (value & ASCII_CHAR_MASK)
|
||||
break;
|
||||
#if PY_LITTLE_ENDIAN
|
||||
_p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
|
||||
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
|
||||
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
|
||||
_p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
|
||||
# if SIZEOF_LONG == 8
|
||||
_p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
|
||||
_p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
|
||||
_p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
|
||||
_p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
|
||||
# endif
|
||||
#else
|
||||
# if SIZEOF_LONG == 8
|
||||
_p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
|
||||
_p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
|
||||
_p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
|
||||
_p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
|
||||
_p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
|
||||
_p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
|
||||
_p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
|
||||
_p[7] = (STRINGLIB_CHAR)(value & 0xFFu);
|
||||
# else
|
||||
_p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
|
||||
_p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
|
||||
_p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
|
||||
_p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
|
||||
# endif
|
||||
#endif
|
||||
_s += SIZEOF_LONG;
|
||||
_p += SIZEOF_LONG;
|
||||
}
|
||||
s = _s;
|
||||
p = _p;
|
||||
if (s == end)
|
||||
break;
|
||||
ch = (unsigned char)*s;
|
||||
}
|
||||
if (ch < 0x80) {
|
||||
s++;
|
||||
*p++ = ch;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (ch < 0xE0) {
|
||||
/* \xC2\x80-\xDF\xBF -- 0080-07FF */
|
||||
Py_UCS4 ch2;
|
||||
if (ch < 0xC2) {
|
||||
/* invalid sequence
|
||||
\x80-\xBF -- continuation byte
|
||||
\xC0-\xC1 -- fake 0000-007F */
|
||||
goto InvalidStart;
|
||||
}
|
||||
if (end - s < 2) {
|
||||
/* unexpected end of data: the caller will decide whether
|
||||
it's an error or not */
|
||||
break;
|
||||
}
|
||||
ch2 = (unsigned char)s[1];
|
||||
if (!IS_CONTINUATION_BYTE(ch2))
|
||||
/* invalid continuation byte */
|
||||
goto InvalidContinuation1;
|
||||
ch = (ch << 6) + ch2 -
|
||||
((0xC0 << 6) + 0x80);
|
||||
assert ((ch > 0x007F) && (ch <= 0x07FF));
|
||||
s += 2;
|
||||
if (STRINGLIB_MAX_CHAR <= 0x007F ||
|
||||
(STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR))
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
*p++ = ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch < 0xF0) {
|
||||
/* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */
|
||||
Py_UCS4 ch2, ch3;
|
||||
if (end - s < 3) {
|
||||
/* unexpected end of data: the caller will decide whether
|
||||
it's an error or not */
|
||||
if (end - s < 2)
|
||||
break;
|
||||
ch2 = (unsigned char)s[1];
|
||||
if (!IS_CONTINUATION_BYTE(ch2) ||
|
||||
(ch2 < 0xA0 ? ch == 0xE0 : ch == 0xED))
|
||||
/* for clarification see comments below */
|
||||
goto InvalidContinuation1;
|
||||
break;
|
||||
}
|
||||
ch2 = (unsigned char)s[1];
|
||||
ch3 = (unsigned char)s[2];
|
||||
if (!IS_CONTINUATION_BYTE(ch2)) {
|
||||
/* invalid continuation byte */
|
||||
goto InvalidContinuation1;
|
||||
}
|
||||
if (ch == 0xE0) {
|
||||
if (ch2 < 0xA0)
|
||||
/* invalid sequence
|
||||
\xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */
|
||||
goto InvalidContinuation1;
|
||||
} else if (ch == 0xED && ch2 >= 0xA0) {
|
||||
/* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
|
||||
will result in surrogates in range D800-DFFF. Surrogates are
|
||||
not valid UTF-8 so they are rejected.
|
||||
See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
|
||||
(table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
|
||||
goto InvalidContinuation1;
|
||||
}
|
||||
if (!IS_CONTINUATION_BYTE(ch3)) {
|
||||
/* invalid continuation byte */
|
||||
goto InvalidContinuation2;
|
||||
}
|
||||
ch = (ch << 12) + (ch2 << 6) + ch3 -
|
||||
((0xE0 << 12) + (0x80 << 6) + 0x80);
|
||||
assert ((ch > 0x07FF) && (ch <= 0xFFFF));
|
||||
s += 3;
|
||||
if (STRINGLIB_MAX_CHAR <= 0x07FF ||
|
||||
(STRINGLIB_MAX_CHAR < 0xFFFF && ch > STRINGLIB_MAX_CHAR))
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
*p++ = ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch < 0xF5) {
|
||||
/* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */
|
||||
Py_UCS4 ch2, ch3, ch4;
|
||||
if (end - s < 4) {
|
||||
/* unexpected end of data: the caller will decide whether
|
||||
it's an error or not */
|
||||
if (end - s < 2)
|
||||
break;
|
||||
ch2 = (unsigned char)s[1];
|
||||
if (!IS_CONTINUATION_BYTE(ch2) ||
|
||||
(ch2 < 0x90 ? ch == 0xF0 : ch == 0xF4))
|
||||
/* for clarification see comments below */
|
||||
goto InvalidContinuation1;
|
||||
if (end - s < 3)
|
||||
break;
|
||||
ch3 = (unsigned char)s[2];
|
||||
if (!IS_CONTINUATION_BYTE(ch3))
|
||||
goto InvalidContinuation2;
|
||||
break;
|
||||
}
|
||||
ch2 = (unsigned char)s[1];
|
||||
ch3 = (unsigned char)s[2];
|
||||
ch4 = (unsigned char)s[3];
|
||||
if (!IS_CONTINUATION_BYTE(ch2)) {
|
||||
/* invalid continuation byte */
|
||||
goto InvalidContinuation1;
|
||||
}
|
||||
if (ch == 0xF0) {
|
||||
if (ch2 < 0x90)
|
||||
/* invalid sequence
|
||||
\xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */
|
||||
goto InvalidContinuation1;
|
||||
} else if (ch == 0xF4 && ch2 >= 0x90) {
|
||||
/* invalid sequence
|
||||
\xF4\x90\x80\80- -- 110000- overflow */
|
||||
goto InvalidContinuation1;
|
||||
}
|
||||
if (!IS_CONTINUATION_BYTE(ch3)) {
|
||||
/* invalid continuation byte */
|
||||
goto InvalidContinuation2;
|
||||
}
|
||||
if (!IS_CONTINUATION_BYTE(ch4)) {
|
||||
/* invalid continuation byte */
|
||||
goto InvalidContinuation3;
|
||||
}
|
||||
ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -
|
||||
((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);
|
||||
assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));
|
||||
s += 4;
|
||||
if (STRINGLIB_MAX_CHAR <= 0xFFFF ||
|
||||
(STRINGLIB_MAX_CHAR < 0x10FFFF && ch > STRINGLIB_MAX_CHAR))
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
*p++ = ch;
|
||||
continue;
|
||||
}
|
||||
goto InvalidStart;
|
||||
}
|
||||
ch = 0;
|
||||
Return:
|
||||
*inptr = s;
|
||||
*outpos = p - dest;
|
||||
return ch;
|
||||
InvalidStart:
|
||||
ch = 1;
|
||||
goto Return;
|
||||
InvalidContinuation1:
|
||||
ch = 2;
|
||||
goto Return;
|
||||
InvalidContinuation2:
|
||||
ch = 3;
|
||||
goto Return;
|
||||
InvalidContinuation3:
|
||||
ch = 4;
|
||||
goto Return;
|
||||
}
|
||||
|
||||
#undef ASCII_CHAR_MASK
|
||||
|
||||
|
||||
/* UTF-8 encoder specialized for a Unicode kind to avoid the slow
|
||||
PyUnicode_READ() macro. Delete some parts of the code depending on the kind:
|
||||
UCS-1 strings don't need to handle surrogates for example. */
|
||||
Py_LOCAL_INLINE(PyObject *)
|
||||
STRINGLIB(utf8_encoder)(PyObject *unicode,
|
||||
STRINGLIB_CHAR *data,
|
||||
Py_ssize_t size,
|
||||
const char *errors)
|
||||
{
|
||||
Py_ssize_t i; /* index into data of next input character */
|
||||
char *p; /* next free byte in output buffer */
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
PyObject *error_handler_obj = NULL;
|
||||
PyObject *exc = NULL;
|
||||
PyObject *rep = NULL;
|
||||
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
|
||||
#endif
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
const Py_ssize_t max_char_size = 2;
|
||||
#elif STRINGLIB_SIZEOF_CHAR == 2
|
||||
const Py_ssize_t max_char_size = 3;
|
||||
#else /* STRINGLIB_SIZEOF_CHAR == 4 */
|
||||
const Py_ssize_t max_char_size = 4;
|
||||
#endif
|
||||
_PyBytesWriter writer;
|
||||
|
||||
assert(size >= 0);
|
||||
_PyBytesWriter_Init(&writer);
|
||||
|
||||
if (size > PY_SSIZE_T_MAX / max_char_size) {
|
||||
/* integer overflow */
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
p = _PyBytesWriter_Alloc(&writer, size * max_char_size);
|
||||
if (p == NULL)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < size;) {
|
||||
Py_UCS4 ch = data[i++];
|
||||
|
||||
if (ch < 0x80) {
|
||||
/* Encode ASCII */
|
||||
*p++ = (char) ch;
|
||||
|
||||
}
|
||||
else
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
if (ch < 0x0800)
|
||||
#endif
|
||||
{
|
||||
/* Encode Latin-1 */
|
||||
*p++ = (char)(0xc0 | (ch >> 6));
|
||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
else if (Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
Py_ssize_t startpos, endpos, newpos;
|
||||
Py_ssize_t k;
|
||||
if (error_handler == _Py_ERROR_UNKNOWN) {
|
||||
error_handler = get_error_handler(errors);
|
||||
}
|
||||
|
||||
startpos = i-1;
|
||||
endpos = startpos+1;
|
||||
|
||||
while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))
|
||||
endpos++;
|
||||
|
||||
/* Only overallocate the buffer if it's not the last write */
|
||||
writer.overallocate = (endpos < size);
|
||||
|
||||
switch (error_handler)
|
||||
{
|
||||
case _Py_ERROR_REPLACE:
|
||||
memset(p, '?', endpos - startpos);
|
||||
p += (endpos - startpos);
|
||||
/* fall through */
|
||||
case _Py_ERROR_IGNORE:
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_SURROGATEPASS:
|
||||
for (k=startpos; k<endpos; k++) {
|
||||
ch = data[k];
|
||||
*p++ = (char)(0xe0 | (ch >> 12));
|
||||
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_BACKSLASHREPLACE:
|
||||
/* subtract preallocated bytes */
|
||||
writer.min_size -= max_char_size * (endpos - startpos);
|
||||
p = backslashreplace(&writer, p,
|
||||
unicode, startpos, endpos);
|
||||
if (p == NULL)
|
||||
goto error;
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_XMLCHARREFREPLACE:
|
||||
/* subtract preallocated bytes */
|
||||
writer.min_size -= max_char_size * (endpos - startpos);
|
||||
p = xmlcharrefreplace(&writer, p,
|
||||
unicode, startpos, endpos);
|
||||
if (p == NULL)
|
||||
goto error;
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
|
||||
case _Py_ERROR_SURROGATEESCAPE:
|
||||
for (k=startpos; k<endpos; k++) {
|
||||
ch = data[k];
|
||||
if (!(0xDC80 <= ch && ch <= 0xDCFF))
|
||||
break;
|
||||
*p++ = (char)(ch & 0xff);
|
||||
}
|
||||
if (k >= endpos) {
|
||||
i += (endpos - startpos - 1);
|
||||
break;
|
||||
}
|
||||
startpos = k;
|
||||
assert(startpos < endpos);
|
||||
/* fall through */
|
||||
default:
|
||||
rep = unicode_encode_call_errorhandler(
|
||||
errors, &error_handler_obj, "utf-8", "surrogates not allowed",
|
||||
unicode, &exc, startpos, endpos, &newpos);
|
||||
if (!rep)
|
||||
goto error;
|
||||
|
||||
/* subtract preallocated bytes */
|
||||
writer.min_size -= max_char_size * (newpos - startpos);
|
||||
|
||||
if (PyBytes_Check(rep)) {
|
||||
p = _PyBytesWriter_WriteBytes(&writer, p,
|
||||
PyBytes_AS_STRING(rep),
|
||||
PyBytes_GET_SIZE(rep));
|
||||
}
|
||||
else {
|
||||
/* rep is unicode */
|
||||
if (PyUnicode_READY(rep) < 0)
|
||||
goto error;
|
||||
|
||||
if (!PyUnicode_IS_ASCII(rep)) {
|
||||
raise_encode_exception(&exc, "utf-8", unicode,
|
||||
startpos, endpos,
|
||||
"surrogates not allowed");
|
||||
goto error;
|
||||
}
|
||||
|
||||
p = _PyBytesWriter_WriteBytes(&writer, p,
|
||||
PyUnicode_DATA(rep),
|
||||
PyUnicode_GET_LENGTH(rep));
|
||||
}
|
||||
|
||||
if (p == NULL)
|
||||
goto error;
|
||||
Py_CLEAR(rep);
|
||||
|
||||
i = newpos;
|
||||
}
|
||||
|
||||
/* If overallocation was disabled, ensure that it was the last
|
||||
write. Otherwise, we missed an optimization */
|
||||
assert(writer.overallocate || i == size);
|
||||
}
|
||||
else
|
||||
#if STRINGLIB_SIZEOF_CHAR > 2
|
||||
if (ch < 0x10000)
|
||||
#endif
|
||||
{
|
||||
*p++ = (char)(0xe0 | (ch >> 12));
|
||||
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
#if STRINGLIB_SIZEOF_CHAR > 2
|
||||
else /* ch >= 0x10000 */
|
||||
{
|
||||
assert(ch <= MAX_UNICODE);
|
||||
/* Encode UCS4 Unicode ordinals */
|
||||
*p++ = (char)(0xf0 | (ch >> 18));
|
||||
*p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
|
||||
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
||||
}
|
||||
#endif /* STRINGLIB_SIZEOF_CHAR > 2 */
|
||||
#endif /* STRINGLIB_SIZEOF_CHAR > 1 */
|
||||
}
|
||||
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
Py_XDECREF(error_handler_obj);
|
||||
Py_XDECREF(exc);
|
||||
#endif
|
||||
return _PyBytesWriter_Finish(&writer, p);
|
||||
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
error:
|
||||
Py_XDECREF(rep);
|
||||
Py_XDECREF(error_handler_obj);
|
||||
Py_XDECREF(exc);
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The pattern for constructing UCS2-repeated masks. */
|
||||
#if SIZEOF_LONG == 8
|
||||
# define UCS2_REPEAT_MASK 0x0001000100010001ul
|
||||
#elif SIZEOF_LONG == 4
|
||||
# define UCS2_REPEAT_MASK 0x00010001ul
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
/* The mask for fast checking. */
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
/* The mask for fast checking of whether a C 'long' contains a
|
||||
non-ASCII or non-Latin1 UTF16-encoded characters. */
|
||||
# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
|
||||
#else
|
||||
/* The mask for fast checking of whether a C 'long' may contain
|
||||
UTF16-encoded surrogate characters. This is an efficient heuristic,
|
||||
assuming that non-surrogate characters with a code point >= 0x8000 are
|
||||
rare in most input.
|
||||
*/
|
||||
# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u)
|
||||
#endif
|
||||
/* The mask for fast byte-swapping. */
|
||||
#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu)
|
||||
/* Swap bytes. */
|
||||
#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \
|
||||
(((value) & STRIPPED_MASK) << 8))
|
||||
|
||||
Py_LOCAL_INLINE(Py_UCS4)
|
||||
STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
|
||||
STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
|
||||
int native_ordering)
|
||||
{
|
||||
Py_UCS4 ch;
|
||||
const unsigned char *aligned_end =
|
||||
(const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
|
||||
const unsigned char *q = *inptr;
|
||||
STRINGLIB_CHAR *p = dest + *outpos;
|
||||
/* Offsets from q for retrieving byte pairs in the right order. */
|
||||
#if PY_LITTLE_ENDIAN
|
||||
int ihi = !!native_ordering, ilo = !native_ordering;
|
||||
#else
|
||||
int ihi = !native_ordering, ilo = !!native_ordering;
|
||||
#endif
|
||||
--e;
|
||||
|
||||
while (q < e) {
|
||||
Py_UCS4 ch2;
|
||||
/* First check for possible aligned read of a C 'long'. Unaligned
|
||||
reads are more expensive, better to defer to another iteration. */
|
||||
if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
|
||||
/* Fast path for runs of in-range non-surrogate chars. */
|
||||
const unsigned char *_q = q;
|
||||
while (_q < aligned_end) {
|
||||
unsigned long block = * (unsigned long *) _q;
|
||||
if (native_ordering) {
|
||||
/* Can use buffer directly */
|
||||
if (block & FAST_CHAR_MASK)
|
||||
break;
|
||||
}
|
||||
else {
|
||||
/* Need to byte-swap */
|
||||
if (block & SWAB(FAST_CHAR_MASK))
|
||||
break;
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
block >>= 8;
|
||||
#else
|
||||
block = SWAB(block);
|
||||
#endif
|
||||
}
|
||||
#if PY_LITTLE_ENDIAN
|
||||
# if SIZEOF_LONG == 4
|
||||
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
p[1] = (STRINGLIB_CHAR)(block >> 16);
|
||||
# elif SIZEOF_LONG == 8
|
||||
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
|
||||
p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
|
||||
p[3] = (STRINGLIB_CHAR)(block >> 48);
|
||||
# endif
|
||||
#else
|
||||
# if SIZEOF_LONG == 4
|
||||
p[0] = (STRINGLIB_CHAR)(block >> 16);
|
||||
p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
# elif SIZEOF_LONG == 8
|
||||
p[0] = (STRINGLIB_CHAR)(block >> 48);
|
||||
p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
|
||||
p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
|
||||
p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
|
||||
# endif
|
||||
#endif
|
||||
_q += SIZEOF_LONG;
|
||||
p += SIZEOF_LONG / 2;
|
||||
}
|
||||
q = _q;
|
||||
if (q >= e)
|
||||
break;
|
||||
}
|
||||
|
||||
ch = (q[ihi] << 8) | q[ilo];
|
||||
q += 2;
|
||||
if (!Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
#if STRINGLIB_SIZEOF_CHAR < 2
|
||||
if (ch > STRINGLIB_MAX_CHAR)
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
#endif
|
||||
*p++ = (STRINGLIB_CHAR)ch;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* UTF-16 code pair: */
|
||||
if (q >= e)
|
||||
goto UnexpectedEnd;
|
||||
if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
|
||||
goto IllegalEncoding;
|
||||
ch2 = (q[ihi] << 8) | q[ilo];
|
||||
q += 2;
|
||||
if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
|
||||
goto IllegalSurrogate;
|
||||
ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
|
||||
#if STRINGLIB_SIZEOF_CHAR < 4
|
||||
/* Out-of-range */
|
||||
goto Return;
|
||||
#else
|
||||
*p++ = (STRINGLIB_CHAR)ch;
|
||||
#endif
|
||||
}
|
||||
ch = 0;
|
||||
Return:
|
||||
*inptr = q;
|
||||
*outpos = p - dest;
|
||||
return ch;
|
||||
UnexpectedEnd:
|
||||
ch = 1;
|
||||
goto Return;
|
||||
IllegalEncoding:
|
||||
ch = 2;
|
||||
goto Return;
|
||||
IllegalSurrogate:
|
||||
ch = 3;
|
||||
goto Return;
|
||||
}
|
||||
#undef UCS2_REPEAT_MASK
|
||||
#undef FAST_CHAR_MASK
|
||||
#undef STRIPPED_MASK
|
||||
#undef SWAB
|
||||
|
||||
|
||||
#if STRINGLIB_MAX_CHAR >= 0x80
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
|
||||
Py_ssize_t len,
|
||||
unsigned short **outptr,
|
||||
int native_ordering)
|
||||
{
|
||||
unsigned short *out = *outptr;
|
||||
const STRINGLIB_CHAR *end = in + len;
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
if (native_ordering) {
|
||||
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
|
||||
while (in < unrolled_end) {
|
||||
out[0] = in[0];
|
||||
out[1] = in[1];
|
||||
out[2] = in[2];
|
||||
out[3] = in[3];
|
||||
in += 4; out += 4;
|
||||
}
|
||||
while (in < end) {
|
||||
*out++ = *in++;
|
||||
}
|
||||
} else {
|
||||
# define SWAB2(CH) ((CH) << 8) /* high byte is zero */
|
||||
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
|
||||
while (in < unrolled_end) {
|
||||
out[0] = SWAB2(in[0]);
|
||||
out[1] = SWAB2(in[1]);
|
||||
out[2] = SWAB2(in[2]);
|
||||
out[3] = SWAB2(in[3]);
|
||||
in += 4; out += 4;
|
||||
}
|
||||
while (in < end) {
|
||||
Py_UCS4 ch = *in++;
|
||||
*out++ = SWAB2((Py_UCS2)ch);
|
||||
}
|
||||
#undef SWAB2
|
||||
}
|
||||
*outptr = out;
|
||||
return len;
|
||||
#else
|
||||
if (native_ordering) {
|
||||
#if STRINGLIB_MAX_CHAR < 0x10000
|
||||
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
|
||||
while (in < unrolled_end) {
|
||||
/* check if any character is a surrogate character */
|
||||
if (((in[0] ^ 0xd800) &
|
||||
(in[1] ^ 0xd800) &
|
||||
(in[2] ^ 0xd800) &
|
||||
(in[3] ^ 0xd800) & 0xf800) == 0)
|
||||
break;
|
||||
out[0] = in[0];
|
||||
out[1] = in[1];
|
||||
out[2] = in[2];
|
||||
out[3] = in[3];
|
||||
in += 4; out += 4;
|
||||
}
|
||||
#endif
|
||||
while (in < end) {
|
||||
Py_UCS4 ch;
|
||||
ch = *in++;
|
||||
if (ch < 0xd800)
|
||||
*out++ = ch;
|
||||
else if (ch < 0xe000)
|
||||
/* reject surrogate characters (U+D800-U+DFFF) */
|
||||
goto fail;
|
||||
#if STRINGLIB_MAX_CHAR >= 0x10000
|
||||
else if (ch >= 0x10000) {
|
||||
out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
|
||||
out[1] = Py_UNICODE_LOW_SURROGATE(ch);
|
||||
out += 2;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
*out++ = ch;
|
||||
}
|
||||
} else {
|
||||
#define SWAB2(CH) (((CH) << 8) | ((CH) >> 8))
|
||||
#if STRINGLIB_MAX_CHAR < 0x10000
|
||||
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
|
||||
while (in < unrolled_end) {
|
||||
/* check if any character is a surrogate character */
|
||||
if (((in[0] ^ 0xd800) &
|
||||
(in[1] ^ 0xd800) &
|
||||
(in[2] ^ 0xd800) &
|
||||
(in[3] ^ 0xd800) & 0xf800) == 0)
|
||||
break;
|
||||
out[0] = SWAB2(in[0]);
|
||||
out[1] = SWAB2(in[1]);
|
||||
out[2] = SWAB2(in[2]);
|
||||
out[3] = SWAB2(in[3]);
|
||||
in += 4; out += 4;
|
||||
}
|
||||
#endif
|
||||
while (in < end) {
|
||||
Py_UCS4 ch = *in++;
|
||||
if (ch < 0xd800)
|
||||
*out++ = SWAB2((Py_UCS2)ch);
|
||||
else if (ch < 0xe000)
|
||||
/* reject surrogate characters (U+D800-U+DFFF) */
|
||||
goto fail;
|
||||
#if STRINGLIB_MAX_CHAR >= 0x10000
|
||||
else if (ch >= 0x10000) {
|
||||
Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
|
||||
Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
|
||||
out[0] = SWAB2(ch1);
|
||||
out[1] = SWAB2(ch2);
|
||||
out += 2;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
*out++ = SWAB2((Py_UCS2)ch);
|
||||
}
|
||||
#undef SWAB2
|
||||
}
|
||||
*outptr = out;
|
||||
return len;
|
||||
fail:
|
||||
*outptr = out;
|
||||
return len - (end - in + 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
# define SWAB4(CH, tmp) ((CH) << 24) /* high bytes are zero */
|
||||
#elif STRINGLIB_SIZEOF_CHAR == 2
|
||||
# define SWAB4(CH, tmp) (tmp = (CH), \
|
||||
((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8))
|
||||
/* high bytes are zero */
|
||||
#else
|
||||
# define SWAB4(CH, tmp) (tmp = (CH), \
|
||||
tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \
|
||||
((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))
|
||||
#endif
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
|
||||
Py_ssize_t len,
|
||||
PY_UINT32_T **outptr,
|
||||
int native_ordering)
|
||||
{
|
||||
PY_UINT32_T *out = *outptr;
|
||||
const STRINGLIB_CHAR *end = in + len;
|
||||
if (native_ordering) {
|
||||
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
|
||||
while (in < unrolled_end) {
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
/* check if any character is a surrogate character */
|
||||
if (((in[0] ^ 0xd800) &
|
||||
(in[1] ^ 0xd800) &
|
||||
(in[2] ^ 0xd800) &
|
||||
(in[3] ^ 0xd800) & 0xf800) == 0)
|
||||
break;
|
||||
#endif
|
||||
out[0] = in[0];
|
||||
out[1] = in[1];
|
||||
out[2] = in[2];
|
||||
out[3] = in[3];
|
||||
in += 4; out += 4;
|
||||
}
|
||||
while (in < end) {
|
||||
Py_UCS4 ch;
|
||||
ch = *in++;
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
if (Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
/* reject surrogate characters (U+D800-U+DFFF) */
|
||||
goto fail;
|
||||
}
|
||||
#endif
|
||||
*out++ = ch;
|
||||
}
|
||||
} else {
|
||||
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
|
||||
while (in < unrolled_end) {
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
Py_UCS4 ch1, ch2, ch3, ch4;
|
||||
/* check if any character is a surrogate character */
|
||||
if (((in[0] ^ 0xd800) &
|
||||
(in[1] ^ 0xd800) &
|
||||
(in[2] ^ 0xd800) &
|
||||
(in[3] ^ 0xd800) & 0xf800) == 0)
|
||||
break;
|
||||
#endif
|
||||
out[0] = SWAB4(in[0], ch1);
|
||||
out[1] = SWAB4(in[1], ch2);
|
||||
out[2] = SWAB4(in[2], ch3);
|
||||
out[3] = SWAB4(in[3], ch4);
|
||||
in += 4; out += 4;
|
||||
}
|
||||
while (in < end) {
|
||||
Py_UCS4 ch = *in++;
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
if (Py_UNICODE_IS_SURROGATE(ch)) {
|
||||
/* reject surrogate characters (U+D800-U+DFFF) */
|
||||
goto fail;
|
||||
}
|
||||
#endif
|
||||
*out++ = SWAB4(ch, ch);
|
||||
}
|
||||
}
|
||||
*outptr = out;
|
||||
return len;
|
||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||
fail:
|
||||
*outptr = out;
|
||||
return len - (end - in + 1);
|
||||
#endif
|
||||
}
|
||||
#undef SWAB4
|
||||
|
||||
#endif
|
27
third_party/python/Objects/stringlib/count.h
vendored
Normal file
27
third_party/python/Objects/stringlib/count.h
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* stringlib: count implementation */
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
Py_ssize_t count;
|
||||
|
||||
if (str_len < 0)
|
||||
return 0; /* start > len(str) */
|
||||
if (sub_len == 0)
|
||||
return (str_len < maxcount) ? str_len + 1 : maxcount;
|
||||
|
||||
count = FASTSEARCH(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
|
||||
|
||||
if (count < 0)
|
||||
return 0; /* no match */
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
110
third_party/python/Objects/stringlib/ctype.h
vendored
Normal file
110
third_party/python/Objects/stringlib/ctype.h
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
#if STRINGLIB_IS_UNICODE
|
||||
# error "ctype.h only compatible with byte-wise strings"
|
||||
#endif
|
||||
|
||||
#include "bytes_methods.h"
|
||||
|
||||
/* Return whatever _Py_bytes_isspace() reports for self's character data. */
static PyObject*
stringlib_isspace(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_isspace(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
/* Return whatever _Py_bytes_isalpha() reports for self's character data. */
static PyObject*
stringlib_isalpha(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_isalpha(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
/* Return whatever _Py_bytes_isalnum() reports for self's character data. */
static PyObject*
stringlib_isalnum(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_isalnum(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
/* Return whatever _Py_bytes_isdigit() reports for self's character data. */
static PyObject*
stringlib_isdigit(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_isdigit(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
/* Return whatever _Py_bytes_islower() reports for self's character data. */
static PyObject*
stringlib_islower(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_islower(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
/* Return whatever _Py_bytes_isupper() reports for self's character data. */
static PyObject*
stringlib_isupper(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_isupper(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
/* Return whatever _Py_bytes_istitle() reports for self's character data. */
static PyObject*
stringlib_istitle(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);

    return _Py_bytes_istitle(STRINGLIB_STR(self), nbytes);
}
|
||||
|
||||
|
||||
/* functions that return a new object partially translated by ctype funcs: */
|
||||
|
||||
/* Create a new object of the same length as self and let _Py_bytes_lower()
   fill it from self's character data.  Returns NULL if STRINGLIB_NEW()
   fails. */
static PyObject*
stringlib_lower(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);
    PyObject *result = STRINGLIB_NEW(NULL, nbytes);

    if (result != NULL) {
        _Py_bytes_lower(STRINGLIB_STR(result), STRINGLIB_STR(self), nbytes);
    }
    return result;
}
|
||||
|
||||
/* Create a new object of the same length as self and let _Py_bytes_upper()
   fill it from self's character data.  Returns NULL if STRINGLIB_NEW()
   fails. */
static PyObject*
stringlib_upper(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);
    PyObject *result = STRINGLIB_NEW(NULL, nbytes);

    if (result != NULL) {
        _Py_bytes_upper(STRINGLIB_STR(result), STRINGLIB_STR(self), nbytes);
    }
    return result;
}
|
||||
|
||||
/* Create a new object of the same length as self and let _Py_bytes_title()
   fill it from self's character data.  Returns NULL if STRINGLIB_NEW()
   fails. */
static PyObject*
stringlib_title(PyObject *self)
{
    const Py_ssize_t nbytes = STRINGLIB_LEN(self);
    PyObject *result = STRINGLIB_NEW(NULL, nbytes);

    if (result != NULL) {
        _Py_bytes_title(STRINGLIB_STR(result), STRINGLIB_STR(self), nbytes);
    }
    return result;
}
|
||||
|
||||
/* Build a new object of the same length as self and let
   _Py_bytes_capitalize() fill it from self's characters.  Returns NULL
   when the allocation through STRINGLIB_NEW fails. */
static PyObject*
stringlib_capitalize(PyObject *self)
{
    const Py_ssize_t length = STRINGLIB_LEN(self);
    PyObject *result = STRINGLIB_NEW(NULL, length);

    if (result != NULL) {
        _Py_bytes_capitalize(STRINGLIB_STR(result), STRINGLIB_STR(self),
                             length);
    }
    return result;
}
|
||||
|
||||
/* Build a new object of the same length as self and let
   _Py_bytes_swapcase() fill it from self's characters.  Returns NULL
   when the allocation through STRINGLIB_NEW fails. */
static PyObject*
stringlib_swapcase(PyObject *self)
{
    const Py_ssize_t length = STRINGLIB_LEN(self);
    PyObject *result = STRINGLIB_NEW(NULL, length);

    if (result != NULL) {
        _Py_bytes_swapcase(STRINGLIB_STR(result), STRINGLIB_STR(self),
                           length);
    }
    return result;
}
|
25
third_party/python/Objects/stringlib/eq.h
vendored
Normal file
25
third_party/python/Objects/stringlib/eq.h
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* Fast unicode equal function optimized for dictobject.c and setobject.c */
|
||||
|
||||
/* Return 1 if two unicode objects are equal, 0 if not.
|
||||
* unicode_eq() is called when the hash of two unicode objects is equal.
|
||||
*/
|
||||
Py_LOCAL_INLINE(int)
|
||||
unicode_eq(PyObject *aa, PyObject *bb)
|
||||
{
|
||||
PyUnicodeObject *a = (PyUnicodeObject *)aa;
|
||||
PyUnicodeObject *b = (PyUnicodeObject *)bb;
|
||||
|
||||
if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) {
|
||||
assert(0 && "unicode_eq ready fail");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b))
|
||||
return 0;
|
||||
if (PyUnicode_GET_LENGTH(a) == 0)
|
||||
return 1;
|
||||
if (PyUnicode_KIND(a) != PyUnicode_KIND(b))
|
||||
return 0;
|
||||
return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b),
|
||||
PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0;
|
||||
}
|
250
third_party/python/Objects/stringlib/fastsearch.h
vendored
Normal file
250
third_party/python/Objects/stringlib/fastsearch.h
vendored
Normal file
|
@ -0,0 +1,250 @@
|
|||
/* stringlib: fastsearch implementation */
|
||||
|
||||
#define STRINGLIB_FASTSEARCH_H
|
||||
|
||||
/* fast search/count implementation, based on a mix between boyer-
|
||||
moore and horspool, with a few more bells and whistles on the top.
|
||||
for some more background, see: http://effbot.org/zone/stringlib.htm */
|
||||
|
||||
/* note: fastsearch may access s[n], which isn't a problem when using
|
||||
Python's ordinary string types, but may cause problems if you're
|
||||
using this code in other contexts. also, the count mode returns -1
|
||||
if there cannot possibly be a match in the target string, and 0 if
|
||||
it has actually checked for matches, but didn't find any. callers
|
||||
beware! */
|
||||
|
||||
#define FAST_COUNT 0
|
||||
#define FAST_SEARCH 1
|
||||
#define FAST_RSEARCH 2
|
||||
|
||||
#if LONG_BIT >= 128
|
||||
#define STRINGLIB_BLOOM_WIDTH 128
|
||||
#elif LONG_BIT >= 64
|
||||
#define STRINGLIB_BLOOM_WIDTH 64
|
||||
#elif LONG_BIT >= 32
|
||||
#define STRINGLIB_BLOOM_WIDTH 32
|
||||
#else
|
||||
#error "LONG_BIT is smaller than 32"
|
||||
#endif
|
||||
|
||||
#define STRINGLIB_BLOOM_ADD(mask, ch) \
|
||||
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
||||
#define STRINGLIB_BLOOM(mask, ch) \
|
||||
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
|
||||
{
|
||||
const STRINGLIB_CHAR *p, *e;
|
||||
|
||||
p = s;
|
||||
e = s + n;
|
||||
if (n > 10) {
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
p = memchr(s, ch, n);
|
||||
if (p != NULL)
|
||||
return (p - s);
|
||||
return -1;
|
||||
#else
|
||||
/* use memchr if we can choose a needle without two many likely
|
||||
false positives */
|
||||
unsigned char needle = ch & 0xff;
|
||||
/* If looking for a multiple of 256, we'd have too
|
||||
many false positives looking for the '\0' byte in UCS2
|
||||
and UCS4 representations. */
|
||||
if (needle != 0) {
|
||||
while (p < e) {
|
||||
void *candidate = memchr(p, needle,
|
||||
(e - p) * sizeof(STRINGLIB_CHAR));
|
||||
if (candidate == NULL)
|
||||
return -1;
|
||||
p = (const STRINGLIB_CHAR *)
|
||||
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
||||
if (*p == ch)
|
||||
return (p - s);
|
||||
/* False positive */
|
||||
p++;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
while (p < e) {
|
||||
if (*p == ch)
|
||||
return (p - s);
|
||||
p++;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
|
||||
{
|
||||
const STRINGLIB_CHAR *p;
|
||||
#ifdef HAVE_MEMRCHR
|
||||
/* memrchr() is a GNU extension, available since glibc 2.1.91.
|
||||
it doesn't seem as optimized as memchr(), but is still quite
|
||||
faster than our hand-written loop below */
|
||||
|
||||
if (n > 10) {
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
p = memrchr(s, ch, n);
|
||||
if (p != NULL)
|
||||
return (p - s);
|
||||
return -1;
|
||||
#else
|
||||
/* use memrchr if we can choose a needle without two many likely
|
||||
false positives */
|
||||
unsigned char needle = ch & 0xff;
|
||||
/* If looking for a multiple of 256, we'd have too
|
||||
many false positives looking for the '\0' byte in UCS2
|
||||
and UCS4 representations. */
|
||||
if (needle != 0) {
|
||||
while (n > 0) {
|
||||
void *candidate = memrchr(s, needle,
|
||||
n * sizeof(STRINGLIB_CHAR));
|
||||
if (candidate == NULL)
|
||||
return -1;
|
||||
p = (const STRINGLIB_CHAR *)
|
||||
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
||||
n = p - s;
|
||||
if (*p == ch)
|
||||
return n;
|
||||
/* False positive */
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif /* HAVE_MEMRCHR */
|
||||
p = s + n;
|
||||
while (p > s) {
|
||||
p--;
|
||||
if (*p == ch)
|
||||
return (p - s);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
||||
const STRINGLIB_CHAR* p, Py_ssize_t m,
|
||||
Py_ssize_t maxcount, int mode)
|
||||
{
|
||||
unsigned long mask;
|
||||
Py_ssize_t skip, count = 0;
|
||||
Py_ssize_t i, j, mlast, w;
|
||||
|
||||
w = n - m;
|
||||
|
||||
if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
|
||||
return -1;
|
||||
|
||||
/* look for special cases */
|
||||
if (m <= 1) {
|
||||
if (m <= 0)
|
||||
return -1;
|
||||
/* use special case for 1-character strings */
|
||||
if (mode == FAST_SEARCH)
|
||||
return STRINGLIB(find_char)(s, n, p[0]);
|
||||
else if (mode == FAST_RSEARCH)
|
||||
return STRINGLIB(rfind_char)(s, n, p[0]);
|
||||
else { /* FAST_COUNT */
|
||||
for (i = 0; i < n; i++)
|
||||
if (s[i] == p[0]) {
|
||||
count++;
|
||||
if (count == maxcount)
|
||||
return maxcount;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
mlast = m - 1;
|
||||
skip = mlast - 1;
|
||||
mask = 0;
|
||||
|
||||
if (mode != FAST_RSEARCH) {
|
||||
const STRINGLIB_CHAR *ss = s + m - 1;
|
||||
const STRINGLIB_CHAR *pp = p + m - 1;
|
||||
|
||||
/* create compressed boyer-moore delta 1 table */
|
||||
|
||||
/* process pattern[:-1] */
|
||||
for (i = 0; i < mlast; i++) {
|
||||
STRINGLIB_BLOOM_ADD(mask, p[i]);
|
||||
if (p[i] == p[mlast])
|
||||
skip = mlast - i - 1;
|
||||
}
|
||||
/* process pattern[-1] outside the loop */
|
||||
STRINGLIB_BLOOM_ADD(mask, p[mlast]);
|
||||
|
||||
for (i = 0; i <= w; i++) {
|
||||
/* note: using mlast in the skip path slows things down on x86 */
|
||||
if (ss[i] == pp[0]) {
|
||||
/* candidate match */
|
||||
for (j = 0; j < mlast; j++)
|
||||
if (s[i+j] != p[j])
|
||||
break;
|
||||
if (j == mlast) {
|
||||
/* got a match! */
|
||||
if (mode != FAST_COUNT)
|
||||
return i;
|
||||
count++;
|
||||
if (count == maxcount)
|
||||
return maxcount;
|
||||
i = i + mlast;
|
||||
continue;
|
||||
}
|
||||
/* miss: check if next character is part of pattern */
|
||||
if (!STRINGLIB_BLOOM(mask, ss[i+1]))
|
||||
i = i + m;
|
||||
else
|
||||
i = i + skip;
|
||||
} else {
|
||||
/* skip: check if next character is part of pattern */
|
||||
if (!STRINGLIB_BLOOM(mask, ss[i+1]))
|
||||
i = i + m;
|
||||
}
|
||||
}
|
||||
} else { /* FAST_RSEARCH */
|
||||
|
||||
/* create compressed boyer-moore delta 1 table */
|
||||
|
||||
/* process pattern[0] outside the loop */
|
||||
STRINGLIB_BLOOM_ADD(mask, p[0]);
|
||||
/* process pattern[:0:-1] */
|
||||
for (i = mlast; i > 0; i--) {
|
||||
STRINGLIB_BLOOM_ADD(mask, p[i]);
|
||||
if (p[i] == p[0])
|
||||
skip = i - 1;
|
||||
}
|
||||
|
||||
for (i = w; i >= 0; i--) {
|
||||
if (s[i] == p[0]) {
|
||||
/* candidate match */
|
||||
for (j = mlast; j > 0; j--)
|
||||
if (s[i+j] != p[j])
|
||||
break;
|
||||
if (j == 0)
|
||||
/* got a match! */
|
||||
return i;
|
||||
/* miss: check if previous character is part of pattern */
|
||||
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
|
||||
i = i - m;
|
||||
else
|
||||
i = i - skip;
|
||||
} else {
|
||||
/* skip: check if previous character is part of pattern */
|
||||
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
|
||||
i = i - m;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mode != FAST_COUNT)
|
||||
return -1;
|
||||
return count;
|
||||
}
|
||||
|
119
third_party/python/Objects/stringlib/find.h
vendored
Normal file
119
third_party/python/Objects/stringlib/find.h
vendored
Normal file
|
@ -0,0 +1,119 @@
|
|||
/* stringlib: find/index implementation */
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t offset)
|
||||
{
|
||||
Py_ssize_t pos;
|
||||
|
||||
assert(str_len >= 0);
|
||||
if (sub_len == 0)
|
||||
return offset;
|
||||
|
||||
pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);
|
||||
|
||||
if (pos >= 0)
|
||||
pos += offset;
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t offset)
|
||||
{
|
||||
Py_ssize_t pos;
|
||||
|
||||
assert(str_len >= 0);
|
||||
if (sub_len == 0)
|
||||
return str_len + offset;
|
||||
|
||||
pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
|
||||
|
||||
if (pos >= 0)
|
||||
pos += offset;
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
|
||||
}
|
||||
|
||||
Py_LOCAL_INLINE(Py_ssize_t)
|
||||
STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
||||
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
|
||||
}
|
||||
|
||||
#ifdef STRINGLIB_WANT_CONTAINS_OBJ
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)
|
||||
{
|
||||
return STRINGLIB(find)(
|
||||
STRINGLIB_STR(str), STRINGLIB_LEN(str),
|
||||
STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
|
||||
) != -1;
|
||||
}
|
||||
|
||||
#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
|
||||
|
||||
/*
|
||||
This function is a helper for the "find" family (find, rfind, index,
|
||||
rindex) and for count, startswith and endswith, because they all have
|
||||
the same behaviour for the arguments.
|
||||
|
||||
It does not touch the variables received until it knows everything
|
||||
is ok.
|
||||
*/
|
||||
|
||||
#define FORMAT_BUFFER_SIZE 50
|
||||
|
||||
Py_LOCAL_INLINE(int)
|
||||
STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
|
||||
PyObject **subobj,
|
||||
Py_ssize_t *start, Py_ssize_t *end)
|
||||
{
|
||||
PyObject *tmp_subobj;
|
||||
Py_ssize_t tmp_start = 0;
|
||||
Py_ssize_t tmp_end = PY_SSIZE_T_MAX;
|
||||
PyObject *obj_start=Py_None, *obj_end=Py_None;
|
||||
char format[FORMAT_BUFFER_SIZE] = "O|OO:";
|
||||
size_t len = strlen(format);
|
||||
|
||||
strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1);
|
||||
format[FORMAT_BUFFER_SIZE - 1] = '\0';
|
||||
|
||||
if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end))
|
||||
return 0;
|
||||
|
||||
/* To support None in "start" and "end" arguments, meaning
|
||||
the same as if they were not passed.
|
||||
*/
|
||||
if (obj_start != Py_None)
|
||||
if (!_PyEval_SliceIndex(obj_start, &tmp_start))
|
||||
return 0;
|
||||
if (obj_end != Py_None)
|
||||
if (!_PyEval_SliceIndex(obj_end, &tmp_end))
|
||||
return 0;
|
||||
|
||||
*start = tmp_start;
|
||||
*end = tmp_end;
|
||||
*subobj = tmp_subobj;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#undef FORMAT_BUFFER_SIZE
|
134
third_party/python/Objects/stringlib/find_max_char.h
vendored
Normal file
134
third_party/python/Objects/stringlib/find_max_char.h
vendored
Normal file
|
@ -0,0 +1,134 @@
|
|||
/* Finding the optimal width of unicode characters in a buffer */
|
||||
|
||||
#if !STRINGLIB_IS_UNICODE
|
||||
# error "find_max_char.h is specific to Unicode"
|
||||
#endif
|
||||
|
||||
/* Mask to quickly check whether a C 'long' contains a
|
||||
non-ASCII, UTF8-encoded char. */
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define UCS1_ASCII_CHAR_MASK 0x80808080UL
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
|
||||
Py_LOCAL_INLINE(Py_UCS4)
|
||||
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *) begin;
|
||||
const unsigned char *aligned_end =
|
||||
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
|
||||
|
||||
while (p < end) {
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
|
||||
/* Help register allocation */
|
||||
const unsigned char *_p = p;
|
||||
while (_p < aligned_end) {
|
||||
unsigned long value = *(unsigned long *) _p;
|
||||
if (value & UCS1_ASCII_CHAR_MASK)
|
||||
return 255;
|
||||
_p += SIZEOF_LONG;
|
||||
}
|
||||
p = _p;
|
||||
if (p == end)
|
||||
break;
|
||||
}
|
||||
if (*p++ & 0x80)
|
||||
return 255;
|
||||
}
|
||||
return 127;
|
||||
}
|
||||
|
||||
#undef ASCII_CHAR_MASK
|
||||
|
||||
#else /* STRINGLIB_SIZEOF_CHAR == 1 */
|
||||
|
||||
#define MASK_ASCII 0xFFFFFF80
|
||||
#define MASK_UCS1 0xFFFFFF00
|
||||
#define MASK_UCS2 0xFFFF0000
|
||||
|
||||
#define MAX_CHAR_ASCII 0x7f
|
||||
#define MAX_CHAR_UCS1 0xff
|
||||
#define MAX_CHAR_UCS2 0xffff
|
||||
#define MAX_CHAR_UCS4 0x10ffff
|
||||
|
||||
Py_LOCAL_INLINE(Py_UCS4)
|
||||
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
||||
{
|
||||
#if STRINGLIB_SIZEOF_CHAR == 2
|
||||
const Py_UCS4 mask_limit = MASK_UCS1;
|
||||
const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;
|
||||
#elif STRINGLIB_SIZEOF_CHAR == 4
|
||||
const Py_UCS4 mask_limit = MASK_UCS2;
|
||||
const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;
|
||||
#else
|
||||
#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
|
||||
#endif
|
||||
Py_UCS4 mask;
|
||||
Py_ssize_t n = end - begin;
|
||||
const STRINGLIB_CHAR *p = begin;
|
||||
const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);
|
||||
Py_UCS4 max_char;
|
||||
|
||||
max_char = MAX_CHAR_ASCII;
|
||||
mask = MASK_ASCII;
|
||||
while (p < unrolled_end) {
|
||||
STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];
|
||||
if (bits & mask) {
|
||||
if (mask == mask_limit) {
|
||||
/* Limit reached */
|
||||
return max_char_limit;
|
||||
}
|
||||
if (mask == MASK_ASCII) {
|
||||
max_char = MAX_CHAR_UCS1;
|
||||
mask = MASK_UCS1;
|
||||
}
|
||||
else {
|
||||
/* mask can't be MASK_UCS2 because of mask_limit above */
|
||||
assert(mask == MASK_UCS1);
|
||||
max_char = MAX_CHAR_UCS2;
|
||||
mask = MASK_UCS2;
|
||||
}
|
||||
/* We check the new mask on the same chars in the next iteration */
|
||||
continue;
|
||||
}
|
||||
p += 4;
|
||||
}
|
||||
while (p < end) {
|
||||
if (p[0] & mask) {
|
||||
if (mask == mask_limit) {
|
||||
/* Limit reached */
|
||||
return max_char_limit;
|
||||
}
|
||||
if (mask == MASK_ASCII) {
|
||||
max_char = MAX_CHAR_UCS1;
|
||||
mask = MASK_UCS1;
|
||||
}
|
||||
else {
|
||||
/* mask can't be MASK_UCS2 because of mask_limit above */
|
||||
assert(mask == MASK_UCS1);
|
||||
max_char = MAX_CHAR_UCS2;
|
||||
mask = MASK_UCS2;
|
||||
}
|
||||
/* We check the new mask on the same chars in the next iteration */
|
||||
continue;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
return max_char;
|
||||
}
|
||||
|
||||
#undef MASK_ASCII
|
||||
#undef MASK_UCS1
|
||||
#undef MASK_UCS2
|
||||
#undef MAX_CHAR_ASCII
|
||||
#undef MAX_CHAR_UCS1
|
||||
#undef MAX_CHAR_UCS2
|
||||
#undef MAX_CHAR_UCS4
|
||||
|
||||
#endif /* STRINGLIB_SIZEOF_CHAR == 1 */
|
||||
|
140
third_party/python/Objects/stringlib/join.h
vendored
Normal file
140
third_party/python/Objects/stringlib/join.h
vendored
Normal file
|
@ -0,0 +1,140 @@
|
|||
/* stringlib: bytes joining implementation */
|
||||
|
||||
#if STRINGLIB_IS_UNICODE
|
||||
#error join.h only compatible with byte-wise strings
|
||||
#endif
|
||||
|
||||
/* Join the items of `iterable` with `sep` between consecutive items.

   Returns a new reference to the result, or NULL with an exception set:
     - the error raised by PySequence_Fast() if `iterable` cannot be
       turned into a sequence,
     - TypeError if an item does not export a simple buffer,
     - OverflowError if the total length would exceed PY_SSIZE_T_MAX,
     - RuntimeError if the sequence changes size while being scanned,
     - MemoryError if the buffer table cannot be allocated.

   An empty sequence yields a new empty object.  For immutable string
   types (STRINGLIB_MUTABLE is 0), a sequence holding exactly one item of
   the exact string type returns that item itself instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
{
    char *sepstr = STRINGLIB_STR(sep);
    const Py_ssize_t seplen = STRINGLIB_LEN(sep);
    PyObject *res = NULL;
    char *p;
    Py_ssize_t seqlen = 0;
    Py_ssize_t sz = 0;
    Py_ssize_t i, nbufs;
    PyObject *seq, *item;
    Py_buffer *buffers = NULL;
#define NB_STATIC_BUFFERS 10
    Py_buffer static_buffers[NB_STATIC_BUFFERS];

    seq = PySequence_Fast(iterable, "can only join an iterable");
    if (seq == NULL) {
        return NULL;
    }

    seqlen = PySequence_Fast_GET_SIZE(seq);
    if (seqlen == 0) {
        Py_DECREF(seq);
        return STRINGLIB_NEW(NULL, 0);
    }
    /* STRINGLIB_MUTABLE is always defined (as 0 or 1), so it has to be
       tested by value: an #ifndef here would compile this shortcut out
       for the immutable types as well. */
#if !STRINGLIB_MUTABLE
    if (seqlen == 1) {
        item = PySequence_Fast_GET_ITEM(seq, 0);
        if (STRINGLIB_CHECK_EXACT(item)) {
            Py_INCREF(item);
            Py_DECREF(seq);
            return item;
        }
    }
#endif
    /* Small sequences use the on-stack buffer table; larger ones get a
       heap-allocated table that is freed at `done`. */
    if (seqlen > NB_STATIC_BUFFERS) {
        buffers = PyMem_NEW(Py_buffer, seqlen);
        if (buffers == NULL) {
            Py_DECREF(seq);
            PyErr_NoMemory();
            return NULL;
        }
    }
    else {
        buffers = static_buffers;
    }

    /* Here is the general case.  Do a pre-pass to figure out the total
     * amount of space we'll need (sz), and see whether all arguments are
     * bytes-like.
     */
    for (i = 0, nbufs = 0; i < seqlen; i++) {
        Py_ssize_t itemlen;
        item = PySequence_Fast_GET_ITEM(seq, i);
        if (PyBytes_CheckExact(item)) {
            /* Fast path: fill in only the fields used below; the entry
               owns a reference to item until PyBuffer_Release(). */
            Py_INCREF(item);
            buffers[i].obj = item;
            buffers[i].buf = PyBytes_AS_STRING(item);
            buffers[i].len = PyBytes_GET_SIZE(item);
        }
        else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
            PyErr_Format(PyExc_TypeError,
                         "sequence item %zd: expected a bytes-like object, "
                         "%.80s found",
                         i, Py_TYPE(item)->tp_name);
            goto error;
        }
        nbufs = i + 1;  /* for error cleanup */
        itemlen = buffers[i].len;
        if (itemlen > PY_SSIZE_T_MAX - sz) {
            PyErr_SetString(PyExc_OverflowError,
                            "join() result is too long");
            goto error;
        }
        sz += itemlen;
        if (i != 0) {
            if (seplen > PY_SSIZE_T_MAX - sz) {
                PyErr_SetString(PyExc_OverflowError,
                                "join() result is too long");
                goto error;
            }
            sz += seplen;
        }
        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
            PyErr_SetString(PyExc_RuntimeError,
                            "sequence changed size during iteration");
            goto error;
        }
    }

    /* Allocate result space. */
    res = STRINGLIB_NEW(NULL, sz);
    if (res == NULL)
        goto error;

    /* Catenate everything. */
    p = STRINGLIB_STR(res);
    if (!seplen) {
        /* fast path: no separator to interleave */
        for (i = 0; i < nbufs; i++) {
            Py_ssize_t n = buffers[i].len;
            char *q = buffers[i].buf;
            memcpy(p, q, n);
            p += n;
        }
        goto done;
    }
    for (i = 0; i < nbufs; i++) {
        Py_ssize_t n;
        char *q;
        if (i) {
            memcpy(p, sepstr, seplen);
            p += seplen;
        }
        n = buffers[i].len;
        q = buffers[i].buf;
        memcpy(p, q, n);
        p += n;
    }
    goto done;

error:
    res = NULL;
done:
    /* Shared cleanup: release only the nbufs entries that were filled. */
    Py_DECREF(seq);
    for (i = 0; i < nbufs; i++)
        PyBuffer_Release(&buffers[i]);
    if (buffers != static_buffers)
        PyMem_FREE(buffers);
    return res;
}
|
||||
|
||||
#undef NB_STATIC_BUFFERS
|
82
third_party/python/Objects/stringlib/localeutil.h
vendored
Normal file
82
third_party/python/Objects/stringlib/localeutil.h
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
/* _PyUnicode_InsertThousandsGrouping() helper functions */
|
||||
|
||||
/* Iteration state for walking a grouping string one group size at a
   time; set up by GroupGenerator_init() and advanced by
   GroupGenerator_next(). */
typedef struct {
|
||||
/* Grouping string being walked; entries are read as group sizes. */
const char *grouping;
|
||||
/* Last group size handed out; returned again once the string's
   terminating 0 is reached.  Starts at 0. */
char previous;
|
||||
Py_ssize_t i; /* Where we're currently pointing in grouping. */
|
||||
} GroupGenerator;
|
||||
|
||||
|
||||
static void
|
||||
GroupGenerator_init(GroupGenerator *self, const char *grouping)
|
||||
{
|
||||
self->grouping = grouping;
|
||||
self->i = 0;
|
||||
self->previous = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Returns the next grouping, or 0 to signify end. */
|
||||
static Py_ssize_t
|
||||
GroupGenerator_next(GroupGenerator *self)
|
||||
{
|
||||
/* Note that we don't really do much error checking here. If a
|
||||
grouping string contains just CHAR_MAX, for example, then just
|
||||
terminate the generator. That shouldn't happen, but at least we
|
||||
fail gracefully. */
|
||||
switch (self->grouping[self->i]) {
|
||||
case 0:
|
||||
return self->previous;
|
||||
case CHAR_MAX:
|
||||
/* Stop the generator. */
|
||||
return 0;
|
||||
default: {
|
||||
char ch = self->grouping[self->i];
|
||||
self->previous = ch;
|
||||
self->i++;
|
||||
return (Py_ssize_t)ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Fill in some digits, leading zeros, and thousands separator. All
|
||||
are optional, depending on when we're called. */
|
||||
static void
|
||||
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
|
||||
PyObject *digits, Py_ssize_t *digits_pos,
|
||||
Py_ssize_t n_chars, Py_ssize_t n_zeros,
|
||||
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
|
||||
Py_UCS4 *maxchar)
|
||||
{
|
||||
if (!writer) {
|
||||
/* if maxchar > 127, maxchar is already set */
|
||||
if (*maxchar == 127 && thousands_sep) {
|
||||
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
|
||||
*maxchar = Py_MAX(*maxchar, maxchar2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (thousands_sep) {
|
||||
*buffer_pos -= thousands_sep_len;
|
||||
|
||||
/* Copy the thousands_sep chars into the buffer. */
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||
thousands_sep, 0,
|
||||
thousands_sep_len);
|
||||
}
|
||||
|
||||
*buffer_pos -= n_chars;
|
||||
*digits_pos -= n_chars;
|
||||
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||
digits, *digits_pos,
|
||||
n_chars);
|
||||
|
||||
if (n_zeros) {
|
||||
*buffer_pos -= n_zeros;
|
||||
enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
|
||||
void *data = PyUnicode_DATA(writer->buffer);
|
||||
FILL(kind, data, '0', *buffer_pos, n_zeros);
|
||||
}
|
||||
}
|
116
third_party/python/Objects/stringlib/partition.h
vendored
Normal file
116
third_party/python/Objects/stringlib/partition.h
vendored
Normal file
|
@ -0,0 +1,116 @@
|
|||
/* stringlib: partition implementation */
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
/* Split str around the first occurrence of sep and return a new 3-tuple
   (head, sep_obj, tail).

   When sep is not found the tuple is (str, empty, empty): for immutable
   types the existing str_obj and STRINGLIB_EMPTY objects are reused, for
   mutable types fresh objects are created.

   Returns NULL with ValueError("empty separator") when sep_len is 0, or
   NULL when the tuple or one of its items cannot be created. */
Py_LOCAL_INLINE(PyObject*)
STRINGLIB(partition)(PyObject* str_obj,
                    const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                    PyObject* sep_obj,
                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{
    PyObject *out;
    Py_ssize_t pos;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }

    out = PyTuple_New(3);
    if (out == NULL)
        return NULL;

    pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_SEARCH);

    if (pos >= 0) {
        /* separator found at pos */
        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
        Py_INCREF(sep_obj);
        PyTuple_SET_ITEM(out, 1, sep_obj);
        pos += sep_len;
        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));

        /* a failed STRINGLIB_NEW left a NULL slot and an exception set */
        if (PyErr_Occurred()) {
            Py_DECREF(out);
            return NULL;
        }
        return out;
    }

    /* separator not found */
#if STRINGLIB_MUTABLE
    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
    PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));

    if (PyErr_Occurred()) {
        Py_DECREF(out);
        return NULL;
    }
#else
    Py_INCREF(str_obj);
    PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
    Py_INCREF(STRINGLIB_EMPTY);
    PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
    Py_INCREF(STRINGLIB_EMPTY);
    PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
#endif
    return out;
}
|
||||
|
||||
/* Split str around the last occurrence of sep and return a new 3-tuple
   (head, sep_obj, tail).

   When sep is not found the tuple is (empty, empty, str): for immutable
   types the existing STRINGLIB_EMPTY and str_obj objects are reused, for
   mutable types fresh objects are created.

   Returns NULL with ValueError("empty separator") when sep_len is 0, or
   NULL when the tuple or one of its items cannot be created. */
Py_LOCAL_INLINE(PyObject*)
STRINGLIB(rpartition)(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     PyObject* sep_obj,
                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{
    PyObject *out;
    Py_ssize_t pos;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }

    out = PyTuple_New(3);
    if (out == NULL)
        return NULL;

    pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_RSEARCH);

    if (pos >= 0) {
        /* separator found at pos */
        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
        Py_INCREF(sep_obj);
        PyTuple_SET_ITEM(out, 1, sep_obj);
        pos += sep_len;
        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));

        /* a failed STRINGLIB_NEW left a NULL slot and an exception set */
        if (PyErr_Occurred()) {
            Py_DECREF(out);
            return NULL;
        }
        return out;
    }

    /* separator not found */
#if STRINGLIB_MUTABLE
    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
    PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));

    if (PyErr_Occurred()) {
        Py_DECREF(out);
        return NULL;
    }
#else
    Py_INCREF(STRINGLIB_EMPTY);
    PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
    Py_INCREF(STRINGLIB_EMPTY);
    PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
    Py_INCREF(str_obj);
    PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
    return out;
}
|
||||
|
53
third_party/python/Objects/stringlib/replace.h
vendored
Normal file
53
third_party/python/Objects/stringlib/replace.h
vendored
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* stringlib: replace implementation */
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
Py_LOCAL_INLINE(void)
|
||||
STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
|
||||
Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
|
||||
{
|
||||
*s = u2;
|
||||
while (--maxcount && ++s != end) {
|
||||
/* Find the next character to be replaced.
|
||||
|
||||
If it occurs often, it is faster to scan for it using an inline
|
||||
loop. If it occurs seldom, it is faster to scan for it using a
|
||||
function call; the overhead of the function call is amortized
|
||||
across the many characters that call covers. We start with an
|
||||
inline loop and use a heuristic to determine whether to fall back
|
||||
to a function call. */
|
||||
if (*s != u1) {
|
||||
int attempts = 10;
|
||||
/* search u1 in a dummy loop */
|
||||
while (1) {
|
||||
if (++s == end)
|
||||
return;
|
||||
if (*s == u1)
|
||||
break;
|
||||
if (!--attempts) {
|
||||
/* if u1 was not found for attempts iterations,
|
||||
use FASTSEARCH() or memchr() */
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
s++;
|
||||
s = memchr(s, u1, end - s);
|
||||
if (s == NULL)
|
||||
return;
|
||||
#else
|
||||
Py_ssize_t i;
|
||||
STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
|
||||
s++;
|
||||
i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
|
||||
if (i < 0)
|
||||
return;
|
||||
s += i;
|
||||
#endif
|
||||
/* restart the dummy loop */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
*s = u2;
|
||||
}
|
||||
}
|
390
third_party/python/Objects/stringlib/split.h
vendored
Normal file
390
third_party/python/Objects/stringlib/split.h
vendored
Normal file
|
@ -0,0 +1,390 @@
|
|||
/* stringlib: split implementation */
|
||||
|
||||
#ifndef STRINGLIB_FASTSEARCH_H
|
||||
#error must include "stringlib/fastsearch.h" before including this module
|
||||
#endif
|
||||
|
||||
/* Overallocate the initial list to reduce the number of reallocs for small
|
||||
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
|
||||
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
|
||||
text (roughly 11 words per line) and field delimited data (usually 1-10
|
||||
fields). For large strings the split algorithms are bandwidth limited
|
||||
so increasing the preallocation likely will not improve things.*/
|
||||
|
||||
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   splits: maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that an expression
   argument is evaluated as a whole; it is expanded twice, so it must not
   have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
|
||||
|
||||
#define SPLIT_APPEND(data, left, right) \
|
||||
sub = STRINGLIB_NEW((data) + (left), \
|
||||
(right) - (left)); \
|
||||
if (sub == NULL) \
|
||||
goto onError; \
|
||||
if (PyList_Append(list, sub)) { \
|
||||
Py_DECREF(sub); \
|
||||
goto onError; \
|
||||
} \
|
||||
else \
|
||||
Py_DECREF(sub);
|
||||
|
||||
#define SPLIT_ADD(data, left, right) { \
|
||||
sub = STRINGLIB_NEW((data) + (left), \
|
||||
(right) - (left)); \
|
||||
if (sub == NULL) \
|
||||
goto onError; \
|
||||
if (count < MAX_PREALLOC) { \
|
||||
PyList_SET_ITEM(list, count, sub); \
|
||||
} else { \
|
||||
if (PyList_Append(list, sub)) { \
|
||||
Py_DECREF(sub); \
|
||||
goto onError; \
|
||||
} \
|
||||
else \
|
||||
Py_DECREF(sub); \
|
||||
} \
|
||||
count++; }
|
||||
|
||||
|
||||
/* Always force the list to the expected size. */
|
||||
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
|
||||
|
||||
/* Split str on runs of whitespace (as defined by STRINGLIB_ISSPACE),
   performing at most `maxcount` splits, and return a new list of the
   pieces.  Leading whitespace is skipped; if the split limit is reached,
   the rest of the string (minus leading whitespace) becomes the last
   item.  Returns NULL if the list or one of its items cannot be created.

   For immutable string types (STRINGLIB_MUTABLE is 0), a str_obj of the
   exact string type that contains no whitespace at all is put into the
   list itself instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(split_whitespace)(PyObject* str_obj,
                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                           Py_ssize_t maxcount)
{
    Py_ssize_t i, j, count=0;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
    PyObject *sub;

    if (list == NULL)
        return NULL;

    i = j = 0;
    while (maxcount-- > 0) {
        /* skip the whitespace before the next word */
        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
            i++;
        if (i == str_len) break;
        /* the word is str[j:i] */
        j = i; i++;
        while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
            i++;
        /* STRINGLIB_MUTABLE is always defined (as 0 or 1), so it has to be
           tested by value: an #ifndef here would compile this shortcut
           out for the immutable types as well. */
#if !STRINGLIB_MUTABLE
        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, j, i);
    }

    if (i < str_len) {
        /* Only occurs when maxcount was reached */
        /* Skip any remaining whitespace and copy to end of string */
        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
            i++;
        if (i != str_len)
            SPLIT_ADD(str, i, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
||||
/* Split str[0:str_len] at each occurrence of the single character `ch`,
   making at most `maxcount` splits, and return the pieces as a new list
   (NULL on error).

   The text after the last split point is always added as the final
   element, so adjacent separators yield empty pieces.  When
   STRINGLIB_MUTABLE is not defined, nothing was split off and `str_obj` is
   of the exact string type, `str_obj` itself becomes the only element. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(split_char)(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR ch,
                     Py_ssize_t maxcount)
{
    /* `sub`, `list`, `count` and the onError label are referenced by name
       from SPLIT_ADD / FIX_PREALLOC_SIZE and must keep these names. */
    Py_ssize_t start = 0, scan = 0, count = 0;
    PyObject *sub;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL) {
        return NULL;
    }

    while (scan < str_len && maxcount-- > 0) {
        /* Advance to the next separator, if there is one. */
        while (scan < str_len && str[scan] != ch) {
            scan++;
        }
        if (scan < str_len) {
            SPLIT_ADD(str, start, scan);
            /* Resume right after the separator. */
            scan++;
            start = scan;
        }
    }
#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* Nothing was split off: reuse str_obj as list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    if (start <= str_len) {
        SPLIT_ADD(str, start, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
||||
/* Split str[0:str_len] at each occurrence of sep[0:sep_len], making at
   most `maxcount` splits, and return the pieces as a new list.

   An empty separator sets ValueError("empty separator") and returns NULL.
   A one-character separator is delegated to split_char.  Otherwise the
   separator is located with FASTSEARCH in FAST_SEARCH mode, and the text
   after the last match is always added as the final element.  When
   STRINGLIB_MUTABLE is not defined, nothing was split off and `str_obj` is
   of the exact string type, `str_obj` itself becomes the only element.
   Returns NULL on error. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(split)(PyObject* str_obj,
                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                Py_ssize_t maxcount)
{
    /* `sub`, `list`, `count` and the onError label are referenced by name
       from SPLIT_ADD / FIX_PREALLOC_SIZE and must keep these names. */
    Py_ssize_t start = 0, rel, count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1) {
        return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount);
    }

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    for (; maxcount > 0; maxcount--) {
        /* `rel` is the match offset relative to `start`. */
        rel = FASTSEARCH(str + start, str_len - start,
                         sep, sep_len, -1, FAST_SEARCH);
        if (rel < 0) {
            break;
        }
        SPLIT_ADD(str, start, start + rel);
        start += rel + sep_len;
    }
#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* Nothing was split off: reuse str_obj as list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        SPLIT_ADD(str, start, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
||||
/* Split str[0:str_len] on runs of whitespace, working from the right end
   and making at most `maxcount` splits.  Returns the pieces as a new list
   in left-to-right order, or NULL on error.

   Pieces are collected right-to-left and the list is reversed at the end.
   Once `maxcount` pieces have been cut off, whatever remains on the left
   (minus its trailing whitespace) becomes one element.  When
   STRINGLIB_MUTABLE is not defined and the whole input is a single word of
   the exact string type, `str_obj` itself is stored instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(rsplit_whitespace)(PyObject* str_obj,
                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                            Py_ssize_t maxcount)
{
    /* `sub`, `list`, `count` and the onError label are referenced by name
       from SPLIT_ADD / FIX_PREALLOC_SIZE and must keep these names. */
    Py_ssize_t pos = str_len - 1, last = str_len - 1, count = 0;
    PyObject *sub;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL) {
        return NULL;
    }

    for (; maxcount > 0; maxcount--) {
        /* Step back over the whitespace behind the next word. */
        while (pos >= 0 && STRINGLIB_ISSPACE(str[pos])) {
            pos--;
        }
        if (pos < 0) {
            break;
        }

        /* str[pos] is the last character of a word; walk back to the
           whitespace in front of it (or past the start). */
        last = pos;
        pos--;
        while (pos >= 0 && !STRINGLIB_ISSPACE(str[pos])) {
            pos--;
        }
#ifndef STRINGLIB_MUTABLE
        if (last == str_len - 1 && pos < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* The word spans the entire input: reuse str_obj as list[0]. */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, pos + 1, last + 1);
    }

    if (pos >= 0) {
        /* Input is left over, which only happens when the split limit was
           reached: drop its trailing whitespace and keep the rest whole. */
        while (pos >= 0 && STRINGLIB_ISSPACE(str[pos])) {
            pos--;
        }
        if (pos >= 0) {
            SPLIT_ADD(str, 0, pos + 1);
        }
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were gathered right-to-left; flip them into reading order. */
    if (PyList_Reverse(list) < 0) {
        goto onError;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
||||
/* Split str[0:str_len] at occurrences of the single character `ch`,
   working from the right end and making at most `maxcount` splits.
   Returns the pieces as a new list in left-to-right order, or NULL on
   error.

   Pieces are collected right-to-left and the list is reversed at the end.
   The text to the left of the last split point is always added as an
   element.  When STRINGLIB_MUTABLE is not defined, nothing was split off
   and `str_obj` is of the exact string type, `str_obj` itself becomes the
   only element. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(rsplit_char)(PyObject* str_obj,
                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR ch,
                      Py_ssize_t maxcount)
{
    /* `sub`, `list`, `count` and the onError label are referenced by name
       from SPLIT_ADD / FIX_PREALLOC_SIZE and must keep these names.
       `end` is the exclusive right edge of the piece being scanned. */
    Py_ssize_t scan = str_len - 1, end = str_len, count = 0;
    PyObject *sub;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL) {
        return NULL;
    }

    while (scan >= 0 && maxcount-- > 0) {
        /* Walk left to the next separator, if there is one. */
        while (scan >= 0 && str[scan] != ch) {
            scan--;
        }
        if (scan >= 0) {
            SPLIT_ADD(str, scan + 1, end);
            /* The separator becomes the right edge of the next piece. */
            end = scan;
            scan--;
        }
    }
#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* Nothing was split off: reuse str_obj as list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    if (end >= 0) {
        SPLIT_ADD(str, 0, end);
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were gathered right-to-left; flip them into reading order. */
    if (PyList_Reverse(list) < 0) {
        goto onError;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
||||
/* Split str[0:str_len] at occurrences of sep[0:sep_len], working from the
   right end and making at most `maxcount` splits.  Returns the pieces as a
   new list in left-to-right order.

   An empty separator sets ValueError("empty separator") and returns NULL.
   A one-character separator is delegated to rsplit_char.  Otherwise the
   separator is located with FASTSEARCH in FAST_RSEARCH mode, pieces are
   collected right-to-left, and the list is reversed at the end.  When
   STRINGLIB_MUTABLE is not defined, nothing was split off and `str_obj` is
   of the exact string type, `str_obj` itself becomes the only element.
   Returns NULL on error. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(rsplit)(PyObject* str_obj,
                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                 Py_ssize_t maxcount)
{
    /* `sub`, `list`, `count` and the onError label are referenced by name
       from SPLIT_ADD / FIX_PREALLOC_SIZE and must keep these names.
       `end` is the exclusive right edge of the part not yet split. */
    Py_ssize_t end, hit, count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1) {
        return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount);
    }

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    end = str_len;
    for (; maxcount > 0; maxcount--) {
        hit = FASTSEARCH(str, end, sep, sep_len, -1, FAST_RSEARCH);
        if (hit < 0) {
            break;
        }
        SPLIT_ADD(str, hit + sep_len, end);
        end = hit;
    }
#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* Nothing was split off: reuse str_obj as list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        SPLIT_ADD(str, 0, end);
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were gathered right-to-left; flip them into reading order. */
    if (PyList_Reverse(list) < 0) {
        goto onError;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
||||
/* Split str[0:str_len] into lines and return them as a new list (NULL on
   error).

   Line boundaries are the characters accepted by STRINGLIB_ISLINEBREAK; a
   "\r\n" pair counts as one break.  The break characters are kept at the
   end of each line only when `keepends` is non-zero.  An empty input
   yields an empty list.  When STRINGLIB_MUTABLE is not defined and the
   first line covers the whole input of an exact-type object, `str_obj`
   itself is appended instead of a copy. */
Py_LOCAL_INLINE(PyObject *)
STRINGLIB(splitlines)(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     int keepends)
{
    /* The list is grown by appending instead of being preallocated:
       splitlines typically sees hundreds of lines, and choosing between
       PyList_SET_ITEM and append per element costs about 2-3% in that
       common case.  Filling the preallocated slots first and appending
       only afterwards would avoid that, but is too much work for little
       gain. */

    /* `sub`, `list` and the onError label are referenced by name from
       SPLIT_APPEND and must keep these names. */
    Py_ssize_t pos = 0;         /* scan position */
    Py_ssize_t line_start = 0;  /* first character of the current line */
    PyObject *sub;
    PyObject *list = PyList_New(0);

    if (list == NULL) {
        return NULL;
    }

    while (pos < str_len) {
        Py_ssize_t line_end;

        /* Scan to the line break that ends this line (or to the end). */
        while (pos < str_len && !STRINGLIB_ISLINEBREAK(str[pos])) {
            pos++;
        }

        line_end = pos;
        if (pos < str_len) {
            /* Consume the break, treating CRLF as a single break. */
            if (str[pos] == '\r' && pos + 1 < str_len && str[pos+1] == '\n') {
                pos += 2;
            }
            else {
                pos++;
            }
            if (keepends) {
                line_end = pos;
            }
        }
#ifndef STRINGLIB_MUTABLE
        if (line_start == 0 && line_end == str_len &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* The first line is the whole input: append str_obj itself. */
            if (PyList_Append(list, str_obj)) {
                goto onError;
            }
            break;
        }
#endif
        SPLIT_APPEND(str, line_start, line_end);
        line_start = pos;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
|
||||
|
28
third_party/python/Objects/stringlib/stringdefs.h
vendored
Normal file
28
third_party/python/Objects/stringlib/stringdefs.h
vendored
Normal file
|
@ -0,0 +1,28 @@
|
|||
#ifndef STRINGLIB_STRINGDEFS_H
#define STRINGLIB_STRINGDEFS_H

/* Stringlib configuration for byte strings: STRINGLIB_CHAR is char and the
   object accessors map to the PyBytes_* API. */

/* this is sort of a hack.  there's at least one place (formatting
   floats) where some stringlib code takes a different path if it's
   compiled as unicode. */
#define STRINGLIB_IS_UNICODE     0

#define FASTSEARCH fastsearch
#define STRINGLIB(F) stringlib_##F
#define STRINGLIB_OBJECT         PyBytesObject
#define STRINGLIB_SIZEOF_CHAR    1
#define STRINGLIB_CHAR           char
#define STRINGLIB_TYPE_NAME      "string"
#define STRINGLIB_PARSE_CODE     "S"
#define STRINGLIB_EMPTY          nullstring
#define STRINGLIB_ISSPACE        Py_ISSPACE
/* Character classification helpers.  The parameter is parenthesized so
   that expression arguments (e.g. `c & 0xFF`) expand correctly.  It is
   evaluated more than once, so it must be free of side effects. */
#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
#define STRINGLIB_ISDECIMAL(x)   (((x) >= '0') && ((x) <= '9'))
/* Value of an ASCII decimal digit, or -1 if x is not one. */
#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? ((x) - '0') : -1)
#define STRINGLIB_STR            PyBytes_AS_STRING
#define STRINGLIB_LEN            PyBytes_GET_SIZE
#define STRINGLIB_NEW            PyBytes_FromStringAndSize
#define STRINGLIB_CHECK          PyBytes_Check
#define STRINGLIB_CHECK_EXACT    PyBytes_CheckExact
#define STRINGLIB_TOSTR          PyObject_Str
#define STRINGLIB_TOASCII        PyObject_Repr
#endif /* !STRINGLIB_STRINGDEFS_H */
|
701
third_party/python/Objects/stringlib/transmogrify.h
vendored
Normal file
701
third_party/python/Objects/stringlib/transmogrify.h
vendored
Normal file
|
@ -0,0 +1,701 @@
|
|||
#if STRINGLIB_IS_UNICODE
|
||||
# error "transmogrify.h only compatible with byte-wise strings"
|
||||
#endif
|
||||
|
||||
/* the more complicated methods. parts of these should be pulled out into the
|
||||
shared code in bytes_methods.c to cut down on duplicate code bloat. */
|
||||
|
||||
/* Produce the "unchanged" result for a method that turned out to be a
   no-op.  For an immutable build (STRINGLIB_MUTABLE is 0 or undefined) an
   object of the exact type is returned as-is with its reference count
   incremented.  In every other case a new object holding a copy of self's
   data is created with STRINGLIB_NEW. */
static inline PyObject *
return_self(PyObject *self)
{
#if STRINGLIB_MUTABLE
    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
    if (!STRINGLIB_CHECK_EXACT(self)) {
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    }
    Py_INCREF(self);
    return self;
#endif
}
|
||||
|
||||
/* expandtabs(tabsize=8): return a copy of self in which every tab is
   replaced by the spaces needed to reach the next multiple of `tabsize`
   columns.  The column counter restarts after each '\n' or '\r'.  With
   tabsize <= 0, tabs are simply dropped.

   Works in two passes: the first computes the output length with overflow
   checks, the second fills the newly allocated object.  Sets OverflowError
   ("result too long") and returns NULL if the length would exceed
   PY_SSIZE_T_MAX; returns NULL if argument parsing or allocation fails. */
static PyObject*
stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"tabsize", 0};
    int tabsize = 8;
    const char *src, *limit, *p;
    char *q;
    Py_ssize_t total;   /* length of all completed output lines */
    Py_ssize_t col;     /* output column within the current line */
    PyObject *u;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
                                     kwlist, &tabsize)) {
        return NULL;
    }

    src = STRINGLIB_STR(self);
    limit = src + STRINGLIB_LEN(self);

    /* First pass: determine size of output string */
    total = 0;
    col = 0;
    for (p = src; p < limit; p++) {
        if (*p != '\t') {
            if (col > PY_SSIZE_T_MAX - 1) {
                goto overflow;
            }
            col++;
            if (*p == '\n' || *p == '\r') {
                /* Line finished: bank its length and restart the column. */
                if (total > PY_SSIZE_T_MAX - col) {
                    goto overflow;
                }
                total += col;
                col = 0;
            }
        }
        else if (tabsize > 0) {
            Py_ssize_t incr = tabsize - (col % tabsize);
            if (col > PY_SSIZE_T_MAX - incr) {
                goto overflow;
            }
            col += incr;
        }
    }

    if (total > PY_SSIZE_T_MAX - col) {
        goto overflow;
    }

    /* Second pass: create output string and fill it */
    u = STRINGLIB_NEW(NULL, total + col);
    if (u == NULL) {
        return NULL;
    }

    col = 0;
    q = STRINGLIB_STR(u);

    for (p = src; p < limit; p++) {
        if (*p != '\t') {
            col++;
            *q++ = *p;
            if (*p == '\n' || *p == '\r') {
                col = 0;
            }
        }
        else if (tabsize > 0) {
            /* Always at least one space, since col % tabsize < tabsize. */
            Py_ssize_t spaces = tabsize - (col % tabsize);
            col += spaces;
            while (spaces-- > 0) {
                *q++ = ' ';
            }
        }
    }

    return u;

  overflow:
    PyErr_SetString(PyExc_OverflowError, "result too long");
    return NULL;
}
|
||||
|
||||
static inline PyObject *
|
||||
pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
|
||||
{
|
||||
PyObject *u;
|
||||
|
||||
if (left < 0)
|
||||
left = 0;
|
||||
if (right < 0)
|
||||
right = 0;
|
||||
|
||||
if (left == 0 && right == 0) {
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
|
||||
if (u) {
|
||||
if (left)
|
||||
memset(STRINGLIB_STR(u), fill, left);
|
||||
memcpy(STRINGLIB_STR(u) + left,
|
||||
STRINGLIB_STR(self),
|
||||
STRINGLIB_LEN(self));
|
||||
if (right)
|
||||
memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
|
||||
fill, right);
|
||||
}
|
||||
|
||||
return u;
|
||||
}
|
||||
|
||||
/* ljust(width[, fillchar]): left-justify self in a field of `width`
   characters, padding on the right with `fillchar` (default ' ').  If
   self is already at least `width` long the result comes from
   return_self().  Returns NULL if argument parsing or allocation fails. */
static PyObject *
stringlib_ljust(PyObject *self, PyObject *args)
{
    char fillchar = ' ';
    Py_ssize_t width, len;

    if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) {
        return NULL;
    }

    len = STRINGLIB_LEN(self);
    if (width <= len) {
        return return_self(self);
    }
    return pad(self, 0, width - len, fillchar);
}
|
||||
|
||||
|
||||
/* rjust(width[, fillchar]): right-justify self in a field of `width`
   characters, padding on the left with `fillchar` (default ' ').  If self
   is already at least `width` long the result comes from return_self().
   Returns NULL if argument parsing or allocation fails. */
static PyObject *
stringlib_rjust(PyObject *self, PyObject *args)
{
    char fillchar = ' ';
    Py_ssize_t width, len;

    if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) {
        return NULL;
    }

    len = STRINGLIB_LEN(self);
    if (width <= len) {
        return return_self(self);
    }
    return pad(self, width - len, 0, fillchar);
}
|
||||
|
||||
|
||||
/* center(width[, fillchar]): center self in a field of `width`
   characters, padding both sides with `fillchar` (default ' ').  If self
   is already at least `width` long the result comes from return_self().
   Returns NULL if argument parsing or allocation fails. */
static PyObject *
stringlib_center(PyObject *self, PyObject *args)
{
    char fillchar = ' ';
    Py_ssize_t width, len, marg, left;

    if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) {
        return NULL;
    }

    len = STRINGLIB_LEN(self);
    if (width <= len) {
        return return_self(self);
    }

    /* Total padding, and the share that goes on the left.  When both the
       padding and the width are odd, the extra character goes left. */
    marg = width - len;
    left = marg / 2 + (marg & width & 1);

    return pad(self, left, marg - left, fillchar);
}
|
||||
|
||||
/* zfill(width): pad self on the left with '0' characters up to `width`.
   If the original contents start with '+' or '-', that sign is moved in
   front of the padding.  If self is already at least `width` long the
   result comes from return_self().  Returns NULL if argument parsing or
   allocation fails. */
static PyObject *
stringlib_zfill(PyObject *self, PyObject *args)
{
    Py_ssize_t width, len, fill;
    PyObject *s;
    char *p;
    char first;

    if (!PyArg_ParseTuple(args, "n:zfill", &width)) {
        return NULL;
    }

    len = STRINGLIB_LEN(self);
    if (width <= len) {
        return return_self(self);
    }

    fill = width - len;
    s = pad(self, fill, 0, '0');
    if (s == NULL) {
        return NULL;
    }

    /* p[fill] is where the original contents begin in the padded copy. */
    p = STRINGLIB_STR(s);
    first = p[fill];
    if (first == '+' || first == '-') {
        /* move sign to beginning of string */
        p[0] = first;
        p[fill] = '0';
    }

    return s;
}
|
||||
|
||||
|
||||
/* find and count characters and substrings */
|
||||
|
||||
/* Locate the first occurrence of byte `c` in target[0:target_len] using
   memchr.  Evaluates to a char pointer to the match, or NULL if there is
   none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
|
||||
|
||||
|
||||
static Py_ssize_t
|
||||
countchar(const char *target, Py_ssize_t target_len, char c,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
Py_ssize_t count = 0;
|
||||
const char *start = target;
|
||||
const char *end = target + target_len;
|
||||
|
||||
while ((start = findchar(start, end - start, c)) != NULL) {
|
||||
count++;
|
||||
if (count >= maxcount)
|
||||
break;
|
||||
start += 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/* Algorithms for different cases of string replacement */
|
||||
|
||||
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
|
||||
static PyObject *
|
||||
stringlib_replace_interleave(PyObject *self,
|
||||
const char *to_s, Py_ssize_t to_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s;
|
||||
char *result_s;
|
||||
Py_ssize_t self_len, result_len;
|
||||
Py_ssize_t count, i;
|
||||
PyObject *result;
|
||||
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
|
||||
/* 1 at the end plus 1 after every character;
|
||||
count = min(maxcount, self_len + 1) */
|
||||
if (maxcount <= self_len) {
|
||||
count = maxcount;
|
||||
}
|
||||
else {
|
||||
/* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
|
||||
count = self_len + 1;
|
||||
}
|
||||
|
||||
/* Check for overflow */
|
||||
/* result_len = count * to_len + self_len; */
|
||||
assert(count > 0);
|
||||
if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"replace bytes are too long");
|
||||
return NULL;
|
||||
}
|
||||
result_len = count * to_len + self_len;
|
||||
result = STRINGLIB_NEW(NULL, result_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self_s = STRINGLIB_STR(self);
|
||||
result_s = STRINGLIB_STR(result);
|
||||
|
||||
if (to_len > 1) {
|
||||
/* Lay the first one down (guaranteed this will occur) */
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
count -= 1;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
*result_s++ = *self_s++;
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
}
|
||||
}
|
||||
else {
|
||||
result_s[0] = to_s[0];
|
||||
result_s += to_len;
|
||||
count -= 1;
|
||||
for (i = 0; i < count; i++) {
|
||||
*result_s++ = *self_s++;
|
||||
result_s[0] = to_s[0];
|
||||
result_s += to_len;
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy the rest of the original string */
|
||||
memcpy(result_s, self_s, self_len - i);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Special case for deleting a single character */
|
||||
/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
|
||||
static PyObject *
|
||||
stringlib_replace_delete_single_character(PyObject *self,
|
||||
char from_c, Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s, *start, *next, *end;
|
||||
char *result_s;
|
||||
Py_ssize_t self_len, result_len;
|
||||
Py_ssize_t count;
|
||||
PyObject *result;
|
||||
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
self_s = STRINGLIB_STR(self);
|
||||
|
||||
count = countchar(self_s, self_len, from_c, maxcount);
|
||||
if (count == 0) {
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
result_len = self_len - count; /* from_len == 1 */
|
||||
assert(result_len>=0);
|
||||
|
||||
result = STRINGLIB_NEW(NULL, result_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result_s = STRINGLIB_STR(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
next = findchar(start, end - start, from_c);
|
||||
if (next == NULL)
|
||||
break;
|
||||
memcpy(result_s, start, next - start);
|
||||
result_s += (next - start);
|
||||
start = next + 1;
|
||||
}
|
||||
memcpy(result_s, start, end - start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
|
||||
|
||||
static PyObject *
|
||||
stringlib_replace_delete_substring(PyObject *self,
|
||||
const char *from_s, Py_ssize_t from_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s, *start, *next, *end;
|
||||
char *result_s;
|
||||
Py_ssize_t self_len, result_len;
|
||||
Py_ssize_t count, offset;
|
||||
PyObject *result;
|
||||
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
self_s = STRINGLIB_STR(self);
|
||||
|
||||
count = stringlib_count(self_s, self_len,
|
||||
from_s, from_len,
|
||||
maxcount);
|
||||
|
||||
if (count == 0) {
|
||||
/* no matches */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
result_len = self_len - (count * from_len);
|
||||
assert (result_len>=0);
|
||||
|
||||
result = STRINGLIB_NEW(NULL, result_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result_s = STRINGLIB_STR(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
offset = stringlib_find(start, end - start,
|
||||
from_s, from_len,
|
||||
0);
|
||||
if (offset == -1)
|
||||
break;
|
||||
next = start + offset;
|
||||
|
||||
memcpy(result_s, start, next - start);
|
||||
|
||||
result_s += (next - start);
|
||||
start = next + from_len;
|
||||
}
|
||||
memcpy(result_s, start, end - start);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
|
||||
static PyObject *
|
||||
stringlib_replace_single_character_in_place(PyObject *self,
|
||||
char from_c, char to_c,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s, *end;
|
||||
char *result_s, *start, *next;
|
||||
Py_ssize_t self_len;
|
||||
PyObject *result;
|
||||
|
||||
/* The result string will be the same size */
|
||||
self_s = STRINGLIB_STR(self);
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
|
||||
next = findchar(self_s, self_len, from_c);
|
||||
|
||||
if (next == NULL) {
|
||||
/* No matches; return the original bytes */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* Need to make a new bytes */
|
||||
result = STRINGLIB_NEW(NULL, self_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result_s = STRINGLIB_STR(result);
|
||||
memcpy(result_s, self_s, self_len);
|
||||
|
||||
/* change everything in-place, starting with this one */
|
||||
start = result_s + (next - self_s);
|
||||
*start = to_c;
|
||||
start++;
|
||||
end = result_s + self_len;
|
||||
|
||||
while (--maxcount > 0) {
|
||||
next = findchar(start, end - start, from_c);
|
||||
if (next == NULL)
|
||||
break;
|
||||
*next = to_c;
|
||||
start = next + 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
|
||||
static PyObject *
|
||||
stringlib_replace_substring_in_place(PyObject *self,
|
||||
const char *from_s, Py_ssize_t from_len,
|
||||
const char *to_s, Py_ssize_t to_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s, *end;
|
||||
char *result_s, *start;
|
||||
Py_ssize_t self_len, offset;
|
||||
PyObject *result;
|
||||
|
||||
/* The result bytes will be the same size */
|
||||
|
||||
self_s = STRINGLIB_STR(self);
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
|
||||
offset = stringlib_find(self_s, self_len,
|
||||
from_s, from_len,
|
||||
0);
|
||||
if (offset == -1) {
|
||||
/* No matches; return the original bytes */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* Need to make a new bytes */
|
||||
result = STRINGLIB_NEW(NULL, self_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result_s = STRINGLIB_STR(result);
|
||||
memcpy(result_s, self_s, self_len);
|
||||
|
||||
/* change everything in-place, starting with this one */
|
||||
start = result_s + offset;
|
||||
memcpy(start, to_s, from_len);
|
||||
start += from_len;
|
||||
end = result_s + self_len;
|
||||
|
||||
while ( --maxcount > 0) {
|
||||
offset = stringlib_find(start, end - start,
|
||||
from_s, from_len,
|
||||
0);
|
||||
if (offset == -1)
|
||||
break;
|
||||
memcpy(start + offset, to_s, from_len);
|
||||
start += offset + from_len;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
|
||||
static PyObject *
|
||||
stringlib_replace_single_character(PyObject *self,
|
||||
char from_c,
|
||||
const char *to_s, Py_ssize_t to_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s, *start, *next, *end;
|
||||
char *result_s;
|
||||
Py_ssize_t self_len, result_len;
|
||||
Py_ssize_t count;
|
||||
PyObject *result;
|
||||
|
||||
self_s = STRINGLIB_STR(self);
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
|
||||
count = countchar(self_s, self_len, from_c, maxcount);
|
||||
if (count == 0) {
|
||||
/* no matches, return unchanged */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* use the difference between current and new, hence the "-1" */
|
||||
/* result_len = self_len + count * (to_len-1) */
|
||||
assert(count > 0);
|
||||
if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
|
||||
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
|
||||
return NULL;
|
||||
}
|
||||
result_len = self_len + count * (to_len - 1);
|
||||
|
||||
result = STRINGLIB_NEW(NULL, result_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result_s = STRINGLIB_STR(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
next = findchar(start, end - start, from_c);
|
||||
if (next == NULL)
|
||||
break;
|
||||
|
||||
if (next == start) {
|
||||
/* replace with the 'to' */
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start += 1;
|
||||
} else {
|
||||
/* copy the unchanged old then the 'to' */
|
||||
memcpy(result_s, start, next - start);
|
||||
result_s += (next - start);
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start = next + 1;
|
||||
}
|
||||
}
|
||||
/* Copy the remainder of the remaining bytes */
|
||||
memcpy(result_s, start, end - start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
|
||||
static PyObject *
|
||||
stringlib_replace_substring(PyObject *self,
|
||||
const char *from_s, Py_ssize_t from_len,
|
||||
const char *to_s, Py_ssize_t to_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
const char *self_s, *start, *next, *end;
|
||||
char *result_s;
|
||||
Py_ssize_t self_len, result_len;
|
||||
Py_ssize_t count, offset;
|
||||
PyObject *result;
|
||||
|
||||
self_s = STRINGLIB_STR(self);
|
||||
self_len = STRINGLIB_LEN(self);
|
||||
|
||||
count = stringlib_count(self_s, self_len,
|
||||
from_s, from_len,
|
||||
maxcount);
|
||||
|
||||
if (count == 0) {
|
||||
/* no matches, return unchanged */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* Check for overflow */
|
||||
/* result_len = self_len + count * (to_len-from_len) */
|
||||
assert(count > 0);
|
||||
if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
|
||||
PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
|
||||
return NULL;
|
||||
}
|
||||
result_len = self_len + count * (to_len - from_len);
|
||||
|
||||
result = STRINGLIB_NEW(NULL, result_len);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result_s = STRINGLIB_STR(result);
|
||||
|
||||
start = self_s;
|
||||
end = self_s + self_len;
|
||||
while (count-- > 0) {
|
||||
offset = stringlib_find(start, end - start,
|
||||
from_s, from_len,
|
||||
0);
|
||||
if (offset == -1)
|
||||
break;
|
||||
next = start + offset;
|
||||
if (next == start) {
|
||||
/* replace with the 'to' */
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start += from_len;
|
||||
} else {
|
||||
/* copy the unchanged old then the 'to' */
|
||||
memcpy(result_s, start, next - start);
|
||||
result_s += (next - start);
|
||||
memcpy(result_s, to_s, to_len);
|
||||
result_s += to_len;
|
||||
start = next + from_len;
|
||||
}
|
||||
}
|
||||
/* Copy the remainder of the remaining bytes */
|
||||
memcpy(result_s, start, end - start);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
stringlib_replace(PyObject *self,
|
||||
const char *from_s, Py_ssize_t from_len,
|
||||
const char *to_s, Py_ssize_t to_len,
|
||||
Py_ssize_t maxcount)
|
||||
{
|
||||
if (maxcount < 0) {
|
||||
maxcount = PY_SSIZE_T_MAX;
|
||||
} else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
|
||||
/* nothing to do; return the original bytes */
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
/* Handle zero-length special cases */
|
||||
if (from_len == 0) {
|
||||
if (to_len == 0) {
|
||||
/* nothing to do; return the original bytes */
|
||||
return return_self(self);
|
||||
}
|
||||
/* insert the 'to' bytes everywhere. */
|
||||
/* >>> b"Python".replace(b"", b".") */
|
||||
/* b'.P.y.t.h.o.n.' */
|
||||
return stringlib_replace_interleave(self, to_s, to_len, maxcount);
|
||||
}
|
||||
|
||||
/* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */
|
||||
/* point for an empty self bytes to generate a non-empty bytes */
|
||||
/* Special case so the remaining code always gets a non-empty bytes */
|
||||
if (STRINGLIB_LEN(self) == 0) {
|
||||
return return_self(self);
|
||||
}
|
||||
|
||||
if (to_len == 0) {
|
||||
/* delete all occurrences of 'from' bytes */
|
||||
if (from_len == 1) {
|
||||
return stringlib_replace_delete_single_character(
|
||||
self, from_s[0], maxcount);
|
||||
} else {
|
||||
return stringlib_replace_delete_substring(
|
||||
self, from_s, from_len, maxcount);
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle special case where both bytes have the same length */
|
||||
|
||||
if (from_len == to_len) {
|
||||
if (from_len == 1) {
|
||||
return stringlib_replace_single_character_in_place(
|
||||
self, from_s[0], to_s[0], maxcount);
|
||||
} else {
|
||||
return stringlib_replace_substring_in_place(
|
||||
self, from_s, from_len, to_s, to_len, maxcount);
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise use the more generic algorithms */
|
||||
if (from_len == 1) {
|
||||
return stringlib_replace_single_character(
|
||||
self, from_s[0], to_s, to_len, maxcount);
|
||||
} else {
|
||||
/* len('from')>=2, len('to')>=1 */
|
||||
return stringlib_replace_substring(
|
||||
self, from_s, from_len, to_s, to_len, maxcount);
|
||||
}
|
||||
}
|
||||
|
||||
#undef findchar
|
30
third_party/python/Objects/stringlib/ucs1lib.h
vendored
Normal file
30
third_party/python/Objects/stringlib/ucs1lib.h
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* Stringlib configuration for unicode data stored as Py_UCS1 (one byte
   per character, maximum code point 0xFF). */

/* Flag consulted by stringlib code that takes a different path when it is
   compiled as unicode (at least one place: formatting floats). */
#define STRINGLIB_IS_UNICODE     1

/* Name mangling: generated functions get the ucs1lib_ prefix. */
#define STRINGLIB(F)             ucs1lib_##F
#define FASTSEARCH               ucs1lib_fastsearch
#define _Py_InsertThousandsGrouping _PyUnicode_ucs1_InsertThousandsGrouping

/* Character type and its limits. */
#define STRINGLIB_CHAR           Py_UCS1
#define STRINGLIB_SIZEOF_CHAR    1
#define STRINGLIB_MAX_CHAR       0xFFu

/* Object type and its accessors. */
#define STRINGLIB_OBJECT         PyUnicodeObject
#define STRINGLIB_TYPE_NAME      "unicode"
#define STRINGLIB_PARSE_CODE     "U"
#define STRINGLIB_EMPTY          unicode_empty
#define STRINGLIB_STR            PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
#define STRINGLIB_NEW            _PyUnicode_FromUCS1
#define STRINGLIB_CHECK          PyUnicode_Check
#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
#define STRINGLIB_TOSTR          PyObject_Str
#define STRINGLIB_TOASCII        PyObject_ASCII

/* Character classification. */
#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
|
||||
|
||||
|
29
third_party/python/Objects/stringlib/ucs2lib.h
vendored
Normal file
29
third_party/python/Objects/stringlib/ucs2lib.h
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* Stringlib configuration for unicode data stored as Py_UCS2 (two bytes
   per character, maximum code point 0xFFFF). */

/* Flag consulted by stringlib code that takes a different path when it is
   compiled as unicode (at least one place: formatting floats). */
#define STRINGLIB_IS_UNICODE     1

/* Name mangling: generated functions get the ucs2lib_ prefix. */
#define STRINGLIB(F)             ucs2lib_##F
#define FASTSEARCH               ucs2lib_fastsearch
#define _Py_InsertThousandsGrouping _PyUnicode_ucs2_InsertThousandsGrouping

/* Character type and its limits. */
#define STRINGLIB_CHAR           Py_UCS2
#define STRINGLIB_SIZEOF_CHAR    2
#define STRINGLIB_MAX_CHAR       0xFFFFu

/* Object type and its accessors. */
#define STRINGLIB_OBJECT         PyUnicodeObject
#define STRINGLIB_TYPE_NAME      "unicode"
#define STRINGLIB_PARSE_CODE     "U"
#define STRINGLIB_EMPTY          unicode_empty
#define STRINGLIB_STR            PyUnicode_2BYTE_DATA
#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
#define STRINGLIB_NEW            _PyUnicode_FromUCS2
#define STRINGLIB_CHECK          PyUnicode_Check
#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
#define STRINGLIB_TOSTR          PyObject_Str
#define STRINGLIB_TOASCII        PyObject_ASCII

/* Character classification. */
#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
|
||||
|
29
third_party/python/Objects/stringlib/ucs4lib.h
vendored
Normal file
29
third_party/python/Objects/stringlib/ucs4lib.h
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* this is sort of a hack. there's at least one place (formatting
|
||||
floats) where some stringlib code takes a different path if it's
|
||||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 1
|
||||
|
||||
/* Parameter macros for the "ucs4lib" flavour of the shared stringlib code.
   Each value below is the Py_UCS4 / 4-byte counterpart of the settings made
   for the ucs1lib and ucs2lib flavours. */
#define FASTSEARCH ucs4lib_fastsearch /* flavour-specific name substituted for FASTSEARCH */
|
||||
#define STRINGLIB(F) ucs4lib_##F /* pastes the ucs4lib_ prefix onto the name F */
|
||||
#define STRINGLIB_OBJECT PyUnicodeObject /* presumably the struct of the objects handled -- confirm at use sites */
|
||||
#define STRINGLIB_SIZEOF_CHAR 4 /* presumably sizeof(STRINGLIB_CHAR), in bytes */
|
||||
#define STRINGLIB_MAX_CHAR 0x10FFFFu /* below the 32-bit limit; presumably the highest Unicode code point -- confirm */
|
||||
#define STRINGLIB_CHAR Py_UCS4 /* the type used to hold a character */
|
||||
#define STRINGLIB_TYPE_NAME "unicode" /* type name as text; presumably for messages -- TODO confirm */
|
||||
#define STRINGLIB_PARSE_CODE "U" /* NOTE(review): looks like an argument-parsing format code -- confirm */
|
||||
#define STRINGLIB_EMPTY unicode_empty /* object representing the empty string; not defined in this header */
|
||||
/* Character classification / conversion hooks, mapped onto the
   Py_UNICODE_* helpers; line-break detection maps to BLOOM_LINEBREAK,
   which this header does not define. */
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
|
||||
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
|
||||
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
|
||||
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
|
||||
#define STRINGLIB_STR PyUnicode_4BYTE_DATA /* pointer to the character data of a string object */
|
||||
#define STRINGLIB_LEN PyUnicode_GET_LENGTH /* length of a string object */
|
||||
#define STRINGLIB_NEW _PyUnicode_FromUCS4 /* creates a new string object */
|
||||
#define STRINGLIB_CHECK PyUnicode_Check /* type check; presumably also accepts subclasses -- confirm */
|
||||
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact /* true for an instance of the type itself, not a subclass */
|
||||
|
||||
/* Object-to-text conversions are forwarded to PyObject_Str / PyObject_ASCII. */
#define STRINGLIB_TOSTR PyObject_Str
|
||||
#define STRINGLIB_TOASCII PyObject_ASCII
|
||||
|
||||
/* Any later use of _Py_InsertThousandsGrouping is rewritten to the
   ucs4-specific name, so this flavour gets a symbol of its own. */
#define _Py_InsertThousandsGrouping _PyUnicode_ucs4_InsertThousandsGrouping
|
||||
|
11
third_party/python/Objects/stringlib/undef.h
vendored
Normal file
11
third_party/python/Objects/stringlib/undef.h
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* Removes ten of the macros set by the ucs1lib/ucs2lib/ucs4lib parameter
   headers -- presumably so that a different flavour can be selected
   afterwards without macro-redefinition conflicts.
   NOTE(review): the other macros those headers set (STRINGLIB_OBJECT,
   STRINGLIB_TYPE_NAME, STRINGLIB_PARSE_CODE, STRINGLIB_EMPTY, the
   STRINGLIB_IS... / STRINGLIB_TODECIMAL hooks, STRINGLIB_CHECK,
   STRINGLIB_CHECK_EXACT, STRINGLIB_TOSTR, STRINGLIB_TOASCII) are NOT removed
   here.  They expand identically in all three ucsNlib headers, so defining
   them again is legal; a flavour with different values would need matching
   #undef lines added. */
#undef FASTSEARCH
|
||||
#undef STRINGLIB
|
||||
#undef STRINGLIB_SIZEOF_CHAR
|
||||
#undef STRINGLIB_MAX_CHAR
|
||||
#undef STRINGLIB_CHAR
|
||||
#undef STRINGLIB_STR
|
||||
#undef STRINGLIB_LEN
|
||||
#undef STRINGLIB_NEW
|
||||
#undef _Py_InsertThousandsGrouping /* drops the rename to a per-flavour symbol */
|
||||
#undef STRINGLIB_IS_UNICODE
|
||||
|
1288
third_party/python/Objects/stringlib/unicode_format.h
vendored
Normal file
1288
third_party/python/Objects/stringlib/unicode_format.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
32
third_party/python/Objects/stringlib/unicodedefs.h
vendored
Normal file
32
third_party/python/Objects/stringlib/unicodedefs.h
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* Include guard: everything down to the matching #endif is processed at
   most once per translation unit. */
#ifndef STRINGLIB_UNICODEDEFS_H
|
||||
#define STRINGLIB_UNICODEDEFS_H
|
||||
|
||||
/* this is sort of a hack. there's at least one place (formatting
|
||||
floats) where some stringlib code takes a different path if it's
|
||||
compiled as unicode. */
|
||||
#define STRINGLIB_IS_UNICODE 1
|
||||
|
||||
/* Parameter macros for a Py_UNICODE-based flavour of the shared stringlib
   code.  Unlike the ucs1lib/ucs2lib/ucs4lib headers, this one keeps the
   plain names "fastsearch" and "stringlib_", sets no STRINGLIB_MAX_CHAR, and
   does not rename _Py_InsertThousandsGrouping. */
#define FASTSEARCH fastsearch
|
||||
#define STRINGLIB(F) stringlib_##F /* pastes the stringlib_ prefix onto the name F */
|
||||
#define STRINGLIB_OBJECT PyUnicodeObject /* presumably the struct of the objects handled -- confirm at use sites */
|
||||
#define STRINGLIB_SIZEOF_CHAR Py_UNICODE_SIZE /* presumably sizeof(Py_UNICODE), in bytes -- confirm */
|
||||
#define STRINGLIB_CHAR Py_UNICODE /* the type used to hold a character */
|
||||
#define STRINGLIB_TYPE_NAME "unicode" /* type name as text; presumably for messages -- TODO confirm */
|
||||
#define STRINGLIB_PARSE_CODE "U" /* NOTE(review): looks like an argument-parsing format code -- confirm */
|
||||
#define STRINGLIB_EMPTY unicode_empty /* object representing the empty string; not defined in this header */
|
||||
/* Character classification / conversion hooks, mapped onto the
   Py_UNICODE_* helpers; line-break detection maps to BLOOM_LINEBREAK,
   which this header does not define. */
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
|
||||
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
|
||||
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
|
||||
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
|
||||
#define STRINGLIB_STR PyUnicode_AS_UNICODE /* pointer to the character data of a string object */
|
||||
#define STRINGLIB_LEN PyUnicode_GET_SIZE /* length of a string object */
|
||||
#define STRINGLIB_NEW PyUnicode_FromUnicode /* creates a new string object */
|
||||
#define STRINGLIB_CHECK PyUnicode_Check /* type check; presumably also accepts subclasses -- confirm */
|
||||
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact /* true for an instance of the type itself, not a subclass */
|
||||
|
||||
/* Object-to-text conversions are forwarded to PyObject_Str / PyObject_ASCII. */
#define STRINGLIB_TOSTR PyObject_Str
|
||||
#define STRINGLIB_TOASCII PyObject_ASCII
|
||||
|
||||
/* NOTE(review): presumably asks the shared code to also provide an
   object-level "contains" helper -- the consumer is not in this header;
   confirm where STRINGLIB_WANT_CONTAINS_OBJ is tested. */
#define STRINGLIB_WANT_CONTAINS_OBJ 1
|
||||
|
||||
#endif /* !STRINGLIB_UNICODEDEFS_H */
|
Loading…
Add table
Add a link
Reference in a new issue