mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-08 15:33:33 +00:00
9b29358511
Status lines for Emacs and Vim have been added to Python sources so they'll be easier to edit using Python's preferred coding style. Some DNS helper functions have been broken up into multiple files. It's nice to have one function per file whenever possible, since that way we don't need -ffunction-sections. Another reason it's good to have small source files, is because the build will be enforcing resource limits on compilation and testing soon.
398 lines
12 KiB
C++
398 lines
12 KiB
C++
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Python 3 │
|
|
│ https://docs.python.org/3/license.html │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
/* clang-format off */
|
|
|
|
/* stringlib: split implementation */
|
|
|
|
#ifndef STRINGLIB_FASTSEARCH_H
|
|
#error must include fastsearch.inc before including this module
|
|
#endif
|
|
|
|
/* Overallocate the initial list to reduce the number of reallocs for small
|
|
split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
|
|
resizes, to sizes 4, 8, then 16. Most observed string splits are for human
|
|
text (roughly 11 words per line) and field delimited data (usually 1-10
|
|
fields). For large strings the split algorithms are bandwidth limited
|
|
so increasing the preallocation likely will not improve things.*/
|
|
|
|
#define MAX_PREALLOC 12
|
|
|
|
/* 5 splits gives 6 elements */
|
|
#define PREALLOC_SIZE(maxsplit) \
|
|
(maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
|
|
|
|
#define SPLIT_APPEND(data, left, right) \
|
|
sub = STRINGLIB_NEW((data) + (left), \
|
|
(right) - (left)); \
|
|
if (sub == NULL) \
|
|
goto onError; \
|
|
if (PyList_Append(list, sub)) { \
|
|
Py_DECREF(sub); \
|
|
goto onError; \
|
|
} \
|
|
else \
|
|
Py_DECREF(sub);
|
|
|
|
#define SPLIT_ADD(data, left, right) { \
|
|
sub = STRINGLIB_NEW((data) + (left), \
|
|
(right) - (left)); \
|
|
if (sub == NULL) \
|
|
goto onError; \
|
|
if (count < MAX_PREALLOC) { \
|
|
PyList_SET_ITEM(list, count, sub); \
|
|
} else { \
|
|
if (PyList_Append(list, sub)) { \
|
|
Py_DECREF(sub); \
|
|
goto onError; \
|
|
} \
|
|
else \
|
|
Py_DECREF(sub); \
|
|
} \
|
|
count++; }
|
|
|
|
|
|
/* Always force the list to the expected size. */
|
|
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(split_whitespace)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
Py_ssize_t maxcount)
|
|
{
|
|
Py_ssize_t i, j, count=0;
|
|
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
|
|
PyObject *sub;
|
|
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
i = j = 0;
|
|
while (maxcount-- > 0) {
|
|
while (i < str_len && STRINGLIB_ISSPACE(str[i]))
|
|
i++;
|
|
if (i == str_len) break;
|
|
j = i; i++;
|
|
while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
|
|
i++;
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* No whitespace in str_obj, so just use it as list[0] */
|
|
Py_INCREF(str_obj);
|
|
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
|
|
count++;
|
|
break;
|
|
}
|
|
#endif
|
|
SPLIT_ADD(str, j, i);
|
|
}
|
|
|
|
if (i < str_len) {
|
|
/* Only occurs when maxcount was reached */
|
|
/* Skip any remaining whitespace and copy to end of string */
|
|
while (i < str_len && STRINGLIB_ISSPACE(str[i]))
|
|
i++;
|
|
if (i != str_len)
|
|
SPLIT_ADD(str, i, str_len);
|
|
}
|
|
FIX_PREALLOC_SIZE(list);
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(split_char)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
const STRINGLIB_CHAR ch,
|
|
Py_ssize_t maxcount)
|
|
{
|
|
Py_ssize_t i, j, count=0;
|
|
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
|
|
PyObject *sub;
|
|
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
i = j = 0;
|
|
while ((j < str_len) && (maxcount-- > 0)) {
|
|
for(; j < str_len; j++) {
|
|
/* I found that using memchr makes no difference */
|
|
if (str[j] == ch) {
|
|
SPLIT_ADD(str, i, j);
|
|
i = j = j + 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* ch not in str_obj, so just use str_obj as list[0] */
|
|
Py_INCREF(str_obj);
|
|
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
|
|
count++;
|
|
} else
|
|
#endif
|
|
if (i <= str_len) {
|
|
SPLIT_ADD(str, i, str_len);
|
|
}
|
|
FIX_PREALLOC_SIZE(list);
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(split)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
|
|
Py_ssize_t maxcount)
|
|
{
|
|
Py_ssize_t i, j, pos, count=0;
|
|
PyObject *list, *sub;
|
|
|
|
if (sep_len == 0) {
|
|
PyErr_SetString(PyExc_ValueError, "empty separator");
|
|
return NULL;
|
|
}
|
|
else if (sep_len == 1)
|
|
return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount);
|
|
|
|
list = PyList_New(PREALLOC_SIZE(maxcount));
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
i = j = 0;
|
|
while (maxcount-- > 0) {
|
|
pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
|
|
if (pos < 0)
|
|
break;
|
|
j = i + pos;
|
|
SPLIT_ADD(str, i, j);
|
|
i = j + sep_len;
|
|
}
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* No match in str_obj, so just use it as list[0] */
|
|
Py_INCREF(str_obj);
|
|
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
|
|
count++;
|
|
} else
|
|
#endif
|
|
{
|
|
SPLIT_ADD(str, i, str_len);
|
|
}
|
|
FIX_PREALLOC_SIZE(list);
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(rsplit_whitespace)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
Py_ssize_t maxcount)
|
|
{
|
|
Py_ssize_t i, j, count=0;
|
|
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
|
|
PyObject *sub;
|
|
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
i = j = str_len - 1;
|
|
while (maxcount-- > 0) {
|
|
while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
|
|
i--;
|
|
if (i < 0) break;
|
|
j = i; i--;
|
|
while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
|
|
i--;
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* No whitespace in str_obj, so just use it as list[0] */
|
|
Py_INCREF(str_obj);
|
|
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
|
|
count++;
|
|
break;
|
|
}
|
|
#endif
|
|
SPLIT_ADD(str, i + 1, j + 1);
|
|
}
|
|
|
|
if (i >= 0) {
|
|
/* Only occurs when maxcount was reached */
|
|
/* Skip any remaining whitespace and copy to beginning of string */
|
|
while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
|
|
i--;
|
|
if (i >= 0)
|
|
SPLIT_ADD(str, 0, i + 1);
|
|
}
|
|
FIX_PREALLOC_SIZE(list);
|
|
if (PyList_Reverse(list) < 0)
|
|
goto onError;
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(rsplit_char)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
const STRINGLIB_CHAR ch,
|
|
Py_ssize_t maxcount)
|
|
{
|
|
Py_ssize_t i, j, count=0;
|
|
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
|
|
PyObject *sub;
|
|
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
i = j = str_len - 1;
|
|
while ((i >= 0) && (maxcount-- > 0)) {
|
|
for(; i >= 0; i--) {
|
|
if (str[i] == ch) {
|
|
SPLIT_ADD(str, i + 1, j + 1);
|
|
j = i = i - 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* ch not in str_obj, so just use str_obj as list[0] */
|
|
Py_INCREF(str_obj);
|
|
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
|
|
count++;
|
|
} else
|
|
#endif
|
|
if (j >= -1) {
|
|
SPLIT_ADD(str, 0, j + 1);
|
|
}
|
|
FIX_PREALLOC_SIZE(list);
|
|
if (PyList_Reverse(list) < 0)
|
|
goto onError;
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(rsplit)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
|
|
Py_ssize_t maxcount)
|
|
{
|
|
Py_ssize_t j, pos, count=0;
|
|
PyObject *list, *sub;
|
|
|
|
if (sep_len == 0) {
|
|
PyErr_SetString(PyExc_ValueError, "empty separator");
|
|
return NULL;
|
|
}
|
|
else if (sep_len == 1)
|
|
return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount);
|
|
|
|
list = PyList_New(PREALLOC_SIZE(maxcount));
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
j = str_len;
|
|
while (maxcount-- > 0) {
|
|
pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH);
|
|
if (pos < 0)
|
|
break;
|
|
SPLIT_ADD(str, pos + sep_len, j);
|
|
j = pos;
|
|
}
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* No match in str_obj, so just use it as list[0] */
|
|
Py_INCREF(str_obj);
|
|
PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
|
|
count++;
|
|
} else
|
|
#endif
|
|
{
|
|
SPLIT_ADD(str, 0, j);
|
|
}
|
|
FIX_PREALLOC_SIZE(list);
|
|
if (PyList_Reverse(list) < 0)
|
|
goto onError;
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(PyObject *)
|
|
STRINGLIB(splitlines)(PyObject* str_obj,
|
|
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|
int keepends)
|
|
{
|
|
/* This does not use the preallocated list because splitlines is
|
|
usually run with hundreds of newlines. The overhead of
|
|
switching between PyList_SET_ITEM and append causes about a
|
|
2-3% slowdown for that common case. A smarter implementation
|
|
could move the if check out, so the SET_ITEMs are done first
|
|
and the appends only done when the prealloc buffer is full.
|
|
That's too much work for little gain.*/
|
|
|
|
Py_ssize_t i;
|
|
Py_ssize_t j;
|
|
PyObject *list = PyList_New(0);
|
|
PyObject *sub;
|
|
|
|
if (list == NULL)
|
|
return NULL;
|
|
|
|
for (i = j = 0; i < str_len; ) {
|
|
Py_ssize_t eol;
|
|
|
|
/* Find a line and append it */
|
|
while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
|
|
i++;
|
|
|
|
/* Skip the line break reading CRLF as one line break */
|
|
eol = i;
|
|
if (i < str_len) {
|
|
if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
|
|
i += 2;
|
|
else
|
|
i++;
|
|
if (keepends)
|
|
eol = i;
|
|
}
|
|
#ifndef STRINGLIB_MUTABLE
|
|
if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
|
|
/* No linebreak in str_obj, so just use it as list[0] */
|
|
if (PyList_Append(list, str_obj))
|
|
goto onError;
|
|
break;
|
|
}
|
|
#endif
|
|
SPLIT_APPEND(str, j, eol);
|
|
j = i;
|
|
}
|
|
return list;
|
|
|
|
onError:
|
|
Py_DECREF(list);
|
|
return NULL;
|
|
}
|
|
|