cosmopolitan/third_party/python/Objects/accu.c
Justine Tunney b420ed8248 Undiamond Python headers
This change gets the Python codebase into a state where it conforms to
the conventions of this codebase. It's now possible to include headers
from Python, without worrying about ordering. Python has traditionally
solved that problem by "diamonding" everything in Python.h, but that's
problematic since it means any change to any Python header invalidates
all the build artifacts. Lastly, it breaks tooling: it is hard to
explain to Emacs that, when I press C-c C-h to add an import line, it
shouldn't add the header that actually defines the symbol and should
instead follow the nonstandard Python convention.

Progress has been made on letting Python load source code from the zip
executable structure via the standard C library APIs. System calls now
recognize zip!FILENAME alternative URIs as equivalent to zip:FILENAME
since Python uses colon as its delimiter.

Some progress has been made on embedding the notice license terms into
the Python object code. This is easier said than done since Python has
an extremely complicated ownership story.

- Some termios APIs have been added
- Implement rewinddir() dirstream API
- GetCpuCount() API added to Cosmopolitan Libc
- More bugs in Cosmopolitan Libc have been fixed
- zipobj.com now has flags for mangling the path
- Fixed bug a priori with sendfile() on certain BSDs
- Polyfill F_DUPFD and F_DUPFD_CLOEXEC across platforms
- FIOCLEX / FIONCLEX now polyfilled for fast O_CLOEXEC changes
- APE now supports a hybrid solution to no-self-modify for builds
- Many BSD-only magnums added, e.g. O_SEARCH, O_SHLOCK, SF_NODISKIO
2021-08-12 14:07:40 -07:00

118 lines
2.6 KiB
C

#include "libc/assert.h"
#include "third_party/python/Include/accu.h"
#include "third_party/python/Include/listobject.h"
#include "third_party/python/Include/unicodeobject.h"
/* clang-format off */
/* Accumulator struct implementation */
/* Equivalent of ''.join(lst).
 *
 * Returns whatever PyUnicode_Join() returns for an empty separator, or
 * NULL if the separator itself could not be created.
 */
static PyObject *
join_list_unicode(PyObject *lst)
{
    PyObject *sep, *ret;

    sep = PyUnicode_FromStringAndSize("", 0);
    /* Bail out before touching sep: Py_DECREF() must not be handed NULL,
     * and joining without the intended empty separator would be wrong. */
    if (sep == NULL)
        return NULL;
    ret = PyUnicode_Join(sep, lst);
    Py_DECREF(sep);
    return ret;
}
/* Prepare an accumulator for use.
 *
 * The small list is created immediately; the large list is left NULL
 * and only allocated later, on demand.
 *
 * Returns 0 on success, -1 if the small list could not be created.
 */
int
_PyAccu_Init(_PyAccu *acc)
{
    acc->small = PyList_New(0);
    acc->large = NULL;  /* allocated lazily */
    return (acc->small != NULL) ? 0 : -1;
}
/* Collapse the contents of acc->small into one string and move it to
 * acc->large.
 *
 * Does nothing when the small list is empty.  Otherwise the large list
 * is created if it does not exist yet, the small list is joined into a
 * single string, the small list is emptied, and the joined string is
 * appended to the large list.
 *
 * Returns 0 on success (or when there was nothing to flush), -1 if any
 * step fails, or the result of PyList_Append() for the final step.
 */
static int
flush_accumulator(_PyAccu *acc)
{
    PyObject *chunk;
    int status;
    Py_ssize_t pending = PyList_GET_SIZE(acc->small);

    if (pending == 0)
        return 0;

    if (acc->large == NULL) {
        acc->large = PyList_New(0);
        if (acc->large == NULL)
            return -1;
    }

    chunk = join_list_unicode(acc->small);
    if (chunk == NULL)
        return -1;

    /* Empty the small list in place so it can keep being reused. */
    if (PyList_SetSlice(acc->small, 0, pending, NULL) != 0) {
        Py_DECREF(chunk);
        return -1;
    }

    status = PyList_Append(acc->large, chunk);
    Py_DECREF(chunk);
    return status;
}
/* Append one unicode object to the accumulator.
 *
 * The object is appended to the small list; once that list holds
 * 100000 items it is flushed into the large list.
 *
 * Returns 0 on success, -1 if the append fails, or the result of
 * flush_accumulator() when a flush is triggered.
 */
int
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
{
    /* On 64-bit builds every element of a list of unicode objects costs
     *   - 8 bytes for the list slot, plus
     *   - 56 bytes for the unicode object header,
     * i.e. 64 bytes.  100000 such elements waste more than 6MB compared
     * to one concatenated string, hence the flush threshold below.
     */
    enum { flush_threshold = 100000 };

    assert(PyUnicode_Check(unicode));

    if (PyList_Append(acc->small, unicode) != 0)
        return -1;

    if (PyList_GET_SIZE(acc->small) >= flush_threshold)
        return flush_accumulator(acc);
    return 0;
}
/* Finish the accumulator and hand back the list of joined chunks.
 *
 * Any items still pending in the small list are flushed first.  The
 * small list is then released, and the reference held in acc->large is
 * transferred to the caller (acc->large is reset to NULL).
 *
 * Returns the list on success.  If the flush fails, both lists are
 * released and NULL is returned.  If nothing was ever accumulated, a new
 * empty list is returned rather than NULL, so that a NULL result always
 * means a failure; in that case NULL is returned only if the empty list
 * cannot be created.
 */
PyObject *
_PyAccu_FinishAsList(_PyAccu *acc)
{
    int ret;
    PyObject *res;

    ret = flush_accumulator(acc);
    Py_CLEAR(acc->small);
    if (ret) {
        Py_CLEAR(acc->large);
        return NULL;
    }
    res = acc->large;
    acc->large = NULL;
    /* The large list is allocated lazily, so it is still NULL when the
     * accumulator was empty.  Returning that NULL would signal an error
     * without any exception having been raised. */
    if (res == NULL)
        res = PyList_New(0);
    return res;
}
/* Finish the accumulator and return everything joined into one string.
 *
 * If the large list was never allocated, the small list is joined
 * directly and acc->small is reset to NULL.  Otherwise the accumulator
 * is finished via _PyAccu_FinishAsList() and the resulting list is
 * joined.
 *
 * Returns the result of the join, or NULL if _PyAccu_FinishAsList()
 * returned NULL.
 */
PyObject *
_PyAccu_Finish(_PyAccu *acc)
{
    PyObject *parts, *joined;

    if (acc->large != NULL) {
        parts = _PyAccu_FinishAsList(acc);
        if (parts == NULL)
            return NULL;
    }
    else {
        /* Only the small list exists: take over its reference. */
        parts = acc->small;
        acc->small = NULL;
    }

    joined = join_list_unicode(parts);
    Py_DECREF(parts);
    return joined;
}
/* Release both lists held by the accumulator.
 *
 * Each member is cleared with Py_CLEAR(), so members that are already
 * NULL are left alone and both end up NULL afterwards.
 */
void
_PyAccu_Destroy(_PyAccu *acc)
{
    Py_CLEAR(acc->small);
    Py_CLEAR(acc->large);
}