mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
e16a7d8f3b
`et` means `expandtab`.

```sh
rg 'vi: .* :vi' -l -0 | \
  xargs -0 sed -i '' 's/vi: \(.*\) et\(.*\) :vi/vi: \1 xoet\2:vi/'
rg 'vi: .* :vi' -l -0 | \
  xargs -0 sed -i '' 's/vi: \(.*\)noet\(.*\):vi/vi: \1et\2 :vi/'
rg 'vi: .* :vi' -l -0 | \
  xargs -0 sed -i '' 's/vi: \(.*\)xoet\(.*\):vi/vi: \1noet\2:vi/'
```
430 lines
12 KiB
C
430 lines
12 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Python 3 │
|
|
│ https://docs.python.org/3/license.html │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#define PY_SSIZE_T_CLEAN
|
|
#include "libc/assert.h"
|
|
#include "third_party/python/Include/codecs.h"
|
|
#include "third_party/python/Include/pyerrors.h"
|
|
#include "third_party/python/Include/pymem.h"
|
|
#include "third_party/python/Include/unicodeobject.h"
|
|
#include "third_party/python/Include/warnings.h"
|
|
|
|
/* Reach the `state` member of a string object by viewing it as a
   PyASCIIObject. */
#define _PyUnicode_STATE(op) (((PyASCIIObject *)(op))->state)
|
|
|
|
int ensure_unicode(PyObject *);
|
|
PyObject *unicode_result(PyObject *);
|
|
int unicode_check_modifiable(PyObject *);
|
|
PyObject *unicode_encode_ucs1(PyObject *, const char *, const Py_UCS4);
|
|
PyObject *_PyUnicode_TranslateCharmap(PyObject *, PyObject *, const char *);
|
|
|
|
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
|
|
This function is kept for backward compatibility with the old API. */
|
|
Py_UNICODE
|
|
PyUnicode_GetMax(void)
|
|
{
|
|
#ifdef Py_UNICODE_WIDE
|
|
return 0x10FFFF;
|
|
#else
|
|
/* This is actually an illegal character, so it should
|
|
not be passed to unichr. */
|
|
return 0xFFFF;
|
|
#endif
|
|
}
|
|
|
|
/* Deprecated: run `unicode` through the decoder registered for `encoding`.

   Sets an error and returns NULL when `unicode` is not a str or when the
   DeprecationWarning is turned into an exception.  A NULL `encoding`
   selects PyUnicode_GetDefaultEncoding().  Otherwise returns whatever
   PyCodec_Decode() returns. */
PyObject *
PyUnicode_AsDecodedObject(PyObject *unicode,
                          const char *encoding,
                          const char *errors)
{
    const char *codec_name = encoding;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsDecodedObject() is deprecated; "
                     "use PyCodec_Decode() to decode from str", 1) < 0) {
        return NULL;
    }

    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* Hand the actual work to the codec registry. */
    return PyCodec_Decode(unicode, codec_name, errors);
}
|
|
|
|
/* Deprecated: decode `unicode` through the codec registry and require the
   result to be a str.

   Returns NULL with an exception set when:
     - `unicode` is not a str (PyErr_BadArgument),
     - the DeprecationWarning is raised as an error,
     - PyCodec_Decode() fails, or
     - the decoder returns something that is not a str (TypeError).
   A NULL `encoding` selects PyUnicode_GetDefaultEncoding().
   On success the decoded object is passed through unicode_result(). */
PyObject *
PyUnicode_AsDecodedUnicode(PyObject *unicode,
                           const char *encoding,
                           const char *errors)
{
    PyObject *v;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsDecodedUnicode() is deprecated; "
                     "use PyCodec_Decode() to decode from str to str", 1) < 0)
        return NULL;

    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();

    /* Decode via the codec registry */
    v = PyCodec_Decode(unicode, encoding, errors);
    if (v == NULL)
        return NULL;

    if (!PyUnicode_Check(v)) {
        /* Report the type the decoder actually produced.  Naming the type
           of the input (always str at this point) would yield the
           self-contradictory "returned 'str' instead of 'str'". */
        PyErr_Format(PyExc_TypeError,
                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
                     "use codecs.decode() to decode to arbitrary types",
                     encoding,
                     Py_TYPE(v)->tp_name);
        Py_DECREF(v);
        return NULL;
    }
    return unicode_result(v);
}
|
|
|
|
/* Deprecated: run `unicode` through the encoder registered for `encoding`.

   Sets an error and returns NULL when `unicode` is not a str or when the
   DeprecationWarning is turned into an exception.  A NULL `encoding`
   selects PyUnicode_GetDefaultEncoding().  The result of PyCodec_Encode()
   (NULL on failure) is returned unchanged. */
PyObject *
PyUnicode_AsEncodedObject(PyObject *unicode,
                          const char *encoding,
                          const char *errors)
{
    const char *codec_name = encoding;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsEncodedObject() is deprecated; "
                     "use PyUnicode_AsEncodedString() to encode from str to bytes "
                     "or PyCodec_Encode() for generic encoding", 1) < 0) {
        return NULL;
    }

    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* Hand the actual work to the codec registry. */
    return PyCodec_Encode(unicode, codec_name, errors);
}
|
|
|
|
/* Deprecated: encode `unicode` through the codec registry and require the
   result to be a str.

   Returns NULL with an exception set when `unicode` is not a str, when the
   DeprecationWarning is raised as an error, when PyCodec_Encode() fails, or
   when the encoder returns a non-str object (TypeError).  A NULL `encoding`
   selects PyUnicode_GetDefaultEncoding(). */
PyObject *
PyUnicode_AsEncodedUnicode(PyObject *unicode,
                           const char *encoding,
                           const char *errors)
{
    PyObject *encoded;
    const char *codec_name = encoding;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsEncodedUnicode() is deprecated; "
                     "use PyCodec_Encode() to encode from str to str", 1) < 0) {
        return NULL;
    }

    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* Hand the actual work to the codec registry. */
    encoded = PyCodec_Encode(unicode, codec_name, errors);
    if (encoded == NULL) {
        return NULL;
    }

    if (PyUnicode_Check(encoded)) {
        return encoded;
    }

    /* The encoder produced something other than str: reject it. */
    PyErr_Format(PyExc_TypeError,
                 "'%.400s' encoder returned '%.400s' instead of 'str'; "
                 "use codecs.encode() to encode to arbitrary types",
                 codec_name,
                 Py_TYPE(encoded)->tp_name);
    Py_DECREF(encoded);
    return NULL;
}
|
|
|
|
wchar_t *
|
|
_PyUnicode_AsWideCharString(PyObject *unicode)
|
|
{
|
|
const wchar_t *wstr;
|
|
wchar_t *buffer;
|
|
Py_ssize_t buflen;
|
|
if (unicode == NULL) {
|
|
PyErr_BadInternalCall();
|
|
return NULL;
|
|
}
|
|
wstr = PyUnicode_AsUnicodeAndSize(unicode, &buflen);
|
|
if (wstr == NULL) {
|
|
return NULL;
|
|
}
|
|
if (wcslen(wstr) != (size_t)buflen) {
|
|
PyErr_SetString(PyExc_ValueError,
|
|
"embedded null character");
|
|
return NULL;
|
|
}
|
|
buffer = PyMem_NEW(wchar_t, buflen + 1);
|
|
if (buffer == NULL) {
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
memcpy(buffer, wstr, (buflen + 1) * sizeof(wchar_t));
|
|
return buffer;
|
|
}
|
|
|
|
const Py_UNICODE *
|
|
_PyUnicode_AsUnicode(PyObject *unicode)
|
|
{
|
|
Py_ssize_t size;
|
|
const Py_UNICODE *wstr;
|
|
wstr = PyUnicode_AsUnicodeAndSize(unicode, &size);
|
|
if (wstr && wcslen(wstr) != (size_t)size) {
|
|
PyErr_SetString(PyExc_ValueError, "embedded null character");
|
|
return NULL;
|
|
}
|
|
return wstr;
|
|
}
|
|
|
|
Py_ssize_t
|
|
PyUnicode_GetSize(PyObject *unicode)
|
|
{
|
|
if (!PyUnicode_Check(unicode)) {
|
|
PyErr_BadArgument();
|
|
goto onError;
|
|
}
|
|
return PyUnicode_GET_SIZE(unicode);
|
|
onError:
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch)
|
|
{
|
|
if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) {
|
|
PyErr_BadArgument();
|
|
return -1;
|
|
}
|
|
assert(PyUnicode_IS_READY(unicode));
|
|
if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) {
|
|
PyErr_SetString(PyExc_IndexError, "string index out of range");
|
|
return -1;
|
|
}
|
|
if (unicode_check_modifiable(unicode))
|
|
return -1;
|
|
if (ch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
|
|
PyErr_SetString(PyExc_ValueError, "character out of range");
|
|
return -1;
|
|
}
|
|
PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
|
|
index, ch);
|
|
return 0;
|
|
}
|
|
|
|
/* Deprecated */
|
|
PyObject *
|
|
PyUnicode_EncodeLatin1(const Py_UNICODE *p,
|
|
Py_ssize_t size,
|
|
const char *errors)
|
|
{
|
|
PyObject *result;
|
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
|
if (unicode == NULL)
|
|
return NULL;
|
|
result = unicode_encode_ucs1(unicode, errors, 256);
|
|
Py_DECREF(unicode);
|
|
return result;
|
|
}
|
|
|
|
/* Deprecated */
|
|
PyObject *
|
|
PyUnicode_EncodeASCII(const Py_UNICODE *p,
|
|
Py_ssize_t size,
|
|
const char *errors)
|
|
{
|
|
PyObject *result;
|
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
|
if (unicode == NULL)
|
|
return NULL;
|
|
result = unicode_encode_ucs1(unicode, errors, 128);
|
|
Py_DECREF(unicode);
|
|
return result;
|
|
}
|
|
|
|
PyObject *
|
|
PyUnicode_Encode(const Py_UNICODE *s,
|
|
Py_ssize_t size,
|
|
const char *encoding,
|
|
const char *errors)
|
|
{
|
|
PyObject *v, *unicode;
|
|
unicode = PyUnicode_FromUnicode(s, size);
|
|
if (unicode == NULL)
|
|
return NULL;
|
|
v = PyUnicode_AsEncodedString(unicode, encoding, errors);
|
|
Py_DECREF(unicode);
|
|
return v;
|
|
}
|
|
|
|
/* Deprecated */
|
|
PyObject *
|
|
PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
|
Py_ssize_t size,
|
|
PyObject *mapping,
|
|
const char *errors)
|
|
{
|
|
PyObject *result;
|
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
|
if (unicode == NULL)
|
|
return NULL;
|
|
result = _PyUnicode_EncodeCharmap(unicode, mapping, errors);
|
|
Py_DECREF(unicode);
|
|
return result;
|
|
}
|
|
|
|
/* Deprecated. Use PyUnicode_Translate instead. */
|
|
PyObject *
|
|
PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
|
Py_ssize_t size,
|
|
PyObject *mapping,
|
|
const char *errors)
|
|
{
|
|
PyObject *result;
|
|
PyObject *unicode = PyUnicode_FromUnicode(p, size);
|
|
if (!unicode)
|
|
return NULL;
|
|
result = _PyUnicode_TranslateCharmap(unicode, mapping, errors);
|
|
Py_DECREF(unicode);
|
|
return result;
|
|
}
|
|
|
|
void
|
|
PyUnicode_InternImmortal(PyObject **p)
|
|
{
|
|
PyUnicode_InternInPlace(p);
|
|
if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
|
|
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
|
|
Py_INCREF(*p);
|
|
}
|
|
}
|
|
|
|
/* Copy the null-terminated string `s2`, terminator included, into `s1`
   and return `s1`.  No bound is applied to the destination. */
Py_UNICODE*
Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
{
    Py_UNICODE *dst = s1;

    for (;;) {
        const Py_UNICODE unit = *s2++;
        *dst++ = unit;
        if (unit == 0) {
            break;  /* the terminator has just been stored */
        }
    }
    return s1;
}
|
|
|
|
/* Copy at most `n` code units from `s2` into `s1`, stopping early once the
   terminating null has been copied.  Returns `s1`.

   The unused tail of `s1` is NOT padded with nulls, and `s1` is left
   unterminated when `s2` holds `n` or more non-null code units.

   The earlier loop checked the budget only after each store, so it could
   write n + 1 code units (one even for n == 0) and overrun a destination
   sized for `n`; the budget is now checked before every store. */
Py_UNICODE*
Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
{
    Py_UNICODE *u = s1;

    while (n != 0) {
        n--;
        if ((*u++ = *s2++) == 0)
            break;  /* terminator copied: done */
    }
    return s1;
}
|
|
|
|
/* Append the null-terminated string `s2` to the end of `s1` and return
   `s1`.  No bound is applied to the destination. */
Py_UNICODE*
Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2)
{
    /* Start copying at the current terminator of s1. */
    Py_UNICODE *tail = s1 + Py_UNICODE_strlen(s1);

    Py_UNICODE_strcpy(tail, s2);
    return s1;
}
|
|
|
|
int
|
|
Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
|
|
{
|
|
while (*s1 && *s2 && *s1 == *s2)
|
|
s1++, s2++;
|
|
if (*s1 && *s2)
|
|
return (*s1 < *s2) ? -1 : +1;
|
|
if (*s1)
|
|
return 1;
|
|
if (*s2)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
|
|
{
|
|
Py_UNICODE u1, u2;
|
|
for (; n != 0; n--) {
|
|
u1 = *s1;
|
|
u2 = *s2;
|
|
if (u1 != u2)
|
|
return (u1 < u2) ? -1 : +1;
|
|
if (u1 == '\0')
|
|
return 0;
|
|
s1++;
|
|
s2++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Return a pointer to the first occurrence of `c` in the null-terminated
   string `s`, or NULL when it does not occur.  The terminator itself is
   never examined, so searching for 0 yields NULL. */
Py_UNICODE*
Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
{
    const Py_UNICODE *cursor = s;

    while (*cursor != 0) {
        if (*cursor == c) {
            return (Py_UNICODE*)cursor;
        }
        cursor++;
    }
    return NULL;
}
|
|
|
|
/* Return a pointer to the last occurrence of `c` in the null-terminated
   string `s`, or NULL when it does not occur.  The scan starts just before
   the terminator, so searching for 0 yields NULL. */
Py_UNICODE*
Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c)
{
    const Py_UNICODE *cursor = s + Py_UNICODE_strlen(s);

    /* Walk backwards from the terminator towards the first code unit. */
    while (cursor != s) {
        --cursor;
        if (*cursor == c) {
            return (Py_UNICODE*)cursor;
        }
    }
    return NULL;
}
|
|
|
|
size_t
|
|
Py_UNICODE_strlen(const Py_UNICODE *u)
|
|
{
|
|
int res = 0;
|
|
while(*u++)
|
|
res++;
|
|
return res;
|
|
}
|
|
|
|
Py_UNICODE*
|
|
PyUnicode_AsUnicodeCopy(PyObject *unicode)
|
|
{
|
|
Py_UNICODE *u, *copy;
|
|
Py_ssize_t len, size;
|
|
if (!PyUnicode_Check(unicode)) {
|
|
PyErr_BadArgument();
|
|
return NULL;
|
|
}
|
|
u = PyUnicode_AsUnicodeAndSize(unicode, &len);
|
|
if (u == NULL)
|
|
return NULL;
|
|
/* Ensure we won't overflow the size. */
|
|
if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
size = len + 1; /* copy the null character */
|
|
size *= sizeof(Py_UNICODE);
|
|
copy = PyMem_Malloc(size);
|
|
if (copy == NULL) {
|
|
PyErr_NoMemory();
|
|
return NULL;
|
|
}
|
|
memcpy(copy, u, size);
|
|
return copy;
|
|
}
|