cosmopolitan/third_party/python/Objects/unicodeobject-deadcode.c

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8                               :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Python 3                                                                     │
│ https://docs.python.org/3/license.html                                       │
╚─────────────────────────────────────────────────────────────────────────────*/
#define PY_SSIZE_T_CLEAN
#include "libc/assert.h"
#include "third_party/python/Include/codecs.h"
#include "third_party/python/Include/pyerrors.h"
#include "third_party/python/Include/pymem.h"
#include "third_party/python/Include/unicodeobject.h"
#include "third_party/python/Include/warnings.h"

#define _PyUnicode_STATE(op)                            \
    (((PyASCIIObject *)(op))->state)

int ensure_unicode(PyObject *);
PyObject *unicode_result(PyObject *);
int unicode_check_modifiable(PyObject *);
PyObject *unicode_encode_ucs1(PyObject *, const char *, const Py_UCS4);
PyObject *_PyUnicode_TranslateCharmap(PyObject *, PyObject *, const char *);

/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
   This function is kept for backward compatibility with the old API. */
Py_UNICODE
PyUnicode_GetMax(void)
{
#ifdef Py_UNICODE_WIDE
    return 0x10FFFF;
#else
    /* This is actually an illegal character, so it should
       not be passed to unichr. */
    return 0xFFFF;
#endif
}

PyObject *
PyUnicode_AsDecodedObject(PyObject *unicode,
                          const char *encoding,
                          const char *errors)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsDecodedObject() is deprecated; "
                     "use PyCodec_Decode() to decode from str", 1) < 0)
        return NULL;
    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();
    /* Decode via the codec registry */
    return PyCodec_Decode(unicode, encoding, errors);
}

PyObject *
PyUnicode_AsDecodedUnicode(PyObject *unicode,
                           const char *encoding,
                           const char *errors)
{
    PyObject *v;
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsDecodedUnicode() is deprecated; "
                     "use PyCodec_Decode() to decode from str to str", 1) < 0)
        return NULL;
    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();
    /* Decode via the codec registry */
    v = PyCodec_Decode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    if (!PyUnicode_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
                     "use codecs.decode() to decode to arbitrary types",
                     encoding,
                     Py_TYPE(unicode)->tp_name);
        Py_DECREF(v);
        goto onError;
    }
    return unicode_result(v);
  onError:
    return NULL;
}

PyObject *
PyUnicode_AsEncodedObject(PyObject *unicode,
                          const char *encoding,
                          const char *errors)
{
    PyObject *v;
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsEncodedObject() is deprecated; "
                     "use PyUnicode_AsEncodedString() to encode from str to bytes "
                     "or PyCodec_Encode() for generic encoding", 1) < 0)
        return NULL;
    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();
    /* Encode via the codec registry */
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    return v;
  onError:
    return NULL;
}

PyObject *
PyUnicode_AsEncodedUnicode(PyObject *unicode,
                           const char *encoding,
                           const char *errors)
{
    PyObject *v;
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    if (PyErr_WarnEx(PyExc_DeprecationWarning,
                     "PyUnicode_AsEncodedUnicode() is deprecated; "
                     "use PyCodec_Encode() to encode from str to str", 1) < 0)
        return NULL;
    if (encoding == NULL)
        encoding = PyUnicode_GetDefaultEncoding();
    /* Encode via the codec registry */
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
    if (!PyUnicode_Check(v)) {
        PyErr_Format(PyExc_TypeError,
                     "'%.400s' encoder returned '%.400s' instead of 'str'; "
                     "use codecs.encode() to encode to arbitrary types",
                     encoding,
                     Py_TYPE(v)->tp_name);
        Py_DECREF(v);
        goto onError;
    }
    return v;
  onError:
    return NULL;
}

wchar_t *
_PyUnicode_AsWideCharString(PyObject *unicode)
{
    const wchar_t *wstr;
    wchar_t *buffer;
    Py_ssize_t buflen;
    if (unicode == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    wstr = PyUnicode_AsUnicodeAndSize(unicode, &buflen);
    if (wstr == NULL) {
        return NULL;
    }
    if (wcslen(wstr) != (size_t)buflen) {
        PyErr_SetString(PyExc_ValueError,
                        "embedded null character");
        return NULL;
    }
    buffer = PyMem_NEW(wchar_t, buflen + 1);
    if (buffer == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(buffer, wstr, (buflen + 1) * sizeof(wchar_t));
    return buffer;
}

const Py_UNICODE *
_PyUnicode_AsUnicode(PyObject *unicode)
{
    Py_ssize_t size;
    const Py_UNICODE *wstr;
    wstr = PyUnicode_AsUnicodeAndSize(unicode, &size);
    if (wstr && wcslen(wstr) != (size_t)size) {
        PyErr_SetString(PyExc_ValueError, "embedded null character");
        return NULL;
    }
    return wstr;
}

Py_ssize_t
PyUnicode_GetSize(PyObject *unicode)
{
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        goto onError;
    }
    return PyUnicode_GET_SIZE(unicode);
  onError:
    return -1;
}

int
PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch)
{
    if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) {
        PyErr_BadArgument();
        return -1;
    }
    assert(PyUnicode_IS_READY(unicode));
    if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return -1;
    }
    if (unicode_check_modifiable(unicode))
        return -1;
    if (ch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
        PyErr_SetString(PyExc_ValueError, "character out of range");
        return -1;
    }
    PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
                    index, ch);
    return 0;
}

/* Deprecated */
PyObject *
PyUnicode_EncodeLatin1(const Py_UNICODE *p,
                       Py_ssize_t size,
                       const char *errors)
{
    PyObject *result;
    PyObject *unicode = PyUnicode_FromUnicode(p, size);
    if (unicode == NULL)
        return NULL;
    result = unicode_encode_ucs1(unicode, errors, 256);
    Py_DECREF(unicode);
    return result;
}

/* Deprecated */
PyObject *
PyUnicode_EncodeASCII(const Py_UNICODE *p,
                      Py_ssize_t size,
                      const char *errors)
{
    PyObject *result;
    PyObject *unicode = PyUnicode_FromUnicode(p, size);
    if (unicode == NULL)
        return NULL;
    result = unicode_encode_ucs1(unicode, errors, 128);
    Py_DECREF(unicode);
    return result;
}

PyObject *
PyUnicode_Encode(const Py_UNICODE *s,
                 Py_ssize_t size,
                 const char *encoding,
                 const char *errors)
{
    PyObject *v, *unicode;
    unicode = PyUnicode_FromUnicode(s, size);
    if (unicode == NULL)
        return NULL;
    v = PyUnicode_AsEncodedString(unicode, encoding, errors);
    Py_DECREF(unicode);
    return v;
}

/* Deprecated */
PyObject *
PyUnicode_EncodeCharmap(const Py_UNICODE *p,
                        Py_ssize_t size,
                        PyObject *mapping,
                        const char *errors)
{
    PyObject *result;
    PyObject *unicode = PyUnicode_FromUnicode(p, size);
    if (unicode == NULL)
        return NULL;
    result = _PyUnicode_EncodeCharmap(unicode, mapping, errors);
    Py_DECREF(unicode);
    return result;
}

/* Deprecated. Use PyUnicode_Translate instead. */
PyObject *
PyUnicode_TranslateCharmap(const Py_UNICODE *p,
                           Py_ssize_t size,
                           PyObject *mapping,
                           const char *errors)
{
    PyObject *result;
    PyObject *unicode = PyUnicode_FromUnicode(p, size);
    if (!unicode)
        return NULL;
    result = _PyUnicode_TranslateCharmap(unicode, mapping, errors);
    Py_DECREF(unicode);
    return result;
}

void
PyUnicode_InternImmortal(PyObject **p)
{
    PyUnicode_InternInPlace(p);
    if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
        _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
        Py_INCREF(*p);
    }
}

Py_UNICODE*
Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
{
    Py_UNICODE *u = s1;
    while ((*u++ = *s2++));
    return s1;
}

Py_UNICODE*
Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
{
    Py_UNICODE *u = s1;
    while ((*u++ = *s2++))
        if (n-- == 0)
            break;
    return s1;
}

Py_UNICODE*
Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2)
{
    Py_UNICODE *u1 = s1;
    u1 += Py_UNICODE_strlen(u1);
    Py_UNICODE_strcpy(u1, s2);
    return s1;
}

int
Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
{
    while (*s1 && *s2 && *s1 == *s2)
        s1++, s2++;
    if (*s1 && *s2)
        return (*s1 < *s2) ? -1 : +1;
    if (*s1)
        return 1;
    if (*s2)
        return -1;
    return 0;
}

int
Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
{
    Py_UNICODE u1, u2;
    for (; n != 0; n--) {
        u1 = *s1;
        u2 = *s2;
        if (u1 != u2)
            return (u1 < u2) ? -1 : +1;
        if (u1 == '\0')
            return 0;
        s1++;
        s2++;
    }
    return 0;
}

Py_UNICODE*
Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
{
    const Py_UNICODE *p;
    for (p = s; *p; p++)
        if (*p == c)
            return (Py_UNICODE*)p;
    return NULL;
}

Py_UNICODE*
Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c)
{
    const Py_UNICODE *p;
    p = s + Py_UNICODE_strlen(s);
    while (p != s) {
        p--;
        if (*p == c)
            return (Py_UNICODE*)p;
    }
    return NULL;
}

size_t
Py_UNICODE_strlen(const Py_UNICODE *u)
{
    int res = 0;
    while(*u++)
        res++;
    return res;
}

Py_UNICODE*
PyUnicode_AsUnicodeCopy(PyObject *unicode)
{
    Py_UNICODE *u, *copy;
    Py_ssize_t len, size;
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }
    u = PyUnicode_AsUnicodeAndSize(unicode, &len);
    if (u == NULL)
        return NULL;
    /* Ensure we won't overflow the size. */
    if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
        PyErr_NoMemory();
        return NULL;
    }
    size = len + 1; /* copy the null character */
    size *= sizeof(Py_UNICODE);
    copy = PyMem_Malloc(size);
    if (copy == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(copy, u, size);
    return copy;
}