mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-01 03:53:33 +00:00
fa20edc44d
- Remove most __ASSEMBLER__ __LINKER__ ifdefs - Rename libc/intrin/bits.h to libc/serialize.h - Block pthread cancelation in fchmodat() polyfill - Remove `clang-format off` statements in third_party
246 lines
8.6 KiB
C
246 lines
8.6 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Python 3                                                                     │
│ https://docs.python.org/3/license.html                                       │
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#define USING_IMPORTED_MAPS
|
|
#include "third_party/python/Include/import.h"
|
|
#include "third_party/python/Include/yoink.h"
|
|
#include "third_party/python/Modules/cjkcodecs/cjkcodecs.h"
|
|
#include "third_party/python/Modules/cjkcodecs/somanyencodings.h"
|
|
|
|
/*
 * Names this translation unit provides: the module itself, its three
 * mapping capsules and the getcodec entry point.
 * NOTE(review): PYTHON_PROVIDE comes from the project's yoink.h; presumably
 * it records these names for link-time dependency resolution -- confirm.
 */
PYTHON_PROVIDE("_codecs_hk");
PYTHON_PROVIDE("_codecs_hk.__map_big5hkscs");
PYTHON_PROVIDE("_codecs_hk.__map_big5hkscs_bmp");
PYTHON_PROVIDE("_codecs_hk.__map_big5hkscs_nonbmp");
PYTHON_PROVIDE("_codecs_hk.getcodec");
|
|
|
|
/*
 * _codecs_hk.c: Codecs collection for encodings from Hong Kong
 *
 * Written by Hye-Shik "Bourne to Macro" Chang <perky@FreeBSD.org>
 */
|
|
|
|
/*
 * Plane hint bitmap for Big5-HKSCS codes 0x8740..0xa0fe.
 *
 * The decoder indexes this table with s = BH2S(lead, trail) - BH2S(0x87, 0x40)
 * and tests bit (s & 7) of byte (s >> 3).  A set bit means the value found in
 * the big5hkscs decode map must be OR-ed with 0x20000 before it is emitted;
 * a clear bit means the value is emitted unchanged.
 *
 * The range holds 26 rows of 191 cells = 4966 bits, i.e. 621 bytes.
 */
static const unsigned char big5hkscs_phint_0[] = {
    32,5,95,68,15,82,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,208,44,4,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,192,0,4,0,0,0,0,0,0,0,0,0,0,0,0,1,22,0,15,0,0,0,0,0,
    32,87,43,247,252,110,242,144,11,0,0,0,192,237,164,15,38,193,155,118,242,239,
    222,251,250,247,15,50,68,175,254,239,5,0,0,0,224,251,71,128,193,2,0,132,100,4,
    130,64,32,162,130,133,164,145,0,16,1,0,0,0,144,72,12,0,48,0,84,3,48,68,24,19,
    53,137,38,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,64,0,32,43,153,32,16,99,40,36,
    1,0,0,0,0,80,96,212,0,210,42,24,157,104,53,151,79,216,248,32,196,130,28,40,2,
    0,0,0,0,214,81,10,224,0,129,134,22,67,196,53,17,55,96,230,122,109,5,12,61,0,0,
    0,0,153,57,128,7,34,254,129,144,24,144,12,116,48,208,160,9,41,21,253,4,0,0,0,
    0,223,128,64,8,8,176,219,196,96,237,118,125,249,29,228,211,133,166,205,5,0,0,
    0,0,12,0,110,186,9,47,96,84,0,30,120,104,34,112,86,158,37,243,142,7,0,0,0,192,
    94,44,188,155,223,93,108,109,4,67,96,54,74,96,216,62,7,196,200,1,0,0,0,160,
    177,197,98,11,12,34,62,204,37,184,1,174,237,92,104,13,148,74,181,0,0,0,0,0,
    244,3,18,17,16,68,2,53,144,235,14,153,7,209,202,5,130,161,160,0,0,0,0,52,24,
    160,137,231,156,91,8,132,3,2,218,144,236,219,135,133,191,162,45,0,0,0,0,118,
    58,118,98,130,148,24,1,24,125,254,141,87,39,19,210,91,55,25,12,0,0,0,0,110,
    139,33,145,0,0,0,64,0,0,0,2,0,0,0,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,
    0,0,0,0,0,0,0,2,0,0,0,0,0,0,142,120,110,95,63,126,221,61,247,252,155,252,174,
    210,255,143,107,1,0,0,0,192,159,255,234,186,186,93,188,115,159,250,216,214,
    222,37,75,94,151,218,42,1,0,0,0,224,182,153,27,216,116,230,79,21,191,41,230,
    255,38,117,109,227,255,155,82,0,0,0,0,80,96,126,111,153,169,80,14,0,128,16,
    216,35,0,37,16,144,244,235,117,0,0,0,0,208,219,0,160,152,178,123,6,82,32,152,
    22,200,61,9,0,0,1,0,0,0,0,0,0,0,4,40,200,34,0,2,0,0,16,32,130,80,64,48,1,0,16,
    0,4,0,0,0,0,74,4,1,16,20,0,128,0,4,255,253,36,
};
|
|
|
|
/*
 * Plane hint bitmap for Big5-HKSCS codes 0xc6a1..0xc8fe.
 *
 * 12130 is BH2S(0xc6, 0xa1), the linear index of the first code covered.
 * The decoder subtracts that base and tests bit (s & 7) of byte (s >> 3);
 * a set bit means the decoded value is OR-ed with 0x20000 before output.
 *
 * The range spans 476 bits, i.e. 60 bytes.
 */
static const unsigned char big5hkscs_phint_12130[] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,128,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
};
|
|
|
|
/*
 * Plane hint bitmap for Big5-HKSCS codes 0xf9d6..0xfefe.
 *
 * 21924 is BH2S(0xf9, 0xd6), the linear index of the first code covered.
 * The decoder subtracts that base and tests bit (s & 7) of byte (s >> 3);
 * a set bit means the decoded value is OR-ed with 0x20000 before output.
 *
 * The range spans 996 bits, i.e. 125 bytes.
 */
static const unsigned char big5hkscs_phint_21924[] = {
    0,0,0,0,0,26,172,248,250,90,192,250,51,0,0,0,0,0,129,0,160,156,130,144,9,1,
    180,192,176,3,86,2,160,66,45,136,1,0,0,0,0,146,119,139,96,5,201,33,6,70,56,96,
    72,192,180,36,222,132,224,192,36,0,0,0,0,205,80,197,52,192,40,162,173,124,153,
    24,88,18,34,196,66,162,83,142,30,0,0,0,128,52,135,11,21,209,64,250,61,0,4,210,
    5,72,8,22,230,28,165,0,8,0,0,0,192,45,22,20,128,24,58,212,25,136,28,138,4,
};
|
|
|
|
/*
 * BIG5HKSCS codec
 */
|
|
|
|
/*
 * Codec initialiser for big5hkscs.
 *
 * Performs no runtime set-up and always returns 0.
 * NOTE(review): the signature is supplied by the CODEC_INIT macro from
 * cjkcodecs.h; 0 is presumably the framework's success value -- confirm.
 */
CODEC_INIT(big5hkscs)
{
    return 0;
}
|
|
|
|
/*
|
|
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
|
|
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
|
|
* U+00CA U+030C -> 8864
|
|
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
|
|
* U+00EA U+030C -> 88a5
|
|
* These are handled by not mapping tables but a hand-written code.
|
|
*/
|
|
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
|
|
|
|
ENCODER(big5hkscs)
|
|
{
|
|
while (*inpos < inlen) {
|
|
Py_UCS4 c = INCHAR1;
|
|
DBCHAR code;
|
|
Py_ssize_t insize;
|
|
|
|
if (c < 0x80) {
|
|
REQUIRE_OUTBUF(1);
|
|
**outbuf = (unsigned char)c;
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
|
|
insize = 1;
|
|
REQUIRE_OUTBUF(2);
|
|
|
|
if (c < 0x10000) {
|
|
if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
|
|
if (code == MULTIC) {
|
|
Py_UCS4 c2;
|
|
if (inlen - *inpos >= 2)
|
|
c2 = INCHAR2;
|
|
else
|
|
c2 = 0;
|
|
|
|
if (inlen - *inpos >= 2 &&
|
|
((c & 0xffdf) == 0x00ca) &&
|
|
((c2 & 0xfff7) == 0x0304)) {
|
|
code = big5hkscs_pairenc_table[
|
|
((c >> 4) |
|
|
(c2 >> 3)) & 3];
|
|
insize = 2;
|
|
}
|
|
else if (inlen - *inpos < 2 &&
|
|
!(flags & MBENC_FLUSH))
|
|
return MBERR_TOOFEW;
|
|
else {
|
|
if (c == 0xca)
|
|
code = 0x8866;
|
|
else /* c == 0xea */
|
|
code = 0x88a7;
|
|
}
|
|
}
|
|
}
|
|
else if (TRYMAP_ENC(big5, code, c))
|
|
;
|
|
else
|
|
return 1;
|
|
}
|
|
else if (c < 0x20000)
|
|
return insize;
|
|
else if (c < 0x30000) {
|
|
if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
|
|
;
|
|
else
|
|
return insize;
|
|
}
|
|
else
|
|
return insize;
|
|
|
|
OUTBYTE1(code >> 8);
|
|
OUTBYTE2(code & 0xFF);
|
|
NEXT(insize, 2);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * BH2S: linear index of a Big5-HKSCS (lead, trail) byte pair.
 * Rows begin at lead byte 0x87 and each row covers trail bytes 0x40..0xfe
 * (191 cells), so BH2S(0x87, 0x40) is 0.
 */
#define BH2S(c1, c2) \
    ((((c1) - 0x87) * ((0xfe - 0x40) + 1)) + ((c2) - 0x40))
|
|
|
|
DECODER(big5hkscs)
|
|
{
|
|
while (inleft > 0) {
|
|
unsigned char c = INBYTE1;
|
|
Py_UCS4 decoded;
|
|
|
|
if (c < 0x80) {
|
|
OUTCHAR(c);
|
|
NEXT_IN(1);
|
|
continue;
|
|
}
|
|
|
|
REQUIRE_INBUF(2);
|
|
|
|
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
|
|
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
|
|
OUTCHAR(decoded);
|
|
NEXT_IN(2);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
|
|
{
|
|
int s = BH2S(c, INBYTE2);
|
|
const unsigned char *hintbase;
|
|
|
|
assert(0x87 <= c && c <= 0xfe);
|
|
assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
|
|
|
|
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
|
|
hintbase = big5hkscs_phint_0;
|
|
s -= BH2S(0x87, 0x40);
|
|
}
|
|
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
|
|
hintbase = big5hkscs_phint_12130;
|
|
s -= BH2S(0xc6, 0xa1);
|
|
}
|
|
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
|
|
hintbase = big5hkscs_phint_21924;
|
|
s -= BH2S(0xf9, 0xd6);
|
|
}
|
|
else
|
|
return MBERR_INTERNAL;
|
|
|
|
if (hintbase[s >> 3] & (1 << (s & 7))) {
|
|
OUTCHAR(decoded | 0x20000);
|
|
NEXT_IN(2);
|
|
}
|
|
else {
|
|
OUTCHAR(decoded);
|
|
NEXT_IN(2);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
switch ((c << 8) | INBYTE2) {
|
|
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
|
|
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
|
|
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
|
|
case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
|
|
default: return 1;
|
|
}
|
|
|
|
NEXT_IN(2); /* all decoded code points are pairs, above. */
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
BEGIN_MAPPINGS_LIST
|
|
MAPPING_DECONLY(big5hkscs)
|
|
MAPPING_ENCONLY(big5hkscs_bmp)
|
|
MAPPING_ENCONLY(big5hkscs_nonbmp)
|
|
END_MAPPINGS_LIST
|
|
|
|
BEGIN_CODECS_LIST
|
|
CODEC_STATELESS_WINIT(big5hkscs)
|
|
END_CODECS_LIST
|
|
|
|
/*
 * Module boilerplate for the "hk" codec collection.
 * NOTE(review): the macro is defined in cjkcodecs.h; it presumably defines
 * PyInit__codecs_hk, which the inittab entry in this file refers to --
 * confirm.
 */
I_AM_A_MODULE_FOR(hk)
|
|
|
|
#ifdef __aarch64__
|
|
_Section(".rodata.pytab.1 //")
|
|
#else
|
|
_Section(".rodata.pytab.1")
|
|
#endif
|
|
const struct _inittab _PyImport_Inittab__codecs_hk = {
|
|
"_codecs_hk",
|
|
PyInit__codecs_hk,
|
|
};
|