mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 05:42:29 +00:00
python-3.6.zip added from Github
README.cosmo contains the necessary links.
This commit is contained in:
parent
75fc601ff5
commit
0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions
79
third_party/python/Modules/cjkcodecs/README
vendored
Normal file
79
third_party/python/Modules/cjkcodecs/README
vendored
Normal file
|
@ -0,0 +1,79 @@
|
|||
To generate or modify mapping headers
|
||||
-------------------------------------
|
||||
Mapping headers are imported from CJKCodecs in pre-generated form.
|
||||
If you need to tweak or add something to them, please look at the tools/
|
||||
subdirectory of CJKCodecs' distribution.
|
||||
|
||||
|
||||
|
||||
Notes on implementation characteristics of each codec
|
||||
-----------------------------------------------------
|
||||
|
||||
1) Big5 codec
|
||||
|
||||
The big5 codec maps the following characters as cp950 does rather
|
||||
than conforming to Unicode.org's mapping, which maps them to 0xFFFD.
|
||||
|
||||
BIG5 Unicode Description
|
||||
|
||||
0xA15A 0x2574 SPACING UNDERSCORE
|
||||
0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE
|
||||
0xA1C5 0x02CD SPACING HEAVY UNDERSCORE
|
||||
0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT
|
||||
0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT
|
||||
0xA2CC 0x5341 HANGZHOU NUMERAL TEN
|
||||
0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY
|
||||
|
||||
Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are mapped to other
|
||||
big5 codes already, a roundtrip compatibility is not guaranteed for
|
||||
them.
|
||||
|
||||
|
||||
2) cp932 codec
|
||||
|
||||
To conform to Windows's real mapping, cp932 codec maps the following
|
||||
codepoints in addition to the official cp932 mapping.
|
||||
|
||||
CP932 Unicode Description
|
||||
|
||||
0x80 0x80 UNDEFINED
|
||||
0xA0 0xF8F0 UNDEFINED
|
||||
0xFD 0xF8F1 UNDEFINED
|
||||
0xFE 0xF8F2 UNDEFINED
|
||||
0xFF 0xF8F3 UNDEFINED
|
||||
|
||||
|
||||
3) euc-jisx0213 codec
|
||||
|
||||
The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
|
||||
unicode U+FF3C instead of U+005C as on unicode.org's mapping.
|
||||
Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A1C0
|
||||
is shown as a full width character, mapping to U+FF3C can make
|
||||
more sense.
|
||||
|
||||
The euc-jisx0213 codec is enabled to decode JIS X 0212 codes on
|
||||
codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 do not
|
||||
overlap each other, this does not break standard conformance
|
||||
(and JIS X 0213 Plane 2 is intended to be used this way.) On encoding
|
||||
sessions, the codec will try to encode kanji characters in this
|
||||
order:
|
||||
|
||||
JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
|
||||
|
||||
|
||||
4) euc-jp codec
|
||||
|
||||
The euc-jp codec is a compatibility instance on these points:
|
||||
- U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
|
||||
- U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
|
||||
- U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
|
||||
|
||||
|
||||
5) shift-jis codec
|
||||
|
||||
The shift-jis codec is mapping 0x20-0x7e area to U+20-U+7E directly
|
||||
instead of using JIS X 0201 for compatibility. The differences are:
|
||||
- U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
|
||||
- U+007E TILDE is mapped to SHIFT-JIS 0x7e.
|
||||
- U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.
|
||||
|
464
third_party/python/Modules/cjkcodecs/_codecs_cn.c
vendored
Normal file
464
third_party/python/Modules/cjkcodecs/_codecs_cn.c
vendored
Normal file
|
@ -0,0 +1,464 @@
|
|||
/*
|
||||
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_cn.h"
|
||||
|
||||
/**
|
||||
* hz is predefined as 100 on AIX. So we undefine it to avoid
|
||||
* conflict against hz codec's.
|
||||
*/
|
||||
#ifdef _AIX
|
||||
#undef hz
|
||||
#endif
|
||||
|
||||
/* GBK and GB2312 map differently in a few code points that are listed below:
|
||||
*
|
||||
* gb2312 gbk
|
||||
* A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
|
||||
* A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
|
||||
* A844 undefined U+2015 HORIZONTAL BAR
|
||||
*/
|
||||
|
||||
/*
 * GBK_DECODE(dc1, dc2, writer)
 *
 * Opens an if / else-if chain that decodes the two-byte sequence
 * (dc1, dc2):
 *   - A1AA -> U+2014, A844 -> U+2015 and A1A4 -> U+00B7 are handled
 *     explicitly before any table lookup;
 *   - otherwise the gb2312 table is tried with bit 7 of both bytes flipped;
 *   - finally the gbkext table is tried with the raw bytes.
 *
 * The chain is intentionally left open: the invocation must be followed
 * by an `else` (or `else if`) that handles undecodable input.
 *
 * The expansion uses a variable named `decoded` that must exist at the
 * call site. `writer` is accepted for symmetry but is not referenced in
 * this macro's own text.
 *
 * Every use of dc1/dc2 is parenthesized so that compound arguments
 * (e.g. `a | b`) bind correctly against `^ 0x80`.
 */
#define GBK_DECODE(dc1, dc2, writer)                                      \
    if ((dc1) == 0xa1 && (dc2) == 0xaa) {                                 \
        OUTCHAR(0x2014);                                                  \
    }                                                                     \
    else if ((dc1) == 0xa8 && (dc2) == 0x44) {                            \
        OUTCHAR(0x2015);                                                  \
    }                                                                     \
    else if ((dc1) == 0xa1 && (dc2) == 0xa4) {                            \
        OUTCHAR(0x00b7);                                                  \
    }                                                                     \
    else if (TRYMAP_DEC(gb2312, decoded, (dc1) ^ 0x80, (dc2) ^ 0x80)) {   \
        OUTCHAR(decoded);                                                 \
    }                                                                     \
    else if (TRYMAP_DEC(gbkext, decoded, (dc1), (dc2))) {                 \
        OUTCHAR(decoded);                                                 \
    }
|
||||
|
||||
/*
 * GBK_ENCODE(code, assi)
 *
 * Opens an if / else-if chain that stores the GBK double-byte value for
 * the code point `code` into `assi`:
 *   - U+2014 -> 0xa1aa, U+2015 -> 0xa844, U+00B7 -> 0xa1a4 are fixed;
 *   - U+30FB is never looked up, so it falls through to the caller's
 *     trailing `else`;
 *   - everything else goes through the gbcommon table.
 *
 * The chain is left open on purpose: the invocation must be followed by
 * an `else` (or `else if`) that handles code points with no mapping.
 */
#define GBK_ENCODE(code, assi)                                          \
    if ((code) == 0x2014) {                                             \
        (assi) = 0xa1aa;                                                \
    }                                                                   \
    else if ((code) == 0x2015) {                                        \
        (assi) = 0xa844;                                                \
    }                                                                   \
    else if ((code) == 0x00b7) {                                        \
        (assi) = 0xa1a4;                                                \
    }                                                                   \
    else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code)) {    \
        /* the table lookup has already stored the result in assi */    \
    }
|
||||
|
||||
/*
|
||||
* GB2312 codec
|
||||
*/
|
||||
|
||||
ENCODER(gb2312)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
if (TRYMAP_ENC(gbcommon, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
OUTBYTE1((code >> 8) | 0x80);
|
||||
OUTBYTE2((code & 0xFF) | 0x80);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
if (TRYMAP_DEC(gb2312, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GBK codec
|
||||
*/
|
||||
|
||||
ENCODER(gbk)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
GBK_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
|
||||
OUTBYTE1((code >> 8) | 0x80);
|
||||
if (code & 0x8000)
|
||||
OUTBYTE2((code & 0xFF)); /* MSB set: GBK */
|
||||
else
|
||||
OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
|
||||
GBK_DECODE(c, INBYTE2, writer)
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GB18030 codec
|
||||
*/
|
||||
|
||||
ENCODER(gb18030)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1(c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c >= 0x10000) {
|
||||
Py_UCS4 tc = c - 0x10000;
|
||||
assert (c <= 0x10FFFF);
|
||||
|
||||
REQUIRE_OUTBUF(4);
|
||||
|
||||
OUTBYTE4((unsigned char)(tc % 10) + 0x30);
|
||||
tc /= 10;
|
||||
OUTBYTE3((unsigned char)(tc % 126) + 0x81);
|
||||
tc /= 126;
|
||||
OUTBYTE2((unsigned char)(tc % 10) + 0x30);
|
||||
tc /= 10;
|
||||
OUTBYTE1((unsigned char)(tc + 0x90));
|
||||
|
||||
NEXT(1, 4);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
GBK_ENCODE(c, code)
|
||||
else if (TRYMAP_ENC(gb18030ext, code, c))
|
||||
;
|
||||
else {
|
||||
const struct _gb18030_to_unibmp_ranges *utrrange;
|
||||
|
||||
REQUIRE_OUTBUF(4);
|
||||
|
||||
for (utrrange = gb18030_to_unibmp_ranges;
|
||||
utrrange->first != 0;
|
||||
utrrange++)
|
||||
if (utrrange->first <= c &&
|
||||
c <= utrrange->last) {
|
||||
Py_UCS4 tc;
|
||||
|
||||
tc = c - utrrange->first +
|
||||
utrrange->base;
|
||||
|
||||
OUTBYTE4((unsigned char)(tc % 10) + 0x30);
|
||||
tc /= 10;
|
||||
OUTBYTE3((unsigned char)(tc % 126) + 0x81);
|
||||
tc /= 126;
|
||||
OUTBYTE2((unsigned char)(tc % 10) + 0x30);
|
||||
tc /= 10;
|
||||
OUTBYTE1((unsigned char)tc + 0x81);
|
||||
|
||||
NEXT(1, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
if (utrrange->first == 0)
|
||||
return 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
OUTBYTE1((code >> 8) | 0x80);
|
||||
if (code & 0x8000)
|
||||
OUTBYTE2((code & 0xFF)); /* MSB set: GBK or GB18030ext */
|
||||
else
|
||||
OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */
|
||||
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1, c2;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
|
||||
c2 = INBYTE2;
|
||||
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
|
||||
const struct _gb18030_to_unibmp_ranges *utr;
|
||||
unsigned char c3, c4;
|
||||
Py_UCS4 lseq;
|
||||
|
||||
REQUIRE_INBUF(4);
|
||||
c3 = INBYTE3;
|
||||
c4 = INBYTE4;
|
||||
if (c < 0x81 || c > 0xFE ||
|
||||
c3 < 0x81 || c3 > 0xFE ||
|
||||
c4 < 0x30 || c4 > 0x39)
|
||||
return 1;
|
||||
c -= 0x81; c2 -= 0x30;
|
||||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
if (c < 4) { /* U+0080 - U+FFFF */
|
||||
lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
|
||||
(Py_UCS4)c3 * 10 + c4;
|
||||
if (lseq < 39420) {
|
||||
for (utr = gb18030_to_unibmp_ranges;
|
||||
lseq >= (utr + 1)->base;
|
||||
utr++) ;
|
||||
OUTCHAR(utr->first - utr->base + lseq);
|
||||
NEXT_IN(4);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c >= 15) { /* U+10000 - U+10FFFF */
|
||||
lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
|
||||
* 1260 + (Py_UCS4)c3 * 10 + c4;
|
||||
if (lseq <= 0x10FFFF) {
|
||||
OUTCHAR(lseq);
|
||||
NEXT_IN(4);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
GBK_DECODE(c, c2, writer)
|
||||
else if (TRYMAP_DEC(gb18030ext, decoded, c, c2))
|
||||
OUTCHAR(decoded);
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HZ codec
|
||||
*/
|
||||
|
||||
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
WRITEBYTE2('~', '}');
|
||||
state->i = 0;
|
||||
NEXT_OUT(2);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(hz)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i) {
|
||||
WRITEBYTE2('~', '}');
|
||||
NEXT_OUT(2);
|
||||
state->i = 0;
|
||||
}
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
if (c == '~') {
|
||||
WRITEBYTE1('~');
|
||||
NEXT_OUT(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
if (TRYMAP_ENC(gbcommon, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
WRITEBYTE4('~', '{', code >> 8, code & 0xff);
|
||||
NEXT(1, 4);
|
||||
state->i = 1;
|
||||
}
|
||||
else {
|
||||
WRITEBYTE2(code >> 8, code & 0xff);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c == '~') {
|
||||
unsigned char c2 = INBYTE2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
if (c2 == '~' && state->i == 0)
|
||||
OUTCHAR('~');
|
||||
else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '\n' && state->i == 0)
|
||||
; /* line-continuation */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
}
|
||||
else { /* GB mode */
|
||||
REQUIRE_INBUF(2);
|
||||
if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(gb2312)
|
||||
MAPPING_DECONLY(gbkext)
|
||||
MAPPING_ENCONLY(gbcommon)
|
||||
MAPPING_ENCDEC(gb18030ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(gb2312)
|
||||
CODEC_STATELESS(gbk)
|
||||
CODEC_STATELESS(gb18030)
|
||||
CODEC_STATEFUL(hz)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(cn)
|
191
third_party/python/Modules/cjkcodecs/_codecs_hk.c
vendored
Normal file
191
third_party/python/Modules/cjkcodecs/_codecs_hk.c
vendored
Normal file
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#define USING_IMPORTED_MAPS
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_hk.h"
|
||||
|
||||
/*
|
||||
* BIG5HKSCS codec
|
||||
*/
|
||||
|
||||
static const encode_map *big5_encmap = NULL;
|
||||
static const decode_map *big5_decmap = NULL;
|
||||
|
||||
CODEC_INIT(big5hkscs)
|
||||
{
|
||||
static int initialized = 0;
|
||||
|
||||
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
|
||||
return -1;
|
||||
initialized = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
|
||||
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
|
||||
* U+00CA U+030C -> 8864
|
||||
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
|
||||
* U+00EA U+030C -> 88a5
|
||||
* These are handled by not mapping tables but a hand-written code.
|
||||
*/
|
||||
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
|
||||
|
||||
ENCODER(big5hkscs)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
Py_ssize_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
REQUIRE_OUTBUF(1);
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
insize = 1;
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
if (c < 0x10000) {
|
||||
if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
|
||||
if (code == MULTIC) {
|
||||
Py_UCS4 c2;
|
||||
if (inlen - *inpos >= 2)
|
||||
c2 = INCHAR2;
|
||||
else
|
||||
c2 = 0;
|
||||
|
||||
if (inlen - *inpos >= 2 &&
|
||||
((c & 0xffdf) == 0x00ca) &&
|
||||
((c2 & 0xfff7) == 0x0304)) {
|
||||
code = big5hkscs_pairenc_table[
|
||||
((c >> 4) |
|
||||
(c2 >> 3)) & 3];
|
||||
insize = 2;
|
||||
}
|
||||
else if (inlen - *inpos < 2 &&
|
||||
!(flags & MBENC_FLUSH))
|
||||
return MBERR_TOOFEW;
|
||||
else {
|
||||
if (c == 0xca)
|
||||
code = 0x8866;
|
||||
else /* c == 0xea */
|
||||
code = 0x88a7;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (TRYMAP_ENC(big5, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c < 0x20000)
|
||||
return insize;
|
||||
else if (c < 0x30000) {
|
||||
if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
|
||||
;
|
||||
else
|
||||
return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
|
||||
OUTBYTE1(code >> 8);
|
||||
OUTBYTE2(code & 0xFF);
|
||||
NEXT(insize, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * BH2S(c1, c2): linear index of the double-byte code (c1, c2), counting
 * from (0x87, 0x40) with 0xfe - 0x40 + 1 trail-byte slots per lead byte.
 */
#define BH2S(c1, c2) (((c2) - 0x40) + ((c1) - 0x87) * (0xfe - 0x40 + 1))
|
||||
|
||||
DECODER(big5hkscs)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
|
||||
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
|
||||
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
|
||||
{
|
||||
int s = BH2S(c, INBYTE2);
|
||||
const unsigned char *hintbase;
|
||||
|
||||
assert(0x87 <= c && c <= 0xfe);
|
||||
assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
|
||||
|
||||
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
|
||||
hintbase = big5hkscs_phint_0;
|
||||
s -= BH2S(0x87, 0x40);
|
||||
}
|
||||
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
|
||||
hintbase = big5hkscs_phint_12130;
|
||||
s -= BH2S(0xc6, 0xa1);
|
||||
}
|
||||
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
|
||||
hintbase = big5hkscs_phint_21924;
|
||||
s -= BH2S(0xf9, 0xd6);
|
||||
}
|
||||
else
|
||||
return MBERR_INTERNAL;
|
||||
|
||||
if (hintbase[s >> 3] & (1 << (s & 7))) {
|
||||
OUTCHAR(decoded | 0x20000);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
switch ((c << 8) | INBYTE2) {
|
||||
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
|
||||
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
|
||||
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
|
||||
case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
|
||||
default: return 1;
|
||||
}
|
||||
|
||||
NEXT_IN(2); /* all decoded code points are pairs, above. */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(big5hkscs)
|
||||
MAPPING_ENCONLY(big5hkscs_bmp)
|
||||
MAPPING_ENCONLY(big5hkscs_nonbmp)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS_WINIT(big5hkscs)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(hk)
|
1143
third_party/python/Modules/cjkcodecs/_codecs_iso2022.c
vendored
Normal file
1143
third_party/python/Modules/cjkcodecs/_codecs_iso2022.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
760
third_party/python/Modules/cjkcodecs/_codecs_jp.c
vendored
Normal file
760
third_party/python/Modules/cjkcodecs/_codecs_jp.c
vendored
Normal file
|
@ -0,0 +1,760 @@
|
|||
/*
|
||||
* _codecs_jp.c: Codecs collection for Japanese encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_jp.h"
|
||||
#include "mappings_jisx0213_pair.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "emu_jisx0213_2000.h"
|
||||
|
||||
/*
|
||||
* CP932 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp932)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
if (c <= 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
WRITEBYTE1(c - 0xfec0);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xf8f0 && c <= 0xf8f3) {
|
||||
/* Windows compatibility */
|
||||
REQUIRE_OUTBUF(1);
|
||||
if (c == 0xf8f0)
|
||||
OUTBYTE1(0xa0);
|
||||
else
|
||||
OUTBYTE1(c - 0xf8f1 + 0xfd);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
if (TRYMAP_ENC(cp932ext, code, c)) {
|
||||
OUTBYTE1(code >> 8);
|
||||
OUTBYTE2(code & 0xff);
|
||||
}
|
||||
else if (TRYMAP_ENC(jisxcommon, code, c)) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
|
||||
/* JIS X 0208 */
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1);
|
||||
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
|
||||
}
|
||||
else if (c >= 0xe000 && c < 0xe758) {
|
||||
/* User-defined area */
|
||||
c1 = (Py_UCS4)(c - 0xe000) / 188;
|
||||
c2 = (Py_UCS4)(c - 0xe000) % 188;
|
||||
OUTBYTE1(c1 + 0xf0);
|
||||
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1, c2;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c <= 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xa0 && c <= 0xdf) {
|
||||
if (c == 0xa0)
|
||||
OUTCHAR(0xf8f0); /* half-width katakana */
|
||||
else
|
||||
OUTCHAR(0xfec0 + c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xfd/* && c <= 0xff*/) {
|
||||
/* Windows compatibility */
|
||||
OUTCHAR(0xf8f1 - 0xfd + c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
|
||||
if (TRYMAP_DEC(cp932ext, decoded, c, c2))
|
||||
OUTCHAR(decoded);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 1;
|
||||
|
||||
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
if (TRYMAP_DEC(jisx0208, decoded, c, c2))
|
||||
OUTCHAR(decoded);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) ||
|
||||
(c2 >= 0x80 && c2 <= 0xfc))
|
||||
OUTCHAR(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EUC-JIS-2004 codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_jis_2004)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
Py_ssize_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1(c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
insize = 1;
|
||||
|
||||
if (c <= 0xFFFF) {
|
||||
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
||||
else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
|
||||
if (code == MULTIC) {
|
||||
if (inlen - *inpos < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else {
|
||||
Py_UCS4 c2 = INCHAR2;
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, c2,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (TRYMAP_ENC(jisxcommon, code, c))
|
||||
;
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITEBYTE2(0x8e, c - 0xfec0);
|
||||
NEXT(1, 2);
|
||||
continue;
|
||||
}
|
||||
else if (c == 0xff3c)
|
||||
/* F/W REVERSE SOLIDUS (see NOTES) */
|
||||
code = 0x2140;
|
||||
else if (c == 0xff5e)
|
||||
/* F/W TILDE (see NOTES) */
|
||||
code = 0x2232;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c >> 16 == EMPBASE >> 16) {
|
||||
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
||||
else if (TRYMAP_ENC(jisx0213_emp, code, c & 0xffff))
|
||||
;
|
||||
else
|
||||
return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* Codeset 2 */
|
||||
WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
|
||||
NEXT(insize, 3);
|
||||
} else {
|
||||
/* Codeset 1 */
|
||||
WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
|
||||
NEXT(insize, 2);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 code, decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUTCHAR(0xfec0 + c2);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3);
|
||||
c2 = INBYTE2 ^ 0x80;
|
||||
c3 = INBYTE3 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
|
||||
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c2, c3))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(jisx0213_2_emp, code, c2, c3)) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
NEXT_IN(3);
|
||||
continue;
|
||||
}
|
||||
else if (TRYMAP_DEC(jisx0212, decoded, c2, c3))
|
||||
OUTCHAR(decoded);
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(3);
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c ^= 0x80;
|
||||
c2 = INBYTE2 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 1 */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
|
||||
else if (c == 0x21 && c2 == 0x40)
|
||||
OUTCHAR(0xff3c);
|
||||
else if (c == 0x22 && c2 == 0x32)
|
||||
OUTCHAR(0xff5e);
|
||||
else if (TRYMAP_DEC(jisx0208, decoded, c, c2))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c, c2))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(jisx0213_1_emp, code, c, c2)) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else if (TRYMAP_DEC(jisx0213_pair, code, c, c2)) {
|
||||
OUTCHAR2(code >> 16, code & 0xffff);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EUC-JP codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_jp)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
if (TRYMAP_ENC(jisxcommon, code, c))
|
||||
;
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITEBYTE2(0x8e, c - 0xfec0);
|
||||
NEXT(1, 2);
|
||||
continue;
|
||||
}
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else if (c == 0xa5) { /* YEN SIGN */
|
||||
WRITEBYTE1(0x5c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
} else if (c == 0x203e) { /* OVERLINE */
|
||||
WRITEBYTE1(0x7e);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* JIS X 0212 */
|
||||
WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
|
||||
NEXT(1, 3);
|
||||
} else {
|
||||
/* JIS X 0208 */
|
||||
WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUTCHAR(0xfec0 + c2);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3);
|
||||
c2 = INBYTE2;
|
||||
c3 = INBYTE3;
|
||||
/* JIS X 0212 */
|
||||
if (TRYMAP_DEC(jisx0212, decoded, c2 ^ 0x80, c3 ^ 0x80)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(3);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
/* JIS X 0208 */
|
||||
#ifndef STRICT_BUILD
|
||||
if (c == 0xa1 && c2 == 0xc0)
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUTCHAR(0xff3c);
|
||||
else
|
||||
#endif
|
||||
if (TRYMAP_DEC(jisx0208, decoded, c ^ 0x80, c2 ^ 0x80))
|
||||
OUTCHAR(decoded);
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SHIFT_JIS codec
|
||||
*/
|
||||
|
||||
ENCODER(shift_jis)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
#else
|
||||
if (c < 0x80)
|
||||
code = c;
|
||||
else if (c == 0x00a5)
|
||||
code = 0x5c; /* YEN SIGN */
|
||||
else if (c == 0x203e)
|
||||
code = 0x7e; /* OVERLINE */
|
||||
#endif
|
||||
else JISX0201_K_ENCODE(c, code)
|
||||
else if (c > 0xFFFF)
|
||||
return 1;
|
||||
else
|
||||
code = NOCHAR;
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
REQUIRE_OUTBUF(1);
|
||||
|
||||
OUTBYTE1((unsigned char)code);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
if (code == NOCHAR) {
|
||||
if (TRYMAP_ENC(jisxcommon, code, c))
|
||||
;
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c)
|
||||
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1);
|
||||
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_DECODE(c, writer)
|
||||
#else
|
||||
if (c < 0x80)
|
||||
OUTCHAR(c);
|
||||
#endif
|
||||
else JISX0201_K_DECODE(c, writer)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
unsigned char c1, c2;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 1;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
#ifndef STRICT_BUILD
|
||||
if (c1 == 0x21 && c2 == 0x40) {
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUTCHAR(0xff3c);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (TRYMAP_DEC(jisx0208, decoded, c1, c2)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(1); /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SHIFT_JIS-2004 codec
|
||||
*/
|
||||
|
||||
ENCODER(shift_jis_2004)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code = NOCHAR;
|
||||
int c1, c2;
|
||||
Py_ssize_t insize;
|
||||
|
||||
JISX0201_ENCODE(c, code)
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
WRITEBYTE1((unsigned char)code);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
insize = 1;
|
||||
|
||||
if (code == NOCHAR) {
|
||||
if (c <= 0xffff) {
|
||||
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
||||
else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
|
||||
if (code == MULTIC) {
|
||||
if (inlen - *inpos < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap
|
||||
((ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else {
|
||||
Py_UCS4 ch2 = INCHAR2;
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, ch2,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (TRYMAP_ENC(jisxcommon, code, c)) {
|
||||
/* abandon JIS X 0212 codes */
|
||||
if (code & 0x8000)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c >> 16 == EMPBASE >> 16) {
|
||||
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
||||
else if (TRYMAP_ENC(jisx0213_emp, code, c&0xffff))
|
||||
;
|
||||
else
|
||||
return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = (code & 0xff) - 0x21;
|
||||
|
||||
if (c1 & 0x80) {
|
||||
/* Plane 2 */
|
||||
if (c1 >= 0xee)
|
||||
c1 -= 0x87;
|
||||
else if (c1 >= 0xac || c1 == 0xa8)
|
||||
c1 -= 0x49;
|
||||
else
|
||||
c1 -= 0x43;
|
||||
}
|
||||
else {
|
||||
/* Plane 1 */
|
||||
c1 -= 0x21;
|
||||
}
|
||||
|
||||
if (c1 & 1)
|
||||
c2 += 0x5e;
|
||||
c1 >>= 1;
|
||||
OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1));
|
||||
OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41));
|
||||
|
||||
NEXT(insize, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
|
||||
JISX0201_DECODE(c, writer)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
|
||||
unsigned char c1, c2;
|
||||
Py_UCS4 code, decoded;
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 1;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
if (c1 < 0x5e) { /* Plane 1 */
|
||||
c1 += 0x21;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
|
||||
c1, c2)
|
||||
else if (TRYMAP_DEC(jisx0208, decoded, c1, c2))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c1, c2))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(jisx0213_1_emp, code, c1, c2))
|
||||
OUTCHAR(EMPBASE | code);
|
||||
else if (TRYMAP_DEC(jisx0213_pair, code, c1, c2))
|
||||
OUTCHAR2(code >> 16, code & 0xffff);
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else { /* Plane 2 */
|
||||
if (c1 >= 0x67)
|
||||
c1 += 0x07;
|
||||
else if (c1 >= 0x63 || c1 == 0x5f)
|
||||
c1 -= 0x37;
|
||||
else
|
||||
c1 -= 0x3d;
|
||||
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
|
||||
c1, c2)
|
||||
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c1, c2))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(jisx0213_2_emp, code, c1, c2)) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(1); /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(jisx0208)
|
||||
MAPPING_DECONLY(jisx0212)
|
||||
MAPPING_ENCONLY(jisxcommon)
|
||||
MAPPING_DECONLY(jisx0213_1_bmp)
|
||||
MAPPING_DECONLY(jisx0213_2_bmp)
|
||||
MAPPING_ENCONLY(jisx0213_bmp)
|
||||
MAPPING_DECONLY(jisx0213_1_emp)
|
||||
MAPPING_DECONLY(jisx0213_2_emp)
|
||||
MAPPING_ENCONLY(jisx0213_emp)
|
||||
MAPPING_ENCDEC(jisx0213_pair)
|
||||
MAPPING_ENCDEC(cp932ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(shift_jis)
|
||||
CODEC_STATELESS(cp932)
|
||||
CODEC_STATELESS(euc_jp)
|
||||
CODEC_STATELESS(shift_jis_2004)
|
||||
CODEC_STATELESS(euc_jis_2004)
|
||||
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
|
||||
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(jp)
|
468
third_party/python/Modules/cjkcodecs/_codecs_kr.c
vendored
Normal file
468
third_party/python/Modules/cjkcodecs/_codecs_kr.c
vendored
Normal file
|
@ -0,0 +1,468 @@
|
|||
/*
|
||||
* _codecs_kr.c: Codecs collection for Korean encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_kr.h"
|
||||
|
||||
/*
|
||||
* EUC-KR codec
|
||||
*/
|
||||
|
||||
/* First byte of every jamo in a make-up sequence, and the second byte that
 * stands for "no jamo in this position". */
#define EUCKR_JAMO_FIRSTBYTE    0xA4
#define EUCKR_JAMO_FILLER       0xD4

/* Second byte of the make-up jamo for each initial-consonant index
 * (syllable_index / 588). */
static const unsigned char u2cgk_choseong[19] = {
    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1,
    0xb2, 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9,
    0xba, 0xbb, 0xbc, 0xbd, 0xbe
};

/* Second byte for each vowel index ((syllable_index / 28) % 21). */
static const unsigned char u2cgk_jungseong[21] = {
    0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc,
    0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};

/* Second byte for each final-consonant index (syllable_index % 28);
 * index 0 (no final) is the filler byte. */
static const unsigned char u2cgk_jongseong[28] = {
    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
    0xa7, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
    0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
};
|
||||
|
||||
ENCODER(euc_kr)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
if (TRYMAP_ENC(cp949, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
|
||||
if ((code & 0x8000) == 0) {
|
||||
/* KS X 1001 coded character */
|
||||
OUTBYTE1((code >> 8) | 0x80);
|
||||
OUTBYTE2((code & 0xFF) | 0x80);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
else {
|
||||
/* Mapping is found in CP949 extension,
|
||||
but we encode it in KS X 1001:1998 Annex 3,
|
||||
make-up sequence for EUC-KR. */
|
||||
|
||||
REQUIRE_OUTBUF(8);
|
||||
|
||||
/* syllable composition precedence */
|
||||
OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
|
||||
OUTBYTE2(EUCKR_JAMO_FILLER);
|
||||
|
||||
/* All code points in CP949 extension are in unicode
|
||||
* Hangul Syllable area. */
|
||||
assert(0xac00 <= c && c <= 0xd7a3);
|
||||
c -= 0xac00;
|
||||
|
||||
OUTBYTE3(EUCKR_JAMO_FIRSTBYTE);
|
||||
OUTBYTE4(u2cgk_choseong[c / 588]);
|
||||
NEXT_OUT(4);
|
||||
|
||||
OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
|
||||
OUTBYTE2(u2cgk_jungseong[(c / 28) % 21]);
|
||||
OUTBYTE3(EUCKR_JAMO_FIRSTBYTE);
|
||||
OUTBYTE4(u2cgk_jongseong[c % 28]);
|
||||
NEXT(1, 4);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Marks a make-up byte that has no jamo index. */
#define NONE    127

/* Initial-consonant index for make-up bytes 0xA1..0xBE (entry = byte - 0xA1). */
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
    /* A1 */    0,    1, NONE,    2, NONE, NONE,    3,    4,
    /* A9 */    5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
    /* B1 */    6,    7,    8, NONE,    9,   10,   11,   12,
    /* B9 */   13,   14,   15,   16,   17,   18
};

/* Final-consonant index for make-up bytes 0xA1..0xBE (entry = byte - 0xA1). */
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
    /* A1 */    1,    2,    3,    4,    5,    6,    7, NONE,
    /* A9 */    8,    9,   10,   11,   12,   13,   14,   15,
    /* B1 */   16,   17, NONE,   18,   19,   20,   21,   22,
    /* B9 */ NONE,   23,   24,   25,   26,   27
};
|
||||
|
||||
DECODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
|
||||
if (c == EUCKR_JAMO_FIRSTBYTE &&
|
||||
INBYTE2 == EUCKR_JAMO_FILLER) {
|
||||
/* KS X 1001:1998 Annex 3 make-up sequence */
|
||||
DBCHAR cho, jung, jong;
|
||||
|
||||
REQUIRE_INBUF(8);
|
||||
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
|
||||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
|
||||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
|
||||
return 1;
|
||||
|
||||
c = (*inbuf)[3];
|
||||
if (0xa1 <= c && c <= 0xbe)
|
||||
cho = cgk2u_choseong[c - 0xa1];
|
||||
else
|
||||
cho = NONE;
|
||||
|
||||
c = (*inbuf)[5];
|
||||
jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
|
||||
|
||||
c = (*inbuf)[7];
|
||||
if (c == EUCKR_JAMO_FILLER)
|
||||
jong = 0;
|
||||
else if (0xa1 <= c && c <= 0xbe)
|
||||
jong = cgk2u_jongseong[c - 0xa1];
|
||||
else
|
||||
jong = NONE;
|
||||
|
||||
if (cho == NONE || jung == NONE || jong == NONE)
|
||||
return 1;
|
||||
|
||||
OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
|
||||
NEXT_IN(8);
|
||||
}
|
||||
else if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
|
||||
|
||||
/*
|
||||
* CP949 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp949)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
if (TRYMAP_ENC(cp949, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
|
||||
OUTBYTE1((code >> 8) | 0x80);
|
||||
if (code & 0x8000)
|
||||
OUTBYTE2(code & 0xFF); /* MSB set: CP949 */
|
||||
else
|
||||
OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: ks x 1001 */
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2))
|
||||
OUTCHAR(decoded);
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* JOHAB codec
|
||||
*/
|
||||
|
||||
/* 5-bit Johab field value for each initial-consonant index
 * (syllable_index / 588); only the first 19 entries are used. */
static const unsigned char u2johabidx_choseong[32] = {
    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
    0x12, 0x13, 0x14,
};

/* 5-bit Johab field value for each vowel index ((syllable_index / 28) % 21);
 * only the first 21 entries are used. */
static const unsigned char u2johabidx_jungseong[32] = {
    0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b, 0x0c,
    0x0d, 0x0e, 0x0f, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x1a, 0x1b, 0x1c, 0x1d,
};

/* 5-bit Johab field value for each final-consonant index
 * (syllable_index % 28); only the first 28 entries are used. */
static const unsigned char u2johabidx_jongseong[32] = {
    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    0x11, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
    0x1a, 0x1b, 0x1c, 0x1d,
};
|
||||
/* Johab code for each character U+3131..U+3163 (entry = code point - 0x3131). */
static const DBCHAR u2johabjamo[] = {
    /* U+3131 */ 0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
    /* U+3138 */ 0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e,
    /* U+313F */ 0x844f, 0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41,
    /* U+3146 */ 0xb041, 0xb441, 0xb841, 0xbc41, 0xc041, 0xc441, 0xc841,
    /* U+314D */ 0xcc41, 0xd041, 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
    /* U+3154 */ 0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1, 0x8641,
    /* U+315B */ 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1, 0x8741, 0x8761,
    /* U+3162 */ 0x8781, 0x87a1,
};
|
||||
|
||||
ENCODER(johab)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
if (c >= 0xac00 && c <= 0xd7a3) {
|
||||
c -= 0xac00;
|
||||
code = 0x8000 |
|
||||
(u2johabidx_choseong[c / 588] << 10) |
|
||||
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
|
||||
u2johabidx_jongseong[c % 28];
|
||||
}
|
||||
else if (c >= 0x3131 && c <= 0x3163)
|
||||
code = u2johabjamo[c - 0x3131];
|
||||
else if (TRYMAP_ENC(cp949, code, c)) {
|
||||
unsigned char c1, c2, t2;
|
||||
unsigned short t1;
|
||||
|
||||
assert((code & 0x8000) == 0);
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
if (((c1 >= 0x21 && c1 <= 0x2c) ||
|
||||
(c1 >= 0x4a && c1 <= 0x7d)) &&
|
||||
(c2 >= 0x21 && c2 <= 0x7e)) {
|
||||
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
|
||||
(c1 - 0x21 + 0x197));
|
||||
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
OUTBYTE1(t1 >> 1);
|
||||
OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43);
|
||||
NEXT(1, 2);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
OUTBYTE1(code >> 8);
|
||||
OUTBYTE2(code & 0xff);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Table markers: FILL = "position intentionally empty",
 * NONE = "field value is not valid". */
#define FILL    0xfd
#define NONE    0xff

/* 5-bit Johab field -> syllable component index. */
static const unsigned char johabidx_choseong[32] = {
    /* 00 */ NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    /* 08 */ 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    /* 10 */ 0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    /* 18 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    /* 00 */ NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    /* 08 */ NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    /* 10 */ NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    /* 18 */ NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    /* 00 */ NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 08 */ 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    /* 10 */ 0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    /* 18 */ 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/* 5-bit Johab field -> low byte of the stand-alone jamo (OR-ed with 0x3100
 * by the decoder). */
static const unsigned char johabjamo_choseong[32] = {
    /* 00 */ NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    /* 08 */ 0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    /* 10 */ 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    /* 18 */ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    /* 00 */ NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    /* 08 */ NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    /* 10 */ NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    /* 18 */ NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    /* 00 */ NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    /* 08 */ 0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    /* 10 */ 0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    /* 18 */ 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
|
||||
|
||||
DECODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1, c2;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
c2 = INBYTE2;
|
||||
|
||||
if (c < 0xd8) {
|
||||
/* johab hangul */
|
||||
unsigned char c_cho, c_jung, c_jong;
|
||||
unsigned char i_cho, i_jung, i_jong;
|
||||
|
||||
c_cho = (c >> 2) & 0x1f;
|
||||
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
|
||||
c_jong = c2 & 0x1f;
|
||||
|
||||
i_cho = johabidx_choseong[c_cho];
|
||||
i_jung = johabidx_jungseong[c_jung];
|
||||
i_jong = johabidx_jongseong[c_jong];
|
||||
|
||||
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
|
||||
return 1;
|
||||
|
||||
/* we don't use U+1100 hangul jamo yet. */
|
||||
if (i_cho == FILL) {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUTCHAR(0x3000);
|
||||
else
|
||||
OUTCHAR(0x3100 |
|
||||
johabjamo_jongseong[c_jong]);
|
||||
}
|
||||
else {
|
||||
if (i_jong == FILL)
|
||||
OUTCHAR(0x3100 |
|
||||
johabjamo_jungseong[c_jung]);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUTCHAR(0x3100 |
|
||||
johabjamo_choseong[c_cho]);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
OUTCHAR(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong));
|
||||
}
|
||||
NEXT_IN(2);
|
||||
} else {
|
||||
/* KS X 1001 except hangul jamos and syllables */
|
||||
if (c == 0xdf || c > 0xf9 ||
|
||||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
|
||||
(c2 & 0x7f) == 0x7f ||
|
||||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
|
||||
return 1;
|
||||
else {
|
||||
unsigned char t1, t2;
|
||||
|
||||
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
|
||||
2 * c - 0x197);
|
||||
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
|
||||
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
|
||||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
if (TRYMAP_DEC(ksx1001, decoded, t1, t2)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
#undef FILL
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(ksx1001)
|
||||
MAPPING_ENCONLY(cp949)
|
||||
MAPPING_DECONLY(cp949ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(euc_kr)
|
||||
CODEC_STATELESS(cp949)
|
||||
CODEC_STATELESS(johab)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(kr)
|
143
third_party/python/Modules/cjkcodecs/_codecs_tw.c
vendored
Normal file
143
third_party/python/Modules/cjkcodecs/_codecs_tw.c
vendored
Normal file
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* _codecs_tw.c: Codecs collection for Taiwan's encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_tw.h"
|
||||
|
||||
/*
|
||||
* BIG5 codec
|
||||
*/
|
||||
|
||||
ENCODER(big5)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
REQUIRE_OUTBUF(1);
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
|
||||
if (TRYMAP_ENC(big5, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
|
||||
OUTBYTE1(code >> 8);
|
||||
OUTBYTE2(code & 0xFF);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CP950 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp950)
|
||||
{
|
||||
while (*inpos < inlen) {
|
||||
Py_UCS4 c = INCHAR1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITEBYTE1((unsigned char)c);
|
||||
NEXT(1, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c > 0xFFFF)
|
||||
return 1;
|
||||
|
||||
REQUIRE_OUTBUF(2);
|
||||
if (TRYMAP_ENC(cp950ext, code, c))
|
||||
;
|
||||
else if (TRYMAP_ENC(big5, code, c))
|
||||
;
|
||||
else
|
||||
return 1;
|
||||
|
||||
OUTBYTE1(code >> 8);
|
||||
OUTBYTE2(code & 0xFF);
|
||||
NEXT(1, 2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = INBYTE1;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2);
|
||||
|
||||
if (TRYMAP_DEC(cp950ext, decoded, c, INBYTE2))
|
||||
OUTCHAR(decoded);
|
||||
else if (TRYMAP_DEC(big5, decoded, c, INBYTE2))
|
||||
OUTCHAR(decoded);
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_ENCDEC(big5)
|
||||
MAPPING_ENCDEC(cp950ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(big5)
|
||||
CODEC_STATELESS(cp950)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(tw)
|
65
third_party/python/Modules/cjkcodecs/alg_jisx0201.h
vendored
Normal file
65
third_party/python/Modules/cjkcodecs/alg_jisx0201.h
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*
 * JIS X 0201 helpers.
 *
 * Every macro below expands to an if / else-if chain WITHOUT a final else,
 * so callers can append their own "else ..." branch for characters the
 * macro does not handle.  Arguments are evaluated more than once.
 */

/* Roman half, Unicode -> byte: U+00A5 -> 0x5c, U+203E -> 0x7e, and the rest
 * of 0x00..0x7f (except 0x5c and 0x7e themselves) unchanged. */
#define JISX0201_R_ENCODE(c, assi)                                      \
    if ((c) == 0x00a5) {                                                \
        (assi) = 0x5c;                                                  \
    }                                                                   \
    else if ((c) == 0x203e) {                                           \
        (assi) = 0x7e;                                                  \
    }                                                                   \
    else if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) {                \
        (assi) = (c);                                                   \
    }

/* Katakana half, Unicode -> byte: U+FF61..U+FF9F -> 0xa1..0xdf. */
#define JISX0201_K_ENCODE(c, assi)                                      \
    if ((c) >= 0xff61 && (c) <= 0xff9f) {                               \
        (assi) = (c) - 0xfec0;                                          \
    }

/* Both halves, Unicode -> byte. */
#define JISX0201_ENCODE(c, assi)                                        \
    JISX0201_R_ENCODE(c, assi)                                          \
    else JISX0201_K_ENCODE(c, assi)

/* Roman half, byte -> Unicode stored in assi: 0x5c -> U+00A5,
 * 0x7e -> U+203E, everything else up to 0x7f unchanged. */
#define JISX0201_R_DECODE_CHAR(c, assi)                                 \
    if ((c) == 0x5c) {                                                  \
        (assi) = 0x00a5;                                                \
    }                                                                   \
    else if ((c) == 0x7e) {                                             \
        (assi) = 0x203e;                                                \
    }                                                                   \
    else if ((c) <= 0x7f) {                                             \
        (assi) = (c);                                                   \
    }

/* Same as JISX0201_R_DECODE_CHAR but emits through OUTCHAR; the writer
 * argument is not referenced by the expansion itself. */
#define JISX0201_R_DECODE(c, writer)                                    \
    if ((c) == 0x5c) {                                                  \
        OUTCHAR(0x00a5);                                                \
    }                                                                   \
    else if ((c) == 0x7e) {                                             \
        OUTCHAR(0x203e);                                                \
    }                                                                   \
    else if ((c) <= 0x7f) {                                             \
        OUTCHAR(c);                                                     \
    }

/* Katakana half, byte -> Unicode emitted through OUTCHAR. */
#define JISX0201_K_DECODE(c, writer)                                    \
    if ((c) >= 0xa1 && (c) <= 0xdf) {                                   \
        OUTCHAR(0xfec0 + (c));                                          \
    }

/* Katakana half, byte -> Unicode stored in assi. */
#define JISX0201_K_DECODE_CHAR(c, assi)                                 \
    if ((c) >= 0xa1 && (c) <= 0xdf) {                                   \
        (assi) = 0xfec0 + (c);                                          \
    }

/* Both halves, byte -> Unicode emitted through OUTCHAR. */
#define JISX0201_DECODE(c, writer)                                      \
    JISX0201_R_DECODE(c, writer)                                        \
    else JISX0201_K_DECODE(c, writer)
|
417
third_party/python/Modules/cjkcodecs/cjkcodecs.h
vendored
Normal file
417
third_party/python/Modules/cjkcodecs/cjkcodecs.h
vendored
Normal file
|
@ -0,0 +1,417 @@
|
|||
/*
|
||||
* cjkcodecs.h: common header for cjkcodecs
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#ifndef _CJKCODECS_H_
|
||||
#define _CJKCODECS_H_
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include "multibytecodec.h"
|
||||
|
||||
|
||||
/* a unicode "undefined" code point */
|
||||
#define UNIINV  0xFFFE  /* decode-map entry with no Unicode mapping */

/* internal-use DBCS code points which aren't used by any charsets */
#define NOCHAR  0xFFFF  /* encode-map entry with no mapping */
#define MULTIC  0xFFFE  /* encode-map entry that needs a pair lookup */
#define DBCINV  0xFFFD  /* "not found" result of find_pairencmap() */

/* shorter macros to save source size of mapping tables */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
|
||||
|
||||
struct dbcs_index {
|
||||
const ucs2_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct dbcs_index decode_map;
|
||||
|
||||
struct widedbcs_index {
|
||||
const Py_UCS4 *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct widedbcs_index widedecode_map;
|
||||
|
||||
struct unim_index {
|
||||
const DBCHAR *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct unim_index encode_map;
|
||||
|
||||
struct unim_index_bytebased {
|
||||
const unsigned char *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
|
||||
struct dbcs_map {
|
||||
const char *charset;
|
||||
const struct unim_index *encmap;
|
||||
const struct dbcs_index *decmap;
|
||||
};
|
||||
|
||||
struct pair_encodemap {
|
||||
Py_UCS4 uniseq;
|
||||
DBCHAR code;
|
||||
};
|
||||
|
||||
static const MultibyteCodec *codec_list;
|
||||
static const struct dbcs_map *mapping_list;
|
||||
|
||||
/*
 * Function-signature macros.  Each expands to the prototype of one codec
 * callback named <encoding>_<role>; the parameter names are part of the
 * contract because the helper macros in this header (NEXT_IN, INCHAR1,
 * OUTCHAR, REQUIRE_OUTBUF, ...) refer to them directly.
 */

/* One-time codec initialisation. */
#define CODEC_INIT(encoding)                                            \
    static int encoding##_codec_init(const void *config)

/* Encoder callbacks: state setup, the encode loop, and state reset. */
#define ENCODER_INIT(encoding)                                          \
    static int encoding##_encode_init(                                  \
        MultibyteCodec_State *state, const void *config)
#define ENCODER(encoding)                                               \
    static Py_ssize_t encoding##_encode(                                \
        MultibyteCodec_State *state, const void *config,                \
        int kind, void *data,                                           \
        Py_ssize_t *inpos, Py_ssize_t inlen,                            \
        unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_encode_reset(                          \
        MultibyteCodec_State *state, const void *config,                \
        unsigned char **outbuf, Py_ssize_t outleft)

/* Decoder callbacks: state setup, the decode loop, and state reset. */
#define DECODER_INIT(encoding)                                          \
    static int encoding##_decode_init(                                  \
        MultibyteCodec_State *state, const void *config)
#define DECODER(encoding)                                               \
    static Py_ssize_t encoding##_decode(                                \
        MultibyteCodec_State *state, const void *config,                \
        const unsigned char **inbuf, Py_ssize_t inleft,                 \
        _PyUnicodeWriter *writer)
#define DECODER_RESET(encoding)                                         \
    static Py_ssize_t encoding##_decode_reset(                          \
        MultibyteCodec_State *state, const void *config)
|
||||
|
||||
/*
 * Cursor helpers.  They operate on the local names introduced by the
 * ENCODER()/DECODER() signatures: inbuf/inleft (decoder input),
 * inpos (encoder input index) and outbuf/outleft (encoder output).
 */

/* Decoder: consume i input bytes. */
#define NEXT_IN(i)                                                      \
    do {                                                                \
        (*inbuf) += (i);                                                \
        (inleft) -= (i);                                                \
    } while (0)

/* Encoder: consume i input characters. */
#define NEXT_INCHAR(i)                                                  \
    do {                                                                \
        (*inpos) += (i);                                                \
    } while (0)

/* Encoder: commit o output bytes. */
#define NEXT_OUT(o)                                                     \
    do {                                                                \
        (*outbuf) += (o);                                               \
        (outleft) -= (o);                                               \
    } while (0)

/* Encoder: consume i characters and commit o bytes. */
#define NEXT(i, o)                                                      \
    do {                                                                \
        NEXT_INCHAR(i);                                                 \
        NEXT_OUT(o);                                                    \
    } while (0)

/* Return MBERR_TOOFEW from the enclosing function unless at least n input
 * bytes remain. */
#define REQUIRE_INBUF(n)                                                \
    do {                                                                \
        if (inleft < (n))                                               \
            return MBERR_TOOFEW;                                        \
    } while (0)

/* Return MBERR_TOOSMALL from the enclosing function unless at least n
 * output bytes remain. */
#define REQUIRE_OUTBUF(n)                                               \
    do {                                                                \
        if (outleft < (n))                                              \
            return MBERR_TOOSMALL;                                      \
    } while (0)
|
||||
|
||||
/* Decoder: peek at the next input bytes without consuming them. */
#define INBYTE1 ((*inbuf)[0])
#define INBYTE2 ((*inbuf)[1])
#define INBYTE3 ((*inbuf)[2])
#define INBYTE4 ((*inbuf)[3])

/* Encoder: peek at the current and the following input character. */
#define INCHAR1 (PyUnicode_READ(kind, data, *inpos))
#define INCHAR2 (PyUnicode_READ(kind, data, *inpos + 1))

/* Decoder: append one character to writer; returns MBERR_EXCEPTION from
 * the enclosing function if the writer reports an error. */
#define OUTCHAR(c)                                                      \
    do {                                                                \
        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0)                \
            return MBERR_EXCEPTION;                                     \
    } while (0)
|
||||
|
||||
/*
 * Decoder: append two characters to writer in one step.
 *
 * Both arguments are evaluated exactly once (into _c1/_c2) before use, so
 * expressions with side effects are safe.  Returns MBERR_EXCEPTION from
 * the enclosing function if the writer cannot be prepared for two more
 * characters of the required width.
 */
#define OUTCHAR2(c1, c2)                                                \
    do {                                                                \
        Py_UCS4 _c1 = (c1);                                             \
        Py_UCS4 _c2 = (c2);                                             \
        if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0)  \
            return MBERR_EXCEPTION;                                     \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);  \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
        writer->pos += 2;                                               \
    } while (0)
|
||||
|
||||
/* Encoder: store one byte at a fixed offset of the output cursor.  No
 * space check is made; the caller must have used REQUIRE_OUTBUF first. */
#define OUTBYTE1(c) do { ((*outbuf)[0]) = (c); } while (0)
#define OUTBYTE2(c) do { ((*outbuf)[1]) = (c); } while (0)
#define OUTBYTE3(c) do { ((*outbuf)[2]) = (c); } while (0)
#define OUTBYTE4(c) do { ((*outbuf)[3]) = (c); } while (0)

/* Encoder: check for room (returning MBERR_TOOSMALL through
 * REQUIRE_OUTBUF when there is none) and then store 1..4 bytes.  The
 * cursor is not advanced. */
#define WRITEBYTE1(c1)                                                  \
    do {                                                                \
        REQUIRE_OUTBUF(1);                                              \
        OUTBYTE1(c1);                                                   \
    } while (0)
#define WRITEBYTE2(c1, c2)                                              \
    do {                                                                \
        REQUIRE_OUTBUF(2);                                              \
        OUTBYTE1(c1);                                                   \
        OUTBYTE2(c2);                                                   \
    } while (0)
#define WRITEBYTE3(c1, c2, c3)                                          \
    do {                                                                \
        REQUIRE_OUTBUF(3);                                              \
        OUTBYTE1(c1);                                                   \
        OUTBYTE2(c2);                                                   \
        OUTBYTE3(c3);                                                   \
    } while (0)
#define WRITEBYTE4(c1, c2, c3, c4)                                      \
    do {                                                                \
        REQUIRE_OUTBUF(4);                                              \
        OUTBYTE1(c1);                                                   \
        OUTBYTE2(c2);                                                   \
        OUTBYTE3(c3);                                                   \
        OUTBYTE4(c4);                                                   \
    } while (0)
|
||||
|
||||
/*
 * Table lookups.  Each macro is an expression that is true when the lookup
 * succeeded, in which case assi holds the mapped value.  A lookup fails
 * when the row has no table, when val lies outside bottom..top, or when
 * the stored entry is the "unmapped" sentinel; in the last case assi has
 * still been overwritten with that sentinel.
 */

/* Look up val in one encode row; NOCHAR marks an unmapped entry. */
#define _TRYMAP_ENC(m, assi, val)                                       \
    ((m)->map != NULL &&                                                \
     (val) >= (m)->bottom &&                                            \
     (val)<= (m)->top &&                                                \
     ((assi) = (m)->map[(val) - (m)->bottom]) != NOCHAR)

/* Encode uni with <charset>_encmap: the row is selected by uni >> 8 and
 * the entry by the low byte. */
#define TRYMAP_ENC(charset, assi, uni)                                  \
    _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)

/* Look up val in one decode row; UNIINV marks an unmapped entry. */
#define _TRYMAP_DEC(m, assi, val)                                       \
    ((m)->map != NULL &&                                                \
     (val) >= (m)->bottom &&                                            \
     (val)<= (m)->top &&                                                \
     ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)

/* Decode the pair (c1, c2) with <charset>_decmap: the row is selected by
 * c1 and the entry by c2. */
#define TRYMAP_DEC(charset, assi, c1, c2)                               \
    _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
|
||||
|
||||
/*
 * Mapping list builders.  BEGIN_MAPPINGS_LIST opens the static array
 * _mapping_list, each MAPPING_* adds one { name, encmap, decmap } entry, and
 * END_MAPPINGS_LIST appends an empty-name terminator and points
 * mapping_list at the array.
 */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST                                               \
    {"", NULL, NULL} };                                                 \
    static const struct dbcs_map *mapping_list =                        \
        (const struct dbcs_map *)_mapping_list;

/*
 * Codec list builders.  BEGIN_CODECS_LIST opens the static array
 * _codec_list, each CODEC_* adds one entry (name, two leading slots, then
 * the six method slots), and END_CODECS_LIST appends an empty-name
 * terminator and points codec_list at the array.
 */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {

/* Method slots of a codec that implements init and reset callbacks. */
#define _STATEFUL_METHODS(enc)                                          \
    enc##_encode,                                                       \
    enc##_encode_init,                                                  \
    enc##_encode_reset,                                                 \
    enc##_decode,                                                       \
    enc##_decode_init,                                                  \
    enc##_decode_reset,

/* Method slots of a codec with only encode and decode callbacks. */
#define _STATELESS_METHODS(enc)                                         \
    enc##_encode, NULL, NULL,                                           \
    enc##_decode, NULL, NULL,

#define CODEC_STATEFUL(enc) {                                           \
    #enc, NULL, NULL,                                                   \
    _STATEFUL_METHODS(enc)                                              \
},
#define CODEC_STATELESS(enc) {                                          \
    #enc, NULL, NULL,                                                   \
    _STATELESS_METHODS(enc)                                             \
},
/* Stateless codec that also supplies <enc>_codec_init. */
#define CODEC_STATELESS_WINIT(enc) {                                    \
    #enc, NULL,                                                         \
    enc##_codec_init,                                                   \
    _STATELESS_METHODS(enc)                                             \
},
#define END_CODECS_LIST                                                 \
    {"", NULL,} };                                                      \
    static const MultibyteCodec *codec_list =                           \
        (const MultibyteCodec *)_codec_list;
|
||||
|
||||
|
||||
|
||||
/*
 * Return the _multibytecodec.__create_codec callable, importing the module
 * and caching the attribute on first use.
 *
 * Returns a borrowed reference to the cached object, or NULL with an
 * exception set when the import or the attribute lookup fails (the lookup
 * is then retried on the next call).
 */
static PyObject *
getmultibytecodec(void)
{
    static PyObject *cofunc = NULL;

    if (cofunc == NULL) {
        /* PyImport_ImportModuleNoBlock() is a deprecated alias of
         * PyImport_ImportModule(); use the supported name. */
        PyObject *mod = PyImport_ImportModule("_multibytecodec");
        if (mod == NULL)
            return NULL;
        cofunc = PyObject_GetAttrString(mod, "__create_codec");
        Py_DECREF(mod);
    }
    return cofunc;
}
|
||||
|
||||
/*
 * getcodec(encoding) -> codec object
 *
 * Finds the entry of codec_list whose name equals the given string, wraps
 * it in a capsule and hands that capsule to _multibytecodec.__create_codec.
 *
 * Raises TypeError when encoding is not a str and LookupError when this
 * module has no codec of that name.  Returns a new reference, or NULL with
 * an exception set.
 */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
    PyObject *capsule, *result, *create;
    const MultibyteCodec *entry;
    const char *name;

    if (!PyUnicode_Check(encoding)) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding name must be a string.");
        return NULL;
    }

    name = PyUnicode_AsUTF8(encoding);
    if (name == NULL)
        return NULL;

    create = getmultibytecodec();
    if (create == NULL)
        return NULL;

    /* The list ends with an entry whose name is the empty string. */
    entry = codec_list;
    while (entry->encoding[0] != '\0' &&
           strcmp(entry->encoding, name) != 0)
        entry++;

    if (entry->encoding[0] == '\0') {
        PyErr_SetString(PyExc_LookupError,
                        "no such codec is supported.");
        return NULL;
    }

    capsule = PyCapsule_New((void *)entry, PyMultibyteCodec_CAPSULE_NAME, NULL);
    if (capsule == NULL)
        return NULL;

    result = PyObject_CallFunctionObjArgs(create, capsule, NULL);
    Py_DECREF(capsule);

    return result;
}
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
static int
|
||||
register_maps(PyObject *module)
|
||||
{
|
||||
const struct dbcs_map *h;
|
||||
|
||||
for (h = mapping_list; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
int r;
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
r = PyModule_AddObject(module, mhname,
|
||||
PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
|
||||
if (r == -1)
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
/*
 * Binary-search `haystack` (haystacksize entries) for the pair
 * (body, modifier), combined into one key as body in the high 16 bits and
 * modifier in the low 16 bits.  The search relies on the entries being in
 * ascending `uniseq` order.
 *
 * Returns the matching entry's `code`, or DBCINV when there is no match or
 * the table is empty.
 */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
                const struct pair_encodemap *haystack, int haystacksize)
{
    int pos, min, max;
    /* Widen before shifting: `body` would otherwise be promoted to a signed
     * int, and shifting a value >= 0x8000 left by 16 overflows it. */
    Py_UCS4 value = ((Py_UCS4)body << 16) | modifier;

    /* Nothing to search; also avoids reading haystack[0] below. */
    if (haystacksize <= 0) {
        return DBCINV;
    }

    min = 0;
    max = haystacksize;

    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
        if (value < haystack[pos].uniseq) {
            if (max != pos) {
                max = pos;
                continue;
            }
        }
        else if (value > haystack[pos].uniseq) {
            if (min != pos) {
                min = pos;
                continue;
            }
        }
        /* Exact hit, or the window can no longer shrink. */
        break;
    }

    if (value == haystack[pos].uniseq) {
        return haystack[pos].code;
    }
    return DBCINV;
}
#endif
|
||||
|
||||
#ifdef USING_IMPORTED_MAPS
/* Fetch the `charset` mapping published by module `_codecs_<locale>` as the
 * attribute `__map_<charset>`; see importmap() for the result convention. */
#define IMPORT_MAP(locale, charset, encmap, decmap) \
    importmap("_codecs_" #locale, "__map_" #charset, \
              (const void**)(encmap), (const void**)(decmap))

/*
 * Import `modname`, read its attribute `symbol` (which must be a capsule
 * named PyMultibyteCodec_CAPSULE_NAME) and copy the encode/decode map
 * pointers of the wrapped `struct dbcs_map` into `*encmap` / `*decmap`.
 * Either output pointer may be NULL to skip that map.
 *
 * Returns 0 on success, or -1 with an exception set.
 */
static int
importmap(const char *modname, const char *symbol,
          const void **encmap, const void **decmap)
{
    PyObject *mod, *o;
    int rc = -1;

    mod = PyImport_ImportModule(modname);
    if (mod == NULL) {
        return -1;
    }

    o = PyObject_GetAttrString(mod, symbol);
    if (o == NULL) {
        goto done;
    }

    if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
        PyErr_SetString(PyExc_ValueError,
                        "map data must be a Capsule.");
        goto done;      /* `o` is released below; it used to leak here */
    }
    else {
        const struct dbcs_map *map;

        map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
        if (encmap != NULL) {
            *encmap = map->encmap;
        }
        if (decmap != NULL) {
            *decmap = map->decmap;
        }
        rc = 0;
    }

done:
    /* Single exit: both references are dropped on every path. */
    Py_XDECREF(o);
    Py_DECREF(mod);
    return rc;
}
#endif
|
||||
|
||||
/*
 * Define the module object and the PyInit__codecs_<loc>() entry point for
 * one codec module.  The module exposes `__methods` and, through
 * register_maps(), its "__map_*" capsules.
 *
 * If register_maps() fails, the half-initialised module is released and
 * NULL is returned so the pending exception reaches the importer, rather
 * than returning a module object while an error is still set.
 */
#define I_AM_A_MODULE_FOR(loc)                                          \
    static struct PyModuleDef __module = {                              \
        PyModuleDef_HEAD_INIT,                                          \
        "_codecs_"#loc,                                                 \
        NULL,                                                           \
        0,                                                              \
        __methods,                                                      \
        NULL,                                                           \
        NULL,                                                           \
        NULL,                                                           \
        NULL                                                            \
    };                                                                  \
    PyMODINIT_FUNC                                                      \
    PyInit__codecs_##loc(void)                                          \
    {                                                                   \
        PyObject *m = PyModule_Create(&__module);                       \
        if (m != NULL && register_maps(m) < 0) {                        \
            Py_DECREF(m);                                               \
            return NULL;                                                \
        }                                                               \
        return m;                                                       \
    }
|
||||
|
||||
#endif
|
333
third_party/python/Modules/cjkcodecs/clinic/multibytecodec.c.h
vendored
Normal file
333
third_party/python/Modules/cjkcodecs/clinic/multibytecodec.c.h
vendored
Normal file
|
@ -0,0 +1,333 @@
|
|||
/*[clinic input]
|
||||
preserve
|
||||
[clinic start generated code]*/
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteCodec_encode__doc__,
|
||||
"encode($self, /, input, errors=None)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Return an encoded string version of `input\'.\n"
|
||||
"\n"
|
||||
"\'errors\' may be given to set a different error handling scheme. Default is\n"
|
||||
"\'strict\' meaning that encoding errors raise a UnicodeEncodeError. Other possible\n"
|
||||
"values are \'ignore\', \'replace\' and \'xmlcharrefreplace\' as well as any other name\n"
|
||||
"registered with codecs.register_error that can handle UnicodeEncodeErrors.");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF \
|
||||
{"encode", (PyCFunction)_multibytecodec_MultibyteCodec_encode, METH_FASTCALL, _multibytecodec_MultibyteCodec_encode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
|
||||
PyObject *input,
|
||||
const char *errors);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteCodec_encode(MultibyteCodecObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
static const char * const _keywords[] = {"input", "errors", NULL};
|
||||
static _PyArg_Parser _parser = {"O|z:encode", _keywords, 0};
|
||||
PyObject *input;
|
||||
const char *errors = NULL;
|
||||
|
||||
if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
|
||||
&input, &errors)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteCodec_encode_impl(self, input, errors);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteCodec_decode__doc__,
|
||||
"decode($self, /, input, errors=None)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Decodes \'input\'.\n"
|
||||
"\n"
|
||||
"\'errors\' may be given to set a different error handling scheme. Default is\n"
|
||||
"\'strict\' meaning that encoding errors raise a UnicodeDecodeError. Other possible\n"
|
||||
"values are \'ignore\' and \'replace\' as well as any other name registered with\n"
|
||||
"codecs.register_error that is able to handle UnicodeDecodeErrors.\"");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF \
|
||||
{"decode", (PyCFunction)_multibytecodec_MultibyteCodec_decode, METH_FASTCALL, _multibytecodec_MultibyteCodec_decode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
|
||||
Py_buffer *input,
|
||||
const char *errors);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteCodec_decode(MultibyteCodecObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
static const char * const _keywords[] = {"input", "errors", NULL};
|
||||
static _PyArg_Parser _parser = {"y*|z:decode", _keywords, 0};
|
||||
Py_buffer input = {NULL, NULL};
|
||||
const char *errors = NULL;
|
||||
|
||||
if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
|
||||
&input, &errors)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteCodec_decode_impl(self, &input, errors);
|
||||
|
||||
exit:
|
||||
/* Cleanup for input */
|
||||
if (input.obj) {
|
||||
PyBuffer_Release(&input);
|
||||
}
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_encode__doc__,
|
||||
"encode($self, /, input, final=False)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF \
|
||||
{"encode", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_encode, METH_FASTCALL, _multibytecodec_MultibyteIncrementalEncoder_encode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
|
||||
PyObject *input,
|
||||
int final);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_encode(MultibyteIncrementalEncoderObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
static const char * const _keywords[] = {"input", "final", NULL};
|
||||
static _PyArg_Parser _parser = {"O|i:encode", _keywords, 0};
|
||||
PyObject *input;
|
||||
int final = 0;
|
||||
|
||||
if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
|
||||
&input, &final)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteIncrementalEncoder_encode_impl(self, input, final);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_reset__doc__,
|
||||
"reset($self, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF \
|
||||
{"reset", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_reset, METH_NOARGS, _multibytecodec_MultibyteIncrementalEncoder_reset__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalEncoder_reset(MultibyteIncrementalEncoderObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _multibytecodec_MultibyteIncrementalEncoder_reset_impl(self);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_decode__doc__,
|
||||
"decode($self, /, input, final=False)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF \
|
||||
{"decode", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_decode, METH_FASTCALL, _multibytecodec_MultibyteIncrementalDecoder_decode__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
|
||||
Py_buffer *input,
|
||||
int final);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_decode(MultibyteIncrementalDecoderObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
static const char * const _keywords[] = {"input", "final", NULL};
|
||||
static _PyArg_Parser _parser = {"y*|i:decode", _keywords, 0};
|
||||
Py_buffer input = {NULL, NULL};
|
||||
int final = 0;
|
||||
|
||||
if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
|
||||
&input, &final)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteIncrementalDecoder_decode_impl(self, &input, final);
|
||||
|
||||
exit:
|
||||
/* Cleanup for input */
|
||||
if (input.obj) {
|
||||
PyBuffer_Release(&input);
|
||||
}
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_reset__doc__,
|
||||
"reset($self, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF \
|
||||
{"reset", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_reset, METH_NOARGS, _multibytecodec_MultibyteIncrementalDecoder_reset__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteIncrementalDecoder_reset(MultibyteIncrementalDecoderObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _multibytecodec_MultibyteIncrementalDecoder_reset_impl(self);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_read__doc__,
|
||||
"read($self, sizeobj=None, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF \
|
||||
{"read", (PyCFunction)_multibytecodec_MultibyteStreamReader_read, METH_VARARGS, _multibytecodec_MultibyteStreamReader_read__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
|
||||
PyObject *sizeobj);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_read(MultibyteStreamReaderObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *sizeobj = Py_None;
|
||||
|
||||
if (!PyArg_UnpackTuple(args, "read",
|
||||
0, 1,
|
||||
&sizeobj)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteStreamReader_read_impl(self, sizeobj);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_readline__doc__,
|
||||
"readline($self, sizeobj=None, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF \
|
||||
{"readline", (PyCFunction)_multibytecodec_MultibyteStreamReader_readline, METH_VARARGS, _multibytecodec_MultibyteStreamReader_readline__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
|
||||
PyObject *sizeobj);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_readline(MultibyteStreamReaderObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *sizeobj = Py_None;
|
||||
|
||||
if (!PyArg_UnpackTuple(args, "readline",
|
||||
0, 1,
|
||||
&sizeobj)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteStreamReader_readline_impl(self, sizeobj);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_readlines__doc__,
|
||||
"readlines($self, sizehintobj=None, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF \
|
||||
{"readlines", (PyCFunction)_multibytecodec_MultibyteStreamReader_readlines, METH_VARARGS, _multibytecodec_MultibyteStreamReader_readlines__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
|
||||
PyObject *sizehintobj);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *sizehintobj = Py_None;
|
||||
|
||||
if (!PyArg_UnpackTuple(args, "readlines",
|
||||
0, 1,
|
||||
&sizehintobj)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _multibytecodec_MultibyteStreamReader_readlines_impl(self, sizehintobj);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_reset__doc__,
|
||||
"reset($self, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF \
|
||||
{"reset", (PyCFunction)_multibytecodec_MultibyteStreamReader_reset, METH_NOARGS, _multibytecodec_MultibyteStreamReader_reset__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamReader_reset(MultibyteStreamReaderObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _multibytecodec_MultibyteStreamReader_reset_impl(self);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamWriter_write__doc__,
|
||||
"write($self, strobj, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF \
|
||||
{"write", (PyCFunction)_multibytecodec_MultibyteStreamWriter_write, METH_O, _multibytecodec_MultibyteStreamWriter_write__doc__},
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamWriter_writelines__doc__,
|
||||
"writelines($self, lines, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF \
|
||||
{"writelines", (PyCFunction)_multibytecodec_MultibyteStreamWriter_writelines, METH_O, _multibytecodec_MultibyteStreamWriter_writelines__doc__},
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec_MultibyteStreamWriter_reset__doc__,
|
||||
"reset($self, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF \
|
||||
{"reset", (PyCFunction)_multibytecodec_MultibyteStreamWriter_reset, METH_NOARGS, _multibytecodec_MultibyteStreamWriter_reset__doc__},
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self);
|
||||
|
||||
static PyObject *
|
||||
_multibytecodec_MultibyteStreamWriter_reset(MultibyteStreamWriterObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _multibytecodec_MultibyteStreamWriter_reset_impl(self);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_multibytecodec___create_codec__doc__,
|
||||
"__create_codec($module, arg, /)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _MULTIBYTECODEC___CREATE_CODEC_METHODDEF \
|
||||
{"__create_codec", (PyCFunction)_multibytecodec___create_codec, METH_O, _multibytecodec___create_codec__doc__},
|
||||
/*[clinic end generated code: output=134b9e36cb985939 input=a9049054013a1b77]*/
|
54
third_party/python/Modules/cjkcodecs/emu_jisx0213_2000.h
vendored
Normal file
54
third_party/python/Modules/cjkcodecs/emu_jisx0213_2000.h
vendored
Normal file
|
@ -0,0 +1,54 @@
|
|||
/* These routines may be quite inefficient, but they are used only to emulate
 * old standards. */
|
||||
|
||||
/* Value returned from the enclosing function when the emulation rejects a
 * code point; an includer may define it before including this header. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif

/*
 * Expands to an `if` / `else if` statement with no final `else`, and reads
 * `config` from the expansion site.  When `config` equals (void *)2000:
 *   - any of the ten listed code points makes the enclosing function return
 *     EMULATE_JISX0213_2000_ENCODE_INVALID;
 *   - 0x9B1D stores 0x8000 | 0x7d3b into `assi`.
 */
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c)                       \
    if (config == (void *)2000 && (                                     \
            (c) == 0x9B1C || (c) == 0x4FF1 ||                           \
            (c) == 0x525D || (c) == 0x541E ||                           \
            (c) == 0x5653 || (c) == 0x59F8 ||                           \
            (c) == 0x5C5B || (c) == 0x5E77 ||                           \
            (c) == 0x7626 || (c) == 0x7E6B)) {                          \
        return EMULATE_JISX0213_2000_ENCODE_INVALID;                    \
    }                                                                   \
    else if (config == (void *)2000 && (c) == 0x9B1D) {                 \
        (assi) = 0x8000 | 0x7d3b;                                       \
    }

/* Same shape for the single non-BMP code point 0x20B9F: when `config`
 * equals (void *)2000 the enclosing function returns the INVALID value.
 * `assi` is not used by the expansion. */
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c)                       \
    if (config == (void *)2000 && (c) == 0x20B9F) {                     \
        return EMULATE_JISX0213_2000_ENCODE_INVALID;                    \
    }
|
||||
|
||||
/* Value returned from the enclosing function when the emulation rejects a
 * byte pair; an includer may define it before including this header. */
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif

/*
 * Expands to an `if` statement with no `else`, and reads `config` from the
 * expansion site.  When `config` equals (void *)2000 and (c1, c2) is one of
 * the ten listed pairs, the enclosing function returns
 * EMULATE_JISX0213_2000_DECODE_INVALID.  `assi` is not used by the
 * expansion.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2)               \
    if (config == (void *)2000 &&                                       \
            (((c1) == 0x2E && (c2) == 0x21) ||                          \
             ((c1) == 0x2F && (c2) == 0x7E) ||                          \
             ((c1) == 0x4F && (c2) == 0x54) ||                          \
             ((c1) == 0x4F && (c2) == 0x7E) ||                          \
             ((c1) == 0x74 && (c2) == 0x27) ||                          \
             ((c1) == 0x7E && (c2) == 0x7A) ||                          \
             ((c1) == 0x7E && (c2) == 0x7B) ||                          \
             ((c1) == 0x7E && (c2) == 0x7C) ||                          \
             ((c1) == 0x7E && (c2) == 0x7D) ||                          \
             ((c1) == 0x7E && (c2) == 0x7E))) {                         \
        return EMULATE_JISX0213_2000_DECODE_INVALID;                    \
    }

/* For the pair (0x7D, 0x3B) under `config` == (void *)2000, emits 0x9B1D
 * through OUTCHAR(), which must be defined at the expansion site.  `writer`
 * is not used by the expansion. */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2)             \
    if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) {       \
        OUTCHAR(0x9B1D);                                                \
    }

/* Variant of the macro above that stores 0x9B1D into `assi` instead of
 * emitting it. */
#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2)          \
    if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) {       \
        (assi) = 0x9B1D;                                                \
    }
|
||||
|
4103
third_party/python/Modules/cjkcodecs/mappings_cn.h
vendored
Normal file
4103
third_party/python/Modules/cjkcodecs/mappings_cn.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
2378
third_party/python/Modules/cjkcodecs/mappings_hk.h
vendored
Normal file
2378
third_party/python/Modules/cjkcodecs/mappings_hk.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
59
third_party/python/Modules/cjkcodecs/mappings_jisx0213_pair.h
vendored
Normal file
59
third_party/python/Modules/cjkcodecs/mappings_jisx0213_pair.h
vendored
Normal file
|
@ -0,0 +1,59 @@
|
|||
#define JISX0213_ENCPAIRS 46
|
||||
#ifdef EXTERN_JISX0213_PAIR
|
||||
static const struct widedbcs_index *jisx0213_pair_decmap;
|
||||
static const struct pair_encodemap *jisx0213_pair_encmap;
|
||||
#else
|
||||
static const Py_UCS4 __jisx0213_pair_decmap[49] = {
|
||||
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
|
||||
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
|
||||
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
|
||||
39453441,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,48825061,48562921,
|
||||
};
|
||||
|
||||
static const struct widedbcs_index jisx0213_pair_decmap[256] = {
|
||||
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap
|
||||
+0,119,123},{__jisx0213_pair_decmap+5,119,126},{__jisx0213_pair_decmap+13,120,
|
||||
120},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap+14,68,102},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
|
||||
};
|
||||
|
||||
static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
|
||||
{0x00e60000,0x295c},{0x00e60300,0x2b44},{0x02540000,0x2b38},{0x02540300,0x2b48
|
||||
},{0x02540301,0x2b49},{0x02590000,0x2b30},{0x02590300,0x2b4c},{0x02590301,
|
||||
0x2b4d},{0x025a0000,0x2b43},{0x025a0300,0x2b4e},{0x025a0301,0x2b4f},{
|
||||
0x028c0000,0x2b37},{0x028c0300,0x2b4a},{0x028c0301,0x2b4b},{0x02e50000,0x2b60
|
||||
},{0x02e502e9,0x2b66},{0x02e90000,0x2b64},{0x02e902e5,0x2b65},{0x304b0000,
|
||||
0x242b},{0x304b309a,0x2477},{0x304d0000,0x242d},{0x304d309a,0x2478},{
|
||||
0x304f0000,0x242f},{0x304f309a,0x2479},{0x30510000,0x2431},{0x3051309a,0x247a
|
||||
},{0x30530000,0x2433},{0x3053309a,0x247b},{0x30ab0000,0x252b},{0x30ab309a,
|
||||
0x2577},{0x30ad0000,0x252d},{0x30ad309a,0x2578},{0x30af0000,0x252f},{
|
||||
0x30af309a,0x2579},{0x30b10000,0x2531},{0x30b1309a,0x257a},{0x30b30000,0x2533
|
||||
},{0x30b3309a,0x257b},{0x30bb0000,0x253b},{0x30bb309a,0x257c},{0x30c40000,
|
||||
0x2544},{0x30c4309a,0x257d},{0x30c80000,0x2548},{0x30c8309a,0x257e},{
|
||||
0x31f70000,0x2675},{0x31f7309a,0x2678},
|
||||
};
|
||||
#endif
|
4765
third_party/python/Modules/cjkcodecs/mappings_jp.h
vendored
Normal file
4765
third_party/python/Modules/cjkcodecs/mappings_jp.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
3251
third_party/python/Modules/cjkcodecs/mappings_kr.h
vendored
Normal file
3251
third_party/python/Modules/cjkcodecs/mappings_kr.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
2633
third_party/python/Modules/cjkcodecs/mappings_tw.h
vendored
Normal file
2633
third_party/python/Modules/cjkcodecs/mappings_tw.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
1928
third_party/python/Modules/cjkcodecs/multibytecodec.c
vendored
Normal file
1928
third_party/python/Modules/cjkcodecs/multibytecodec.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
136
third_party/python/Modules/cjkcodecs/multibytecodec.h
vendored
Normal file
136
third_party/python/Modules/cjkcodecs/multibytecodec.h
vendored
Normal file
|
@ -0,0 +1,136 @@
|
|||
/*
|
||||
* multibytecodec.h: Common Multibyte Codec Implementation
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#ifndef _PYTHON_MULTIBYTECODEC_H_
|
||||
#define _PYTHON_MULTIBYTECODEC_H_
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Unsigned code-unit types used by the codec tables and state.
 * NOTE(review): `#ifdef` only sees macros, so where uint16_t is a plain
 * typedef the `unsigned short` branch is the one compiled; confirm that is
 * the intent. */
#ifdef uint16_t
typedef uint16_t ucs2_t;
typedef uint16_t DBCHAR;
#else
typedef unsigned short ucs2_t;
typedef unsigned short DBCHAR;
#endif
|
||||
|
||||
typedef union {
|
||||
void *p;
|
||||
int i;
|
||||
unsigned char c[8];
|
||||
ucs2_t u2[4];
|
||||
Py_UCS4 u4[2];
|
||||
} MultibyteCodec_State;
|
||||
|
||||
typedef int (*mbcodec_init)(const void *config);
|
||||
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
int kind, void *data,
|
||||
Py_ssize_t *inpos, Py_ssize_t inlen,
|
||||
unsigned char **outbuf, Py_ssize_t outleft,
|
||||
int flags);
|
||||
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
unsigned char **outbuf, Py_ssize_t outleft);
|
||||
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
const unsigned char **inbuf, Py_ssize_t inleft,
|
||||
_PyUnicodeWriter *writer);
|
||||
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
|
||||
typedef struct {
|
||||
const char *encoding;
|
||||
const void *config;
|
||||
mbcodec_init codecinit;
|
||||
mbencode_func encode;
|
||||
mbencodeinit_func encinit;
|
||||
mbencodereset_func encreset;
|
||||
mbdecode_func decode;
|
||||
mbdecodeinit_func decinit;
|
||||
mbdecodereset_func decreset;
|
||||
} MultibyteCodec;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
MultibyteCodec *codec;
|
||||
} MultibyteCodecObject;
|
||||
|
||||
#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
|
||||
|
||||
#define _MultibyteStatefulCodec_HEAD \
|
||||
PyObject_HEAD \
|
||||
MultibyteCodec *codec; \
|
||||
MultibyteCodec_State state; \
|
||||
PyObject *errors;
|
||||
typedef struct {
|
||||
_MultibyteStatefulCodec_HEAD
|
||||
} MultibyteStatefulCodecContext;
|
||||
|
||||
#define MAXENCPENDING 2
|
||||
#define _MultibyteStatefulEncoder_HEAD \
|
||||
_MultibyteStatefulCodec_HEAD \
|
||||
PyObject *pending;
|
||||
typedef struct {
|
||||
_MultibyteStatefulEncoder_HEAD
|
||||
} MultibyteStatefulEncoderContext;
|
||||
|
||||
#define MAXDECPENDING 8
|
||||
#define _MultibyteStatefulDecoder_HEAD \
|
||||
_MultibyteStatefulCodec_HEAD \
|
||||
unsigned char pending[MAXDECPENDING]; \
|
||||
Py_ssize_t pendingsize;
|
||||
typedef struct {
|
||||
_MultibyteStatefulDecoder_HEAD
|
||||
} MultibyteStatefulDecoderContext;
|
||||
|
||||
typedef struct {
|
||||
_MultibyteStatefulEncoder_HEAD
|
||||
} MultibyteIncrementalEncoderObject;
|
||||
|
||||
typedef struct {
|
||||
_MultibyteStatefulDecoder_HEAD
|
||||
} MultibyteIncrementalDecoderObject;
|
||||
|
||||
typedef struct {
|
||||
_MultibyteStatefulDecoder_HEAD
|
||||
PyObject *stream;
|
||||
} MultibyteStreamReaderObject;
|
||||
|
||||
typedef struct {
|
||||
_MultibyteStatefulEncoder_HEAD
|
||||
PyObject *stream;
|
||||
} MultibyteStreamWriterObject;
|
||||
|
||||
/* Codec status codes.  Positive values are reserved for illegal sequences;
 * the negative values below name the other failure conditions. */
#define MBERR_TOOSMALL      (-1)    /* insufficient output buffer space */
#define MBERR_TOOFEW        (-2)    /* incomplete input buffer */
#define MBERR_INTERNAL      (-3)    /* internal runtime error */
#define MBERR_EXCEPTION     (-4)    /* an exception has been raised */
|
||||
|
||||
/* Built-in error handlers are encoded as small integer sentinels stored in
 * a PyObject * slot; any other value is treated as a real object. */
#define ERROR_STRICT        ((PyObject *)(1))
#define ERROR_IGNORE        ((PyObject *)(2))
#define ERROR_REPLACE       ((PyObject *)(3))

/* True when `p` lies outside the sentinel range [ERROR_STRICT,
 * ERROR_REPLACE].  Note that NULL also satisfies this test. */
#define ERROR_ISCUSTOM(p)   ((p) < ERROR_STRICT || ERROR_REPLACE < (p))

/* Drop the reference held in `p` unless it is NULL or a sentinel.
 * The expansion has no trailing semicolon, so `ERROR_DECREF(x);` behaves as
 * one statement and is safe inside an unbraced if/else.  `p` is evaluated
 * more than once. */
#define ERROR_DECREF(p)                             \
    do {                                            \
        if ((p) != NULL && ERROR_ISCUSTOM(p))       \
            Py_DECREF(p);                           \
    } while (0)
|
||||
|
||||
/* Flag bits accepted in the `flags` argument of an encode callback. */
#define MBENC_FLUSH     0x0001      /* encode all characters encodable */
#define MBENC_MAX       MBENC_FLUSH

/* Name given to every capsule that carries a codec or mapping pointer. */
#define PyMultibyteCodec_CAPSULE_NAME   "multibytecodec.__map_*"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue