python-3.6.zip added from Github

README.cosmo contains the necessary links.
This commit is contained in:
ahgamut 2021-08-08 09:38:33 +05:30 committed by Justine Tunney
parent 75fc601ff5
commit 0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions

View file

@ -0,0 +1,79 @@
To generate or modify mapping headers
-------------------------------------
Mapping headers are imported from CJKCodecs as pre-generated form.
If you need to tweak or add something on it, please look at tools/
subdirectory of CJKCodecs' distribution.
Notes on implementation characteristics of each codec
-----------------------------------------------------
1) Big5 codec
The big5 codec maps the following characters the way cp950 does, rather
than following Unicode.org's mapping, which maps them to 0xFFFD.
BIG5 Unicode Description
0xA15A 0x2574 SPACING UNDERSCORE
0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE
0xA1C5 0x02CD SPACING HEAVY UNDERSCORE
0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT
0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT
0xA2CC 0x5341 HANGZHOU NUMERAL TEN
0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY
Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are already mapped to
other big5 codes, round-trip compatibility is not guaranteed for
them.
2) cp932 codec
To conform to Windows's real mapping, the cp932 codec maps the following
codepoints in addition to the official cp932 mapping.
CP932 Unicode Description
0x80 0x80 UNDEFINED
0xA0 0xF8F0 UNDEFINED
0xFD 0xF8F1 UNDEFINED
0xFE 0xF8F2 UNDEFINED
0xFF 0xF8F3 UNDEFINED
3) euc-jisx0213 codec
The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
unicode U+FF3C instead of U+005C as on unicode.org's mapping.
Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A1C0
is shown as a full width character, mapping to U+FF3C can make
more sense.
The euc-jisx0213 codec is also able to decode JIS X 0212 codes on
codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 do not
overlap each other, this does not break standard conformance
(and JIS X 0213 Plane 2 is intended to be used this way). When
encoding, the codec will try to encode kanji characters in this
order:
JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
4) euc-jp codec
The euc-jp codec is a compatibility instance on these points:
- U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
- U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
- U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
5) shift-jis codec
The shift-jis codec is mapping 0x20-0x7e area to U+20-U+7E directly
instead of using JIS X 0201 for compatibility. The differences are:
- U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
- U+007E TILDE is mapped to SHIFT-JIS 0x7e.
- U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.

View file

@ -0,0 +1,464 @@
/*
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_cn.h"
/**
* hz is predefined as 100 on AIX. So we undefine it to avoid
* conflict against hz codec's.
*/
#ifdef _AIX
#undef hz
#endif
/* GBK and GB2312 map differently in a few code points that are listed below:
*
* gb2312 gbk
* A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
* A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
* A844 undefined U+2015 HORIZONTAL BAR
*/
/*
 * GBK_DECODE(dc1, dc2, writer): decode one two-byte GBK sequence.
 *
 * Expands to an if / else-if chain with no final else, so every call site
 * follows the macro with its own `else` to handle an unmatched sequence.
 * The three GBK-specific code points are tested before the tables, then
 * gb2312 is consulted (each byte XORed with 0x80) and finally gbkext
 * (bytes as given).
 *
 * Requires a variable named `decoded` in the invoking scope. The `writer`
 * argument is not referenced by name in this body; presumably OUTCHAR
 * picks it up from the enclosing scope -- TODO confirm.
 * NOTE(review): dc1/dc2 are used unparenthesized in `dc1 ^ 0x80`; the
 * callers in this file only pass simple operands.
 */
#define GBK_DECODE(dc1, dc2, writer) \
if ((dc1) == 0xa1 && (dc2) == 0xaa) { \
OUTCHAR(0x2014); \
} \
else if ((dc1) == 0xa8 && (dc2) == 0x44) { \
OUTCHAR(0x2015); \
} \
else if ((dc1) == 0xa1 && (dc2) == 0xa4) { \
OUTCHAR(0x00b7); \
} \
else if (TRYMAP_DEC(gb2312, decoded, dc1 ^ 0x80, dc2 ^ 0x80)) { \
OUTCHAR(decoded); \
} \
else if (TRYMAP_DEC(gbkext, decoded, dc1, dc2)) { \
OUTCHAR(decoded); \
}
/*
 * GBK_ENCODE(code, assi): look up the GBK double-byte value for `code`.
 *
 * Expands to an if / else-if chain without a final else; call sites
 * supply the `else` for the "no mapping" case. U+2014, U+2015 and U+00B7
 * are assigned fixed values, U+30FB is excluded from the gbcommon lookup,
 * and everything else goes through TRYMAP_ENC(gbcommon, ...).
 * On a match the result is stored in `assi`.
 */
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) { \
(assi) = 0xa1aa; \
} else if ((code) == 0x2015) { \
(assi) = 0xa844; \
} else if ((code) == 0x00b7) { \
(assi) = 0xa1a4; \
} else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code)) { \
; \
}
/*
* GB2312 codec
*/
ENCODER(gb2312)
{
    /*
     * Encode the input as GB2312, one character per iteration.
     * Returns 0 when the input is exhausted and 1 for a character that
     * has no GB2312 representation.
     */
    while (*inpos < inlen) {
        Py_UCS4 ch = INCHAR1;
        DBCHAR dbc;

        if (ch >= 0x80) {
            /* Nothing above U+FFFF is handled by this encoder. */
            if (ch > 0xFFFF)
                return 1;

            REQUIRE_OUTBUF(2);

            /* The character needs a gbcommon entry ... */
            if (!(TRYMAP_ENC(gbcommon, dbc, ch)))
                return 1;
            /* ... whose MSB is clear; MSB set marks a GBK-only code. */
            if ((dbc & 0x8000) != 0)
                return 1;

            /* Both output bytes carry the high bit. */
            OUTBYTE1((dbc >> 8) | 0x80);
            OUTBYTE2((dbc & 0xFF) | 0x80);
            NEXT(1, 2);
        }
        else {
            /* ASCII is copied through as a single byte. */
            WRITEBYTE1((unsigned char)ch);
            NEXT(1, 1);
        }
    }

    return 0;
}
DECODER(gb2312)
{
    /*
     * Decode GB2312 input.  Bytes below 0x80 are emitted unchanged; any
     * other byte starts a two-byte sequence that is looked up in the
     * gb2312 table with the high bit of each byte flipped.
     * Returns 0 when the input is exhausted and 1 for an unmapped sequence.
     */
    while (inleft > 0) {
        unsigned char lead = **inbuf;
        Py_UCS4 decoded;

        if (lead >= 0x80) {
            REQUIRE_INBUF(2);
            if (!(TRYMAP_DEC(gb2312, decoded, lead ^ 0x80, INBYTE2 ^ 0x80)))
                return 1;
            OUTCHAR(decoded);
            NEXT_IN(2);
        }
        else {
            OUTCHAR(lead);
            NEXT_IN(1);
        }
    }

    return 0;
}
/*
* GBK codec
*/
ENCODER(gbk)
{
    /*
     * Encode the input as GBK.  Returns 0 when the input is exhausted and
     * 1 for a character that GBK_ENCODE cannot map.
     */
    while (*inpos < inlen) {
        Py_UCS4 ch = INCHAR1;
        DBCHAR dbc;

        if (ch >= 0x80) {
            /* Nothing above U+FFFF is handled by this encoder. */
            if (ch > 0xFFFF)
                return 1;

            REQUIRE_OUTBUF(2);

            /* GBK_ENCODE is an if-chain; the else below completes it. */
            GBK_ENCODE(ch, dbc)
            else
                return 1;

            OUTBYTE1((dbc >> 8) | 0x80);
            /* MSB set: GBK, low byte as stored.
             * MSB unset: GB2312, low byte gets the high bit. */
            OUTBYTE2((dbc & 0x8000) ? (dbc & 0xFF) : ((dbc & 0xFF) | 0x80));
            NEXT(1, 2);
        }
        else {
            /* ASCII is copied through as a single byte. */
            WRITEBYTE1((unsigned char)ch);
            NEXT(1, 1);
        }
    }

    return 0;
}
DECODER(gbk)
{
    /*
     * Decode GBK input.  Bytes below 0x80 are emitted unchanged; any other
     * byte starts a two-byte sequence handled by GBK_DECODE.
     * Returns 0 when the input is exhausted and 1 for an unmapped sequence.
     */
    while (inleft > 0) {
        unsigned char lead = INBYTE1;
        unsigned char trail;
        Py_UCS4 decoded;    /* name required by GBK_DECODE */

        if (lead < 0x80) {
            OUTCHAR(lead);
            NEXT_IN(1);
            continue;
        }

        REQUIRE_INBUF(2);
        trail = INBYTE2;

        /* GBK_DECODE is an if-chain; the else below completes it. */
        GBK_DECODE(lead, trail, writer)
        else
            return 1;

        NEXT_IN(2);
    }

    return 0;
}
/*
* GB18030 codec
*/
/*
 * Encode the input as GB18030.
 *
 * - Characters below 0x80 are written as one byte.
 * - Characters >= 0x10000 are written as a four-byte sequence computed
 *   arithmetically from (c - 0x10000), with lead bytes starting at 0x90.
 * - Other characters are tried against GBK_ENCODE, then gb18030ext
 *   (two-byte output), and finally against the gb18030_to_unibmp_ranges
 *   table (four-byte output with lead bytes starting at 0x81).
 *
 * Returns 0 when the input is exhausted and 1 when a BMP character
 * matches neither the tables nor any range.
 */
ENCODER(gb18030)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
WRITEBYTE1(c);
NEXT(1, 1);
continue;
}
if (c >= 0x10000) {
Py_UCS4 tc = c - 0x10000;
assert (c <= 0x10FFFF);
REQUIRE_OUTBUF(4);
/* Peel off the four positions from last to first:
 * byte 4 has 10 values from 0x30, byte 3 has 126 from 0x81,
 * byte 2 has 10 from 0x30, and the remainder goes into byte 1. */
OUTBYTE4((unsigned char)(tc % 10) + 0x30);
tc /= 10;
OUTBYTE3((unsigned char)(tc % 126) + 0x81);
tc /= 126;
OUTBYTE2((unsigned char)(tc % 10) + 0x30);
tc /= 10;
OUTBYTE1((unsigned char)(tc + 0x90));
NEXT(1, 4);
continue;
}
REQUIRE_OUTBUF(2);
/* GBK_ENCODE is an if-chain; the else-if / else below extend it. */
GBK_ENCODE(c, code)
else if (TRYMAP_ENC(gb18030ext, code, c))
;
else {
const struct _gb18030_to_unibmp_ranges *utrrange;
REQUIRE_OUTBUF(4);
/* Linear scan of the range table; an entry with first == 0
 * terminates the table. */
for (utrrange = gb18030_to_unibmp_ranges;
utrrange->first != 0;
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
Py_UCS4 tc;
/* Linear index of c inside the four-byte BMP area. */
tc = c - utrrange->first +
utrrange->base;
OUTBYTE4((unsigned char)(tc % 10) + 0x30);
tc /= 10;
OUTBYTE3((unsigned char)(tc % 126) + 0x81);
tc /= 126;
OUTBYTE2((unsigned char)(tc % 10) + 0x30);
tc /= 10;
OUTBYTE1((unsigned char)tc + 0x81);
NEXT(1, 4);
break;
}
/* Still on the terminator entry: the loop ran off the end of the
 * table without a match (a match leaves via break on a real entry). */
if (utrrange->first == 0)
return 1;
continue;
}
/* Two-byte output for a GBK_ENCODE / gb18030ext hit. */
OUTBYTE1((code >> 8) | 0x80);
if (code & 0x8000)
OUTBYTE2((code & 0xFF)); /* MSB set: GBK or GB18030ext */
else
OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */
NEXT(1, 2);
}
return 0;
}
/*
 * Decode GB18030 input.
 *
 * A byte below 0x80 is emitted unchanged.  Otherwise the second byte
 * decides the form: 0x30..0x39 introduces a four-byte sequence, anything
 * else is treated as a two-byte sequence (GBK_DECODE, then gb18030ext).
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(gb18030)
{
while (inleft > 0) {
unsigned char c = INBYTE1, c2;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2);
c2 = INBYTE2;
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
const struct _gb18030_to_unibmp_ranges *utr;
unsigned char c3, c4;
Py_UCS4 lseq;
REQUIRE_INBUF(4);
c3 = INBYTE3;
c4 = INBYTE4;
/* Validate the byte ranges of positions 1, 3 and 4
 * (position 2 was checked above). */
if (c < 0x81 || c > 0xFE ||
c3 < 0x81 || c3 > 0xFE ||
c4 < 0x30 || c4 > 0x39)
return 1;
/* Rebase every byte to zero so they can be combined as digits. */
c -= 0x81; c2 -= 0x30;
c3 -= 0x81; c4 -= 0x30;
if (c < 4) { /* U+0080 - U+FFFF */
lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
(Py_UCS4)c3 * 10 + c4;
if (lseq < 39420) {
/* Advance to the last range whose base is <= lseq
 * (empty loop body). */
for (utr = gb18030_to_unibmp_ranges;
lseq >= (utr + 1)->base;
utr++) ;
OUTCHAR(utr->first - utr->base + lseq);
NEXT_IN(4);
continue;
}
}
else if (c >= 15) { /* U+10000 - U+10FFFF */
lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
* 1260 + (Py_UCS4)c3 * 10 + c4;
if (lseq <= 0x10FFFF) {
OUTCHAR(lseq);
NEXT_IN(4);
continue;
}
}
/* Four-byte sequence outside both supported areas. */
return 1;
}
/* Two-byte sequence: GBK_DECODE is an if-chain extended below. */
GBK_DECODE(c, c2, writer)
else if (TRYMAP_DEC(gb18030ext, decoded, c, c2))
OUTCHAR(decoded);
else
return 1;
NEXT_IN(2);
}
return 0;
}
/*
* HZ codec
*/
/* Initialise the HZ encoder state: state->i == 0 is the mode in which the
 * encoder writes plain ASCII (see ENCODER(hz)). Always returns 0. */
ENCODER_INIT(hz)
{
state->i = 0;
return 0;
}
ENCODER_RESET(hz)
{
    /*
     * Close an open GB section: when the encoder is in GB mode
     * (state->i != 0), write the "~}" terminator and switch the state back
     * to ASCII mode.  Nothing is written in ASCII mode.  Returns 0.
     */
    if (state->i == 0)
        return 0;

    WRITEBYTE2('~', '}');
    NEXT_OUT(2);
    state->i = 0;
    return 0;
}
ENCODER(hz)
{
    /*
     * Encode the input as HZ.  state->i tracks the current mode: 0 is
     * ASCII mode, non-zero is GB mode (entered with "~{", left with "~}").
     * Returns 0 when the input is exhausted and 1 for a character that has
     * no GB2312 representation.
     */
    while (*inpos < inlen) {
        Py_UCS4 c = INCHAR1;
        DBCHAR code;

        if (c < 0x80) {
            /* ASCII has to be written in ASCII mode; leave GB mode first. */
            if (state->i) {
                WRITEBYTE2('~', '}');
                NEXT_OUT(2);
                state->i = 0;
            }
            if (c == '~') {
                /*
                 * A literal tilde is written doubled.  Both bytes go out
                 * in one step so the input position only advances once
                 * the complete "~~" is in the output.  Writing them
                 * separately (and advancing the input after the first)
                 * could leave a lone '~' -- an escape introducer -- behind
                 * if the second write bails out for lack of output space
                 * (WRITEBYTE1 is used in this file without a preceding
                 * REQUIRE_OUTBUF, so it presumably performs that check
                 * itself).
                 */
                WRITEBYTE2('~', '~');
                NEXT(1, 2);
            }
            else {
                WRITEBYTE1((unsigned char)c);
                NEXT(1, 1);
            }
            continue;
        }

        /* Nothing above U+FFFF is handled by this encoder. */
        if (c > 0xFFFF)
            return 1;

        if (TRYMAP_ENC(gbcommon, code, c))
            ;
        else
            return 1;

        if (code & 0x8000) /* MSB set: GBK */
            return 1;

        if (state->i == 0) {
            /* Switch to GB mode and write the character in one step. */
            WRITEBYTE4('~', '{', code >> 8, code & 0xff);
            NEXT(1, 4);
            state->i = 1;
        }
        else {
            WRITEBYTE2(code >> 8, code & 0xff);
            NEXT(1, 2);
        }
    }

    return 0;
}
/* Initialise the HZ decoder state: state->i == 0 is ASCII mode
 * (see DECODER(hz)). Always returns 0. */
DECODER_INIT(hz)
{
state->i = 0;
return 0;
}
/* Reset the HZ decoder to ASCII mode (state->i == 0). Always returns 0. */
DECODER_RESET(hz)
{
state->i = 0;
return 0;
}
DECODER(hz)
{
    /*
     * Decode HZ input.  state->i is 0 in ASCII mode and 1 in GB mode.
     * '~' introduces a two-byte escape: "~~" is a literal tilde, "~{"
     * enters GB mode, "~}" returns to ASCII mode and "~\n" is a line
     * continuation.  Returns 0 when the input is exhausted and 1 for an
     * invalid escape, a byte with the high bit set, or an unmapped GB
     * sequence.
     */
    while (inleft > 0) {
        unsigned char c = INBYTE1;
        Py_UCS4 decoded;

        if (c == '~') {
            unsigned char c2;

            /*
             * Check that the byte after '~' is available before reading
             * it, as every other decoder in this file does; reading first
             * touches a byte past the input when '~' is the last byte.
             */
            REQUIRE_INBUF(2);
            c2 = INBYTE2;

            if (c2 == '~' && state->i == 0)
                OUTCHAR('~');
            else if (c2 == '{' && state->i == 0)
                state->i = 1; /* set GB */
            else if (c2 == '\n' && state->i == 0)
                ; /* line-continuation */
            else if (c2 == '}' && state->i == 1)
                state->i = 0; /* set ASCII */
            else
                return 1;
            NEXT_IN(2);
            continue;
        }

        /* HZ is a 7-bit encoding: no byte may have the high bit set. */
        if (c & 0x80)
            return 1;

        if (state->i == 0) { /* ASCII mode */
            OUTCHAR(c);
            NEXT_IN(1);
        }
        else { /* GB mode */
            REQUIRE_INBUF(2);
            /* Bytes are already 7-bit, so they index gb2312 directly. */
            if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) {
                OUTCHAR(decoded);
                NEXT_IN(2);
            }
            else
                return 1;
        }
    }

    return 0;
}
/*
 * Mapping tables and codecs provided by this file.
 * In this file gb2312 and gbkext are only used with TRYMAP_DEC, gbcommon
 * only with TRYMAP_ENC, and gb18030ext with both, which matches the
 * DECONLY / ENCONLY / ENCDEC declarations below.  hz is the only codec
 * here that defines init/reset functions and uses `state`.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)
CODEC_STATEFUL(hz)
END_CODECS_LIST
I_AM_A_MODULE_FOR(cn)

View file

@ -0,0 +1,191 @@
/*
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_IMPORTED_MAPS
#include "cjkcodecs.h"
#include "mappings_hk.h"
/*
* BIG5HKSCS codec
*/
/* big5 encode/decode maps; NULL until CODEC_INIT(big5hkscs) has run
 * IMPORT_MAP successfully. */
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;
/*
 * One-time initialisation for the big5hkscs codec: imports the big5 maps
 * via IMPORT_MAP(tw, big5, ...) on the first call.
 * Returns -1 when IMPORT_MAP yields a non-zero result, otherwise 0.
 * Later calls skip the import because `initialized` is already set.
 */
CODEC_INIT(big5hkscs)
{
static int initialized = 0;
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
return -1;
initialized = 1;
return 0;
}
/*
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
* U+00CA U+030C -> 8864
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
* U+00EA U+030C -> 88a5
* These are handled by hand-written code rather than by mapping tables.
*/
/* Codes for the four base+combining pairs, indexed in ENCODER(big5hkscs)
 * by ((c >> 4) | (c2 >> 3)) & 3, which yields the order
 * (U+00CA,U+0304), (U+00CA,U+030C), (U+00EA,U+0304), (U+00EA,U+030C). */
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
/*
 * Encode the input as Big5-HKSCS.
 *
 * BMP characters are looked up in big5hkscs_bmp first and then in big5.
 * A big5hkscs_bmp result of MULTIC means the character may combine with
 * the following one (see the pair table comment above), so one character
 * of lookahead is needed.  Characters in 0x20000..0x2FFFF are looked up
 * in big5hkscs_nonbmp by their low 16 bits; all other non-BMP characters
 * are rejected.
 *
 * Returns 0 when the input is exhausted, MBERR_TOOFEW when lookahead is
 * needed but no more input is available and MBENC_FLUSH is not set, and
 * 1 (or insize, which is 1 at those points) for an unencodable character.
 */
ENCODER(big5hkscs)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
REQUIRE_OUTBUF(1);
**outbuf = (unsigned char)c;
NEXT(1, 1);
continue;
}
/* Number of input characters consumed by this iteration. */
insize = 1;
REQUIRE_OUTBUF(2);
if (c < 0x10000) {
if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
if (code == MULTIC) {
Py_UCS4 c2;
if (inlen - *inpos >= 2)
c2 = INCHAR2;
else
c2 = 0;
/* (c & 0xffdf) == 0xca matches U+00CA and U+00EA;
 * (c2 & 0xfff7) == 0x304 matches U+0304 and U+030C. */
if (inlen - *inpos >= 2 &&
((c & 0xffdf) == 0x00ca) &&
((c2 & 0xfff7) == 0x0304)) {
code = big5hkscs_pairenc_table[
((c >> 4) |
(c2 >> 3)) & 3];
insize = 2;
}
/* No next character yet and more input may follow:
 * ask the caller for more before deciding. */
else if (inlen - *inpos < 2 &&
!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
else {
/* Not a pair: encode the base character alone. */
if (c == 0xca)
code = 0x8866;
else /* c == 0xea */
code = 0x88a7;
}
}
}
else if (TRYMAP_ENC(big5, code, c))
;
else
return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
;
else
return insize;
}
else
return insize;
OUTBYTE1(code >> 8);
OUTBYTE2(code & 0xFF);
NEXT(insize, 2);
}
return 0;
}
/* BH2S(c1, c2): linear index of a Big5-HKSCS byte pair, counting lead
 * bytes from 0x87 and trail bytes from 0x40 with (0xfe - 0x40 + 1)
 * positions per lead byte.  Used to index the big5hkscs_phint_* bitmaps. */
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
/*
 * Decode Big5-HKSCS input.
 *
 * Bytes below 0x80 are emitted unchanged.  For a two-byte sequence the
 * plain big5 table is tried first unless the lead byte is in 0xc6..0xc8
 * (for 0xc6 only when the trail byte is >= 0xa1); then the big5hkscs
 * table; finally the four sequences that decode to a base character plus
 * a combining mark.
 *
 * Returns 0 when the input is exhausted, 1 for an unmapped sequence and
 * MBERR_INTERNAL when a big5hkscs hit falls outside the three hint areas.
 */
DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2);
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
OUTCHAR(decoded);
NEXT_IN(2);
continue;
}
}
if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
{
int s = BH2S(c, INBYTE2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
/* Pick the hint bitmap covering this code and rebase s to the
 * start of that bitmap. */
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x87, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_12130;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21924;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
/* One bit per code: when set, the table value is OR-ed with
 * 0x20000 before being emitted. */
if (hintbase[s >> 3] & (1 << (s & 7))) {
OUTCHAR(decoded | 0x20000);
NEXT_IN(2);
}
else {
OUTCHAR(decoded);
NEXT_IN(2);
}
continue;
}
/* Sequences that decode to two code points. */
switch ((c << 8) | INBYTE2) {
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
default: return 1;
}
NEXT_IN(2); /* all decoded code points are pairs, above. */
}
return 0;
}
/*
 * Mapping tables and codecs provided by this file.  big5hkscs is used
 * with TRYMAP_DEC only and big5hkscs_bmp / big5hkscs_nonbmp with
 * TRYMAP_ENC only.  The big5 maps are not listed here: they are obtained
 * through IMPORT_MAP in CODEC_INIT(big5hkscs).
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
I_AM_A_MODULE_FOR(hk)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,760 @@
/*
* _codecs_jp.c: Codecs collection for Japanese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_BINARY_PAIR_SEARCH
#define EMPBASE 0x20000
#include "cjkcodecs.h"
#include "mappings_jp.h"
#include "mappings_jisx0213_pair.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"
/*
* CP932 codec
*/
/*
 * Encode the input as CP932.
 *
 * Single-byte output: characters up to and including 0x80, U+FF61..U+FF9F
 * (written as c - 0xfec0) and the Windows-compatibility code points
 * U+F8F0..U+F8F3.  Two-byte output: cp932ext, then JIS X 0208 codes from
 * jisxcommon converted to Shift-JIS form, then the user-defined area
 * U+E000..U+E757.
 *
 * Returns 0 when the input is exhausted and 1 for an unencodable
 * character.
 */
ENCODER(cp932)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
unsigned char c1, c2;
/* Note the <=: 0x80 itself is passed through as a single byte. */
if (c <= 0x80) {
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
continue;
}
else if (c >= 0xff61 && c <= 0xff9f) {
WRITEBYTE1(c - 0xfec0);
NEXT(1, 1);
continue;
}
else if (c >= 0xf8f0 && c <= 0xf8f3) {
/* Windows compatibility */
REQUIRE_OUTBUF(1);
/* U+F8F0 -> 0xA0; U+F8F1..U+F8F3 -> 0xFD..0xFF. */
if (c == 0xf8f0)
OUTBYTE1(0xa0);
else
OUTBYTE1(c - 0xf8f1 + 0xfd);
NEXT(1, 1);
continue;
}
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2);
if (TRYMAP_ENC(cp932ext, code, c)) {
/* cp932ext values are written out as stored. */
OUTBYTE1(code >> 8);
OUTBYTE2(code & 0xff);
}
else if (TRYMAP_ENC(jisxcommon, code, c)) {
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
/* JIS X 0208 */
/* Convert the row/cell pair to Shift-JIS bytes: two rows share
 * one lead byte, odd rows use the upper half (offset 0x5e) of the
 * trail-byte range. */
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1);
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
}
else if (c >= 0xe000 && c < 0xe758) {
/* User-defined area */
/* 188 trail positions per lead byte, lead bytes from 0xf0. */
c1 = (Py_UCS4)(c - 0xe000) / 188;
c2 = (Py_UCS4)(c - 0xe000) % 188;
OUTBYTE1(c1 + 0xf0);
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
}
else
return 1;
NEXT(1, 2);
}
return 0;
}
/*
 * Decode CP932 input; the inverse of ENCODER(cp932).
 *
 * Single bytes: <= 0x80 unchanged, 0xa0 -> U+F8F0, 0xa1..0xdf ->
 * 0xfec0 + c, 0xfd and above -> U+F8F1 onwards.  Two-byte sequences are
 * tried against cp932ext, then JIS X 0208 (lead 0x81..0x9f / 0xe0..0xea),
 * then the user-defined area (lead 0xf0..0xf9).
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = INBYTE1, c2;
Py_UCS4 decoded;
if (c <= 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
OUTCHAR(0xf8f0); /* Windows compatibility: 0xA0 -> U+F8F0 */
else
OUTCHAR(0xfec0 + c); /* half-width katakana */
NEXT_IN(1);
continue;
}
else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
OUTCHAR(0xf8f1 - 0xfd + c);
NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2);
c2 = INBYTE2;
if (TRYMAP_DEC(cp932ext, decoded, c, c2))
OUTCHAR(decoded);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
/* Valid trail bytes are 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
/* Convert the Shift-JIS byte pair back to a JIS X 0208 row/cell
 * pair (both based at 0x21). */
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
if (TRYMAP_DEC(jisx0208, decoded, c, c2))
OUTCHAR(decoded);
else
return 1;
}
else if (c >= 0xf0 && c <= 0xf9) {
/* User-defined area: 188 code points per lead byte from U+E000. */
if ((c2 >= 0x40 && c2 <= 0x7e) ||
(c2 >= 0x80 && c2 <= 0xfc))
OUTCHAR(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
else
return 1;
}
else
return 1;
NEXT_IN(2);
}
return 0;
}
/*
* EUC-JIS-2004 codec
*/
/*
 * Encode the input as EUC-JIS-2004.
 *
 * BMP characters go through EMULATE_JISX0213_2000_ENCODE_BMP, then
 * jisx0213_bmp (with pair lookahead when the result is MULTIC), then
 * jisxcommon, then half-width katakana and two full-width special cases.
 * Characters in the plane selected by EMPBASE go through
 * EMULATE_JISX0213_2000_ENCODE_EMP and jisx0213_emp.
 * A resulting code with the 0x8000 bit set is written as a three-byte
 * codeset 2 sequence (0x8f prefix), otherwise as two bytes.
 *
 * Returns 0 when the input is exhausted, MBERR_TOOFEW when pair lookahead
 * needs more input and MBENC_FLUSH is not set, and 1 (or insize) for an
 * unencodable character.
 */
ENCODER(euc_jis_2004)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
WRITEBYTE1(c);
NEXT(1, 1);
continue;
}
/* Number of input characters consumed by this iteration. */
insize = 1;
if (c <= 0xFFFF) {
/* The EMULATE_* macro is followed directly by `else if`, so it
 * forms the head of this if-chain. */
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
if (code == MULTIC) {
if (inlen - *inpos < 2) {
/* No following character: when flushing, encode c on
 * its own (pair lookup with second element 0). */
if (flags & MBENC_FLUSH) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
/* Try (c, next) as a pair first, then c alone. */
Py_UCS4 c2 = INCHAR2;
code = find_pairencmap(
(ucs2_t)c, c2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
}
else if (TRYMAP_ENC(jisxcommon, code, c))
;
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITEBYTE2(0x8e, c - 0xfec0);
NEXT(1, 2);
continue;
}
else if (c == 0xff3c)
/* F/W REVERSE SOLIDUS (see NOTES) */
code = 0x2140;
else if (c == 0xff5e)
/* F/W TILDE (see NOTES) */
code = 0x2232;
else
return 1;
}
/* Same plane as EMPBASE. */
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else if (TRYMAP_ENC(jisx0213_emp, code, c & 0xffff))
;
else
return insize;
}
else
return insize;
if (code & 0x8000) {
/* Codeset 2 */
WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
NEXT(insize, 3);
} else {
/* Codeset 1 */
WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
NEXT(insize, 2);
}
}
return 0;
}
/*
 * Decode EUC-JIS-2004 input.
 *
 * Lead byte 0x8e: two-byte half-width katakana.  Lead byte 0x8f:
 * three-byte sequence looked up in JIS X 0213 Plane 2 and then
 * JIS X 0212.  Any other byte >= 0x80: two-byte Plane 1 sequence.
 * Values found in the *_emp tables are OR-ed with EMPBASE; jisx0213_pair
 * hits produce two code points.
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(euc_jis_2004)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 code, decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2);
c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUTCHAR(0xfec0 + c2);
NEXT_IN(2);
}
else
return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3);
/* Flip the high bit to get the table indices. */
c2 = INBYTE2 ^ 0x80;
c3 = INBYTE3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c2, c3))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_2_emp, code, c2, c3)) {
OUTCHAR(EMPBASE | code);
NEXT_IN(3);
continue;
}
else if (TRYMAP_DEC(jisx0212, decoded, c2, c3))
OUTCHAR(decoded);
else
return 1;
NEXT_IN(3);
}
else {
unsigned char c2;
REQUIRE_INBUF(2);
/* Flip the high bit to get the table indices. */
c ^= 0x80;
c2 = INBYTE2 ^ 0x80;
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
/* The two full-width special cases take priority over the
 * tables; they mirror the encoder's 0x2140 / 0x2232. */
else if (c == 0x21 && c2 == 0x40)
OUTCHAR(0xff3c);
else if (c == 0x22 && c2 == 0x32)
OUTCHAR(0xff5e);
else if (TRYMAP_DEC(jisx0208, decoded, c, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_emp, code, c, c2)) {
OUTCHAR(EMPBASE | code);
NEXT_IN(2);
continue;
}
else if (TRYMAP_DEC(jisx0213_pair, code, c, c2)) {
/* The table value packs two code points: high 16 bits first. */
OUTCHAR2(code >> 16, code & 0xffff);
NEXT_IN(2);
continue;
}
else
return 1;
NEXT_IN(2);
}
}
return 0;
}
/*
* EUC-JP codec
*/
/*
 * Encode the input as EUC-JP.
 *
 * Characters are looked up in jisxcommon; half-width katakana are written
 * with a 0x8e prefix.  Unless STRICT_BUILD is defined, U+FF3C, U+00A5 and
 * U+203E get the compatibility mappings shown below.  A code with the
 * 0x8000 bit set is written as three bytes (0x8f prefix), otherwise as
 * two bytes.
 *
 * Returns 0 when the input is exhausted and 1 for an unencodable
 * character.
 */
ENCODER(euc_jp)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
continue;
}
if (c > 0xFFFF)
return 1;
if (TRYMAP_ENC(jisxcommon, code, c))
;
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITEBYTE2(0x8e, c - 0xfec0);
NEXT(1, 2);
continue;
}
/* The branches inside this #ifndef are part of the same else-if
 * chain; the final `else` below closes it in both build modes. */
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
WRITEBYTE1(0x5c);
NEXT(1, 1);
continue;
} else if (c == 0x203e) { /* OVERLINE */
WRITEBYTE1(0x7e);
NEXT(1, 1);
continue;
}
#endif
else
return 1;
if (code & 0x8000) {
/* JIS X 0212 */
WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
NEXT(1, 3);
} else {
/* JIS X 0208 */
WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
NEXT(1, 2);
}
}
return 0;
}
/*
 * Decode EUC-JP input.
 *
 * Lead byte 0x8e: two-byte half-width katakana.  Lead byte 0x8f:
 * three-byte JIS X 0212.  Any other byte >= 0x80: two-byte JIS X 0208,
 * with A1C0 decoded to U+FF3C unless STRICT_BUILD is defined.
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2);
c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUTCHAR(0xfec0 + c2);
NEXT_IN(2);
}
else
return 1;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3);
c2 = INBYTE2;
c3 = INBYTE3;
/* JIS X 0212 */
if (TRYMAP_DEC(jisx0212, decoded, c2 ^ 0x80, c3 ^ 0x80)) {
OUTCHAR(decoded);
NEXT_IN(3);
}
else
return 1;
}
else {
unsigned char c2;
REQUIRE_INBUF(2);
c2 = INBYTE2;
/* JIS X 0208 */
/* In non-strict builds the special case below is the head of the
 * if-chain; in strict builds the chain starts at the TRYMAP_DEC. */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
/* FULL-WIDTH REVERSE SOLIDUS */
OUTCHAR(0xff3c);
else
#endif
if (TRYMAP_DEC(jisx0208, decoded, c ^ 0x80, c2 ^ 0x80))
OUTCHAR(decoded);
else
return 1;
NEXT_IN(2);
}
}
return 0;
}
/*
* SHIFT_JIS codec
*/
/*
 * Encode the input as Shift-JIS.
 *
 * The first if-chain classifies the character into a single-byte code
 * (stored in `code`) or NOCHAR.  In non-strict builds ASCII maps to
 * itself and U+00A5 / U+203E map to 0x5c / 0x7e; in strict builds
 * JISX0201_R_ENCODE heads the chain instead.  Codes below 0x80 or in
 * 0xa1..0xdf are written as one byte.  Otherwise the character is looked
 * up in jisxcommon and the JIS X 0208 code is converted to a Shift-JIS
 * byte pair.
 *
 * Returns 0 when the input is exhausted and 1 for an unencodable
 * character.
 */
ENCODER(shift_jis)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
unsigned char c1, c2;
#ifdef STRICT_BUILD
JISX0201_R_ENCODE(c, code)
#else
if (c < 0x80)
code = c;
else if (c == 0x00a5)
code = 0x5c; /* YEN SIGN */
else if (c == 0x203e)
code = 0x7e; /* OVERLINE */
#endif
/* Continuation of whichever chain was started above. */
else JISX0201_K_ENCODE(c, code)
else if (c > 0xFFFF)
return 1;
else
code = NOCHAR;
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
REQUIRE_OUTBUF(1);
OUTBYTE1((unsigned char)code);
NEXT(1, 1);
continue;
}
REQUIRE_OUTBUF(2);
if (code == NOCHAR) {
if (TRYMAP_ENC(jisxcommon, code, c))
;
#ifndef STRICT_BUILD
else if (c == 0xff3c)
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
else
return 1;
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
}
/* Convert the JIS X 0208 row/cell pair to Shift-JIS bytes: two rows
 * share one lead byte, odd rows use the upper half (offset 0x5e). */
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1);
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
NEXT(1, 2);
}
return 0;
}
/*
 * Decode Shift-JIS input.
 *
 * Single bytes are handled by the head of the if-chain (plain ASCII in
 * non-strict builds, JISX0201_R_DECODE in strict builds) and by
 * JISX0201_K_DECODE; those paths fall through to the NEXT_IN(1) at the
 * bottom of the loop.  Lead bytes 0x81..0x9f and 0xe0..0xea start a
 * two-byte JIS X 0208 sequence, which advances the input itself and
 * `continue`s.
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(shift_jis)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, writer)
#else
if (c < 0x80)
OUTCHAR(c);
#endif
else JISX0201_K_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2;
REQUIRE_INBUF(2);
c2 = INBYTE2;
/* Valid trail bytes are 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
/* Convert the Shift-JIS byte pair to a JIS X 0208 row/cell pair
 * (both based at 0x21). */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
OUTCHAR(0xff3c);
NEXT_IN(2);
continue;
}
#endif
if (TRYMAP_DEC(jisx0208, decoded, c1, c2)) {
OUTCHAR(decoded);
NEXT_IN(2);
continue;
}
else
return 1;
}
else
return 1;
NEXT_IN(1); /* JIS X 0201 */
}
return 0;
}
/*
* SHIFT_JIS-2004 codec
*/
/*
 * Encode the input as Shift_JIS-2004.
 *
 * JISX0201_ENCODE gets the first chance to set `code`; codes below 0x80
 * or in 0xa1..0xdf are written as one byte.  When `code` is still NOCHAR
 * the character is looked up the same way as in ENCODER(euc_jis_2004)
 * (BMP emulation macro, jisx0213_bmp with pair lookahead, jisxcommon
 * without JIS X 0212, or the EMPBASE plane tables).  The resulting code
 * is then converted to a Shift-JIS byte pair; codes whose high byte has
 * the 0x80 bit set are treated as Plane 2.
 *
 * Returns 0 when the input is exhausted, MBERR_TOOFEW when pair lookahead
 * needs more input and MBENC_FLUSH is not set, and 1 (or insize) for an
 * unencodable character.
 */
ENCODER(shift_jis_2004)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code = NOCHAR;
int c1, c2;
Py_ssize_t insize;
JISX0201_ENCODE(c, code)
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
WRITEBYTE1((unsigned char)code);
NEXT(1, 1);
continue;
}
REQUIRE_OUTBUF(2);
/* Number of input characters consumed by this iteration. */
insize = 1;
if (code == NOCHAR) {
if (c <= 0xffff) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
if (code == MULTIC) {
if (inlen - *inpos < 2) {
/* No following character: when flushing, encode c
 * on its own (pair lookup with second element 0). */
if (flags & MBENC_FLUSH) {
code = find_pairencmap
((ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
/* Try (c, next) as a pair first, then c alone. */
Py_UCS4 ch2 = INCHAR2;
code = find_pairencmap(
(ucs2_t)c, ch2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
insize = 2;
}
}
}
else if (TRYMAP_ENC(jisxcommon, code, c)) {
/* abandon JIS X 0212 codes */
if (code & 0x8000)
return 1;
}
else
return 1;
}
/* Same plane as EMPBASE. */
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else if (TRYMAP_ENC(jisx0213_emp, code, c&0xffff))
;
else
return insize;
}
else
return insize;
}
c1 = code >> 8;
c2 = (code & 0xff) - 0x21;
if (c1 & 0x80) {
/* Plane 2 */
/* Fold the Plane 2 row number into the row index space that
 * follows Plane 1; the offset depends on the row group. */
if (c1 >= 0xee)
c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8)
c1 -= 0x49;
else
c1 -= 0x43;
}
else {
/* Plane 1 */
c1 -= 0x21;
}
/* Two rows share one lead byte; odd rows use the upper half. */
if (c1 & 1)
c2 += 0x5e;
c1 >>= 1;
OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1));
OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41));
NEXT(insize, 2);
}
return 0;
}
/*
 * Decode Shift_JIS-2004 input.
 *
 * JISX0201_DECODE heads the if-chain and handles single bytes, which
 * fall through to the NEXT_IN(1) at the bottom of the loop.  Lead bytes
 * 0x81..0x9f and 0xe0..0xfc start a two-byte sequence: row indices below
 * 0x5e are Plane 1, the rest are mapped back to Plane 2 row numbers.
 * Values from the *_emp tables are OR-ed with EMPBASE; jisx0213_pair
 * hits produce two code points.
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(shift_jis_2004)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
JISX0201_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
unsigned char c1, c2;
Py_UCS4 code, decoded;
REQUIRE_INBUF(2);
c2 = INBYTE2;
/* Valid trail bytes are 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1;
/* c1 becomes a zero-based row index, c2 a cell based at 0x21. */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
if (c1 < 0x5e) { /* Plane 1 */
c1 += 0x21;
EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
c1, c2)
else if (TRYMAP_DEC(jisx0208, decoded, c1, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c1, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_emp, code, c1, c2))
OUTCHAR(EMPBASE | code);
else if (TRYMAP_DEC(jisx0213_pair, code, c1, c2))
OUTCHAR2(code >> 16, code & 0xffff);
else
return 1;
NEXT_IN(2);
}
else { /* Plane 2 */
/* Inverse of the row folding done by the encoder. */
if (c1 >= 0x67)
c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f)
c1 -= 0x37;
else
c1 -= 0x3d;
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
c1, c2)
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c1, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_2_emp, code, c1, c2)) {
OUTCHAR(EMPBASE | code);
NEXT_IN(2);
continue;
}
else
return 1;
NEXT_IN(2);
}
continue;
}
else
return 1;
NEXT_IN(1); /* JIS X 0201 */
}
return 0;
}
/*
 * Mapping tables and codecs provided by this file.
 * The two hand-written codec entries register "euc_jisx0213" and
 * "shift_jisx0213" on top of the *_2004 methods with (void *)2000 in the
 * second field; presumably that value is the `config` consulted by the
 * EMULATE_JISX0213_2000_* macros -- TODO confirm against
 * emu_jisx0213_2000.h.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(jisx0208)
MAPPING_DECONLY(jisx0212)
MAPPING_ENCONLY(jisxcommon)
MAPPING_DECONLY(jisx0213_1_bmp)
MAPPING_DECONLY(jisx0213_2_bmp)
MAPPING_ENCONLY(jisx0213_bmp)
MAPPING_DECONLY(jisx0213_1_emp)
MAPPING_DECONLY(jisx0213_2_emp)
MAPPING_ENCONLY(jisx0213_emp)
MAPPING_ENCDEC(jisx0213_pair)
MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(shift_jis)
CODEC_STATELESS(cp932)
CODEC_STATELESS(euc_jp)
CODEC_STATELESS(shift_jis_2004)
CODEC_STATELESS(euc_jis_2004)
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
END_CODECS_LIST
I_AM_A_MODULE_FOR(jp)

View file

@ -0,0 +1,468 @@
/*
* _codecs_kr.c: Codecs collection for Korean encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_kr.h"
/*
* EUC-KR codec
*/
/* Lead byte shared by every two-byte unit of a make-up sequence, and the
 * trail byte of the filler unit that opens the sequence. */
#define EUCKR_JAMO_FIRSTBYTE 0xA4
#define EUCKR_JAMO_FILLER 0xD4
/*
 * Trail bytes used by ENCODER(euc_kr) for the three parts of a Hangul
 * syllable.  With s = c - 0xac00 the tables are indexed by s / 588,
 * (s / 28) % 21 and s % 28 respectively.
 */
static const unsigned char u2cgk_choseong[19] = {
0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
0xbc, 0xbd, 0xbe
};
static const unsigned char u2cgk_jungseong[21] = {
0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};
/* Entry 0 is the filler byte (0xd4 == EUCKR_JAMO_FILLER). */
static const unsigned char u2cgk_jongseong[28] = {
0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
0xbb, 0xbc, 0xbd, 0xbe
};
/*
 * Encode the input as EUC-KR.
 *
 * Characters are looked up in the cp949 table.  Codes without the 0x8000
 * bit are KS X 1001 characters and are written as two bytes with the
 * high bit set.  Codes with the bit set are written as an eight-byte
 * make-up sequence: filler unit, then one unit each for the initial,
 * medial and final part of the syllable.
 *
 * Returns 0 when the input is exhausted and 1 for an unencodable
 * character.
 */
ENCODER(euc_kr)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
continue;
}
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2);
if (TRYMAP_ENC(cp949, code, c))
;
else
return 1;
if ((code & 0x8000) == 0) {
/* KS X 1001 coded character */
OUTBYTE1((code >> 8) | 0x80);
OUTBYTE2((code & 0xFF) | 0x80);
NEXT(1, 2);
}
else {
/* Mapping is found in CP949 extension,
but we encode it in KS X 1001:1998 Annex 3,
make-up sequence for EUC-KR. */
REQUIRE_OUTBUF(8);
/* syllable composition precedence */
OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
OUTBYTE2(EUCKR_JAMO_FILLER);
/* All code points in CP949 extension are in unicode
* Hangul Syllable area. */
assert(0xac00 <= c && c <= 0xd7a3);
c -= 0xac00;
OUTBYTE3(EUCKR_JAMO_FIRSTBYTE);
OUTBYTE4(u2cgk_choseong[c / 588]);
/* First four bytes done; advance the output only, so the
 * OUTBYTE1..4 below address the second half. */
NEXT_OUT(4);
OUTBYTE1(EUCKR_JAMO_FIRSTBYTE);
OUTBYTE2(u2cgk_jungseong[(c / 28) % 21]);
OUTBYTE3(EUCKR_JAMO_FIRSTBYTE);
OUTBYTE4(u2cgk_jongseong[c % 28]);
NEXT(1, 4);
}
}
return 0;
}
#define NONE 127
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
0, 1, NONE, 2, NONE, NONE, 3, 4,
5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
6, 7, 8, NONE, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18
};
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
1, 2, 3, 4, 5, 6, 7, NONE,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, NONE, 18, 19, 20, 21, 22,
NONE, 23, 24, 25, 26, 27
};
/*
 * Decode EUC-KR input.
 *
 * A sequence starting with EUCKR_JAMO_FIRSTBYTE, EUCKR_JAMO_FILLER is an
 * eight-byte make-up sequence and is composed into a single Hangul
 * syllable; any other two-byte sequence is looked up in ksx1001 with the
 * high bit of each byte flipped.
 *
 * Returns 0 when the input is exhausted and 1 for an invalid or unmapped
 * sequence.
 */
DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2);
if (c == EUCKR_JAMO_FIRSTBYTE &&
INBYTE2 == EUCKR_JAMO_FILLER) {
/* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR cho, jung, jong;
REQUIRE_INBUF(8);
/* Each of the three following units must start with the
 * jamo lead byte. */
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
return 1;
/* Initial consonant. */
c = (*inbuf)[3];
if (0xa1 <= c && c <= 0xbe)
cho = cgk2u_choseong[c - 0xa1];
else
cho = NONE;
/* Medial vowel: contiguous range, no table needed. */
c = (*inbuf)[5];
jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
/* Final consonant; the filler byte means "none" (index 0). */
c = (*inbuf)[7];
if (c == EUCKR_JAMO_FILLER)
jong = 0;
else if (0xa1 <= c && c <= 0xbe)
jong = cgk2u_jongseong[c - 0xa1];
else
jong = NONE;
if (cho == NONE || jung == NONE || jong == NONE)
return 1;
OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
NEXT_IN(8);
}
else if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else
return 1;
}
return 0;
}
#undef NONE
/*
* CP949 codec
*/
ENCODER(cp949)
{
    /*
     * Encode the input as CP949.  Returns 0 when the input is exhausted
     * and 1 for a character that is above U+FFFF or missing from the
     * cp949 table.
     */
    while (*inpos < inlen) {
        Py_UCS4 ch = INCHAR1;
        DBCHAR dbc;

        if (ch >= 0x80) {
            if (ch > 0xFFFF)
                return 1;

            REQUIRE_OUTBUF(2);
            if (!(TRYMAP_ENC(cp949, dbc, ch)))
                return 1;

            OUTBYTE1((dbc >> 8) | 0x80);
            /* MSB set: CP949, low byte as stored.
             * MSB unset: ks x 1001, low byte gets the high bit. */
            OUTBYTE2((dbc & 0x8000) ? (dbc & 0xFF) : ((dbc & 0xFF) | 0x80));
            NEXT(1, 2);
        }
        else {
            /* ASCII is copied through as a single byte. */
            WRITEBYTE1((unsigned char)ch);
            NEXT(1, 1);
        }
    }

    return 0;
}
DECODER(cp949)
{
    /*
     * Decode CP949 input.  Bytes below 0x80 are emitted unchanged; a
     * two-byte sequence is looked up in ksx1001 (high bit of each byte
     * flipped) and, failing that, in cp949ext (bytes as given).
     * Returns 0 when the input is exhausted and 1 for an unmapped sequence.
     */
    while (inleft > 0) {
        unsigned char lead = INBYTE1;
        Py_UCS4 decoded;

        if (lead < 0x80) {
            OUTCHAR(lead);
            NEXT_IN(1);
            continue;
        }

        REQUIRE_INBUF(2);
        /* The second lookup only runs when the first one fails. */
        if (!(TRYMAP_DEC(ksx1001, decoded, lead ^ 0x80, INBYTE2 ^ 0x80)) &&
            !(TRYMAP_DEC(cp949ext, decoded, lead, INBYTE2)))
            return 1;

        OUTCHAR(decoded);
        NEXT_IN(2);
    }

    return 0;
}
/*
* JOHAB codec
*/
/*
 * 5-bit field values used by ENCODER(johab) to build a Hangul syllable
 * code.  With s = c - 0xac00 the tables are indexed by s / 588,
 * (s / 28) % 21 and s % 28.  Each array is declared with 32 elements but
 * only the leading entries are initialised explicitly.
 */
static const unsigned char u2johabidx_choseong[32] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
0x03, 0x04, 0x05, 0x06, 0x07,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* Johab codes for U+3131..U+3163, indexed by (c - 0x3131); 51 entries. */
static const DBCHAR u2johabjamo[] = {
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
0x8741, 0x8761, 0x8781, 0x87a1,
};
/*
 * Encode the input as Johab.
 *
 * Hangul syllables (U+AC00..U+D7A3) are composed arithmetically from
 * three 5-bit fields; U+3131..U+3163 come from u2johabjamo; everything
 * else is looked up in the cp949 table and, when it falls in the
 * accepted row ranges, converted to a Johab byte pair.
 *
 * Returns 0 when the input is exhausted and 1 for an unencodable
 * character.
 */
ENCODER(johab)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
if (c < 0x80) {
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
continue;
}
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2);
if (c >= 0xac00 && c <= 0xd7a3) {
c -= 0xac00;
/* Bit 15 set, then initial (bits 10-14), medial (bits 5-9) and
 * final (bits 0-4). */
code = 0x8000 |
(u2johabidx_choseong[c / 588] << 10) |
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
u2johabidx_jongseong[c % 28];
}
else if (c >= 0x3131 && c <= 0x3163)
code = u2johabjamo[c - 0x3131];
else if (TRYMAP_ENC(cp949, code, c)) {
unsigned char c1, c2, t2;
unsigned short t1;
assert((code & 0x8000) == 0);
c1 = code >> 8;
c2 = code & 0xff;
/* Only rows 0x21..0x2c and 0x4a..0x7d with cells 0x21..0x7e are
 * converted; anything else is rejected. */
if (((c1 >= 0x21 && c1 <= 0x2c) ||
(c1 >= 0x4a && c1 <= 0x7d)) &&
(c2 >= 0x21 && c2 <= 0x7e)) {
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197));
/* Two rows share one lead byte (t1 >> 1); odd t1 uses the
 * upper half of the trail-byte range. */
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUTBYTE1(t1 >> 1);
OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43);
NEXT(1, 2);
continue;
}
else
return 1;
}
else
return 1;
/* Hangul syllable or jamo: write the 16-bit code big-endian. */
OUTBYTE1(code >> 8);
OUTBYTE2(code & 0xff);
NEXT(1, 2);
}
return 0;
}
#define FILL 0xfd
#define NONE 0xff
static const unsigned char johabidx_choseong[32] = {
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
/*
 * Maps the 5-bit medial-vowel field of a Johab hangul code to the medial
 * index (0x00..0x14) that the johab decoder multiplies by 28, or to
 * FILL / NONE.
 */
static const unsigned char johabidx_jungseong[32] = {
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
/*
 * Maps the 5-bit final-consonant field of a Johab hangul code to the final
 * index (0x01..0x1b) that the johab decoder adds to the syllable value, or
 * to FILL (no final; the decoder adds 0) / NONE.
 */
static const unsigned char johabidx_jongseong[32] = {
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};
/*
 * Low byte of the U+31xx code point emitted when a Johab code carries only
 * an initial consonant (the johab decoder ORs the entry with 0x3100).
 * Indexed by the 5-bit initial-consonant field.
 */
static const unsigned char johabjamo_choseong[32] = {
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
/*
 * Low byte of the U+31xx code point emitted when a Johab code carries only
 * a medial vowel (the johab decoder ORs the entry with 0x3100). Indexed by
 * the 5-bit medial-vowel field.
 */
static const unsigned char johabjamo_jungseong[32] = {
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
/*
 * Low byte of the U+31xx code point emitted when a Johab code carries only
 * a final consonant (the johab decoder ORs the entry with 0x3100). Indexed
 * by the 5-bit final-consonant field.
 */
static const unsigned char johabjamo_jongseong[32] = {
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
/*
 * Johab decoder.
 *
 * Reads one byte (values below 0x80) or two bytes per character from *inbuf
 * and appends the decoded code point through OUTCHAR. Returns 0 when the
 * input is exhausted and 1 when the current byte pair is not valid.
 * REQUIRE_INBUF returns MBERR_TOOFEW when a second byte is missing, and
 * OUTCHAR returns MBERR_EXCEPTION when the writer fails.
 */
DECODER(johab)
{
while (inleft > 0) {
unsigned char c = INBYTE1, c2;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2);
c2 = INBYTE2;
if (c < 0xd8) {
/* johab hangul */
unsigned char c_cho, c_jung, c_jong;
unsigned char i_cho, i_jung, i_jong;
/* The 16-bit code is laid out as: top bit, then three 5-bit fields
 * (initial, medial, final). */
c_cho = (c >> 2) & 0x1f;
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
c_jong = c2 & 0x1f;
i_cho = johabidx_choseong[c_cho];
i_jung = johabidx_jungseong[c_jung];
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
return 1;
/* we don't use U+1100 hangul jamo yet. */
if (i_cho == FILL) {
if (i_jung == FILL) {
/* All three components are fill: emit U+3000. */
if (i_jong == FILL)
OUTCHAR(0x3000);
else
/* Final consonant only. */
OUTCHAR(0x3100 |
johabjamo_jongseong[c_jong]);
}
else {
/* Medial vowel only; a medial plus final with no initial is
 * rejected. */
if (i_jong == FILL)
OUTCHAR(0x3100 |
johabjamo_jungseong[c_jung]);
else
return 1;
}
} else {
if (i_jung == FILL) {
/* Initial consonant only; an initial plus final with no medial
 * is rejected. */
if (i_jong == FILL)
OUTCHAR(0x3100 |
johabjamo_choseong[c_cho]);
else
return 1;
}
else
/* Full syllable: initial and medial present, final optional. */
OUTCHAR(0xac00 +
i_cho * 588 +
i_jung * 28 +
(i_jong == FILL ? 0 : i_jong));
}
NEXT_IN(2);
} else {
/* KS X 1001 except hangul jamos and syllables */
/* Reject lead/trail byte values outside the ranges handled below. */
if (c == 0xdf || c > 0xf9 ||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
return 1;
else {
/* Convert the Johab lead/trail pair into the (t1, t2) pair used to
 * index the ksx1001 decode map: each lead byte covers two t1
 * values, chosen by whether the trail offset is below 0x5e.
 * NOTE(review): for c == 0xd8 the first expression is negative and
 * wraps in the unsigned char; presumably the map lookup then
 * fails - confirm. */
unsigned char t1, t2;
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
2 * c - 0x197);
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
if (TRYMAP_DEC(ksx1001, decoded, t1, t2)) {
OUTCHAR(decoded);
NEXT_IN(2);
}
else {
return 1;
}
}
}
}
return 0;
}
#undef NONE
#undef FILL
/* Mapping tables exported by this module: ksx1001 and cp949ext are listed
 * decode-only, cp949 encode-only. */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
/* Codecs provided by this module; all three are registered as stateless. */
BEGIN_CODECS_LIST
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)
END_CODECS_LIST
/* Expands to the module definition and the PyInit__codecs_kr entry point. */
I_AM_A_MODULE_FOR(kr)

View file

@ -0,0 +1,143 @@
/*
* _codecs_tw.c: Codecs collection for Taiwan's encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#include "cjkcodecs.h"
#include "mappings_tw.h"
/*
* BIG5 codec
*/
/*
 * Big5 encoder: values below 0x80 become one byte, anything else must be
 * found in the big5 encode map and becomes two bytes. Returns 0 when the
 * whole input was consumed and 1 on a code point that cannot be encoded.
 */
ENCODER(big5)
{
    while (*inpos < inlen) {
        Py_UCS4 ch = INCHAR1;
        DBCHAR dbcs;

        if (ch < 0x80) {
            /* Single-byte pass-through (bounds check included). */
            WRITEBYTE1((unsigned char)ch);
            NEXT(1, 1);
            continue;
        }

        /* The encode map only covers code points up to 0xFFFF. */
        if (ch > 0xFFFF)
            return 1;

        REQUIRE_OUTBUF(2);

        if (!TRYMAP_ENC(big5, dbcs, ch))
            return 1;

        OUTBYTE1(dbcs >> 8);
        OUTBYTE2(dbcs & 0xFF);
        NEXT(1, 2);
    }

    return 0;
}
/*
 * Big5 decoder: bytes below 0x80 decode to themselves, any other byte is
 * the lead of a two-byte sequence looked up in the big5 decode map.
 * Returns 0 when the input is exhausted and 1 on an unmapped sequence.
 */
DECODER(big5)
{
    while (inleft > 0) {
        unsigned char lead = INBYTE1;
        Py_UCS4 decoded;

        if (lead >= 0x80) {
            /* Double-byte sequence: the trail byte must be available. */
            REQUIRE_INBUF(2);
            if (!TRYMAP_DEC(big5, decoded, lead, INBYTE2))
                return 1;
            OUTCHAR(decoded);
            NEXT_IN(2);
        }
        else {
            OUTCHAR(lead);
            NEXT_IN(1);
        }
    }

    return 0;
}
/*
* CP950 codec
*/
/*
 * CP950 encoder: like the Big5 encoder, except that the cp950ext encode map
 * is consulted first and the big5 map is only the fallback. Returns 0 when
 * the whole input was consumed and 1 on a code point that cannot be encoded.
 */
ENCODER(cp950)
{
    while (*inpos < inlen) {
        Py_UCS4 ch = INCHAR1;
        DBCHAR dbcs;

        if (ch < 0x80) {
            WRITEBYTE1((unsigned char)ch);
            NEXT(1, 1);
            continue;
        }

        if (ch > 0xFFFF)
            return 1;

        REQUIRE_OUTBUF(2);

        /* The extension table wins; && short-circuits once one map hits. */
        if (!TRYMAP_ENC(cp950ext, dbcs, ch) && !TRYMAP_ENC(big5, dbcs, ch))
            return 1;

        OUTBYTE1(dbcs >> 8);
        OUTBYTE2(dbcs & 0xFF);
        NEXT(1, 2);
    }

    return 0;
}
/*
 * CP950 decoder: bytes below 0x80 decode to themselves; two-byte sequences
 * are looked up in the cp950ext decode map first and in the big5 map as a
 * fallback. Returns 0 when the input is exhausted and 1 on an unmapped
 * sequence.
 */
DECODER(cp950)
{
    while (inleft > 0) {
        unsigned char lead = INBYTE1;
        Py_UCS4 decoded;

        if (lead < 0x80) {
            OUTCHAR(lead);
            NEXT_IN(1);
            continue;
        }

        REQUIRE_INBUF(2);

        /* Try the extension table, then plain Big5. */
        if (!TRYMAP_DEC(cp950ext, decoded, lead, INBYTE2) &&
            !TRYMAP_DEC(big5, decoded, lead, INBYTE2))
            return 1;

        OUTCHAR(decoded);
        NEXT_IN(2);
    }

    return 0;
}
/* Mapping tables exported by this module, each with both directions. */
BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
/* Codecs provided by this module; both are registered as stateless. */
BEGIN_CODECS_LIST
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST
/* Expands to the module definition and the PyInit__codecs_tw entry point. */
I_AM_A_MODULE_FOR(tw)

View file

@ -0,0 +1,65 @@
/*
 * JISX0201_R_ENCODE(c, assi): store in `assi` the single-byte code for `c`.
 * Values below 0x80 other than 0x5c and 0x7e map to themselves, 0x00a5 maps
 * to 0x5c and 0x203e maps to 0x7e. Expands to an if / else-if chain with no
 * final else, so `assi` is left untouched when nothing matches and the
 * caller can append its own `else` branch.
 */
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) { \
(assi) = (c); \
} \
else if ((c) == 0x00a5) { \
(assi) = 0x5c; \
} \
else if ((c) == 0x203e) { \
(assi) = 0x7e; \
}
/*
 * JISX0201_K_ENCODE(c, assi): for `c` in 0xff61..0xff9f store `c - 0xfec0`
 * (i.e. 0xa1..0xdf) in `assi`. A bare `if` with no else, so it can be
 * chained after another branch and extended by the caller.
 */
#define JISX0201_K_ENCODE(c, assi) \
if ((c) >= 0xff61 && (c) <= 0xff9f) { \
(assi) = (c) - 0xfec0; \
}
/*
 * JISX0201_ENCODE(c, assi): try JISX0201_R_ENCODE first, then
 * JISX0201_K_ENCODE. The expansion still ends in an open if-chain.
 */
#define JISX0201_ENCODE(c, assi) \
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
/*
 * JISX0201_R_DECODE_CHAR(c, assi): inverse of JISX0201_R_ENCODE, storing the
 * result in `assi`. 0x5c decodes to 0x00a5 and 0x7e to 0x203e; every other
 * value up to 0x7f decodes to itself. Values above 0x7f match no branch;
 * the chain has no final else.
 */
#define JISX0201_R_DECODE_CHAR(c, assi) \
if ((c) < 0x5c) { \
(assi) = (c); \
} \
else if ((c) == 0x5c) { \
(assi) = 0x00a5; \
} \
else if ((c) < 0x7e) { \
(assi) = (c); \
} \
else if ((c) == 0x7e) { \
(assi) = 0x203e; \
} \
else if ((c) == 0x7f) { \
(assi) = 0x7f; \
}
/*
 * JISX0201_R_DECODE(c, writer): same mapping as JISX0201_R_DECODE_CHAR, but
 * the result is emitted through OUTCHAR instead of being assigned. The
 * `writer` parameter is not referenced in the expansion itself; OUTCHAR is
 * defined elsewhere and is expected to find what it needs in the calling
 * scope.
 */
#define JISX0201_R_DECODE(c, writer) \
if ((c) < 0x5c) { \
OUTCHAR(c); \
} \
else if ((c) == 0x5c) { \
OUTCHAR(0x00a5); \
} \
else if ((c) < 0x7e) { \
OUTCHAR(c); \
} \
else if ((c) == 0x7e) { \
OUTCHAR(0x203e); \
} \
else if ((c) == 0x7f) { \
OUTCHAR(0x7f); \
}
/*
 * JISX0201_K_DECODE(c, writer): for `c` in 0xa1..0xdf emit `0xfec0 + c`
 * (0xff61..0xff9f) through OUTCHAR. The `writer` parameter is not referenced
 * in the expansion itself.
 */
#define JISX0201_K_DECODE(c, writer) \
if ((c) >= 0xa1 && (c) <= 0xdf) { \
OUTCHAR(0xfec0 + (c)); \
}
/*
 * JISX0201_K_DECODE_CHAR(c, assi): for `c` in 0xa1..0xdf store `0xfec0 + c`
 * in `assi`; otherwise `assi` is left untouched.
 */
#define JISX0201_K_DECODE_CHAR(c, assi) \
if ((c) >= 0xa1 && (c) <= 0xdf) { \
(assi) = 0xfec0 + (c); \
}
/*
 * JISX0201_DECODE(c, writer): try JISX0201_R_DECODE first, then
 * JISX0201_K_DECODE. The expansion still ends in an open if-chain.
 */
#define JISX0201_DECODE(c, writer) \
JISX0201_R_DECODE(c, writer) \
else JISX0201_K_DECODE(c, writer)

View file

@ -0,0 +1,417 @@
/*
* cjkcodecs.h: common header for cjkcodecs
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "multibytecodec.h"
/* a unicode "undefined" code point */
#define UNIINV 0xFFFE
/* internal-use DBCS code points which aren't used by any charsets */
#define NOCHAR 0xFFFF
#define MULTIC 0xFFFE
#define DBCINV 0xFFFD
/* shorter macros to save source size of mapping tables */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
/*
 * One row of a decode map. `map` is NULL for an empty row; otherwise it
 * holds the entries for second-byte values bottom..top inclusive and is
 * indexed with (value - bottom), as done by _TRYMAP_DEC.
 */
struct dbcs_index {
const ucs2_t *map;
unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
/*
 * Same layout as struct dbcs_index, but the entries are Py_UCS4 values
 * instead of ucs2_t.
 */
struct widedbcs_index {
const Py_UCS4 *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
/*
 * One row of an encode map. TRYMAP_ENC selects the row with (uni >> 8) and
 * indexes `map` with ((uni & 0xff) - bottom) when the low byte lies in
 * bottom..top; `map` is NULL for an empty row.
 */
struct unim_index {
const DBCHAR *map;
unsigned char bottom, top;
};
typedef struct unim_index encode_map;
/*
 * Variant of struct unim_index whose table is an array of single bytes.
 * NOTE(review): no user of this struct is visible in this part of the
 * file - confirm how the bytes are interpreted at the call sites.
 */
struct unim_index_bytebased {
const unsigned char *map;
unsigned char bottom, top;
};
/*
 * A named pair of mapping tables, as exported through the "__map_<charset>"
 * capsules. Either pointer may be NULL when that direction is not provided
 * (see MAPPING_ENCONLY / MAPPING_DECONLY). An entry with an empty `charset`
 * terminates a mapping list.
 */
struct dbcs_map {
const char *charset;
const struct unim_index *encmap;
const struct dbcs_index *decmap;
};
/*
 * Encode-map entry for a two-code-unit sequence. `uniseq` is the key that
 * find_pairencmap builds as (body << 16 | modifier); `code` is the DBCS
 * code returned for it.
 */
struct pair_encodemap {
Py_UCS4 uniseq;
DBCHAR code;
};
/* Forward declarations of the per-module lists; the actual definitions are
 * produced by END_CODECS_LIST and END_MAPPINGS_LIST in each codec source. */
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;
/*
 * Function-header macros. Each expands to the static prototype of one codec
 * callback named <encoding>_<role>; the parameter names introduced here
 * (state, config, kind, data, inpos, inlen, outbuf, outleft, flags, inbuf,
 * inleft, writer) are the ones the helper macros below refer to implicitly.
 */
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)
#define ENCODER_INIT(encoding) \
static int encoding##_encode_init( \
MultibyteCodec_State *state, const void *config)
#define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
int kind, void *data, \
Py_ssize_t *inpos, Py_ssize_t inlen, \
unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \
MultibyteCodec_State *state, const void *config, \
unsigned char **outbuf, Py_ssize_t outleft)
#define DECODER_INIT(encoding) \
static int encoding##_decode_init( \
MultibyteCodec_State *state, const void *config)
#define DECODER(encoding) \
static Py_ssize_t encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, Py_ssize_t inleft, \
_PyUnicodeWriter *writer)
#define DECODER_RESET(encoding) \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
/* NEXT_IN(i): decoder side - consume i input bytes (advance *inbuf, shrink
 * inleft). */
#define NEXT_IN(i) \
do { \
(*inbuf) += (i); \
(inleft) -= (i); \
} while (0)
/* NEXT_INCHAR(i): encoder side - consume i input characters (advance
 * *inpos). */
#define NEXT_INCHAR(i) \
do { \
(*inpos) += (i); \
} while (0)
/* NEXT_OUT(o): encoder side - commit o output bytes (advance *outbuf,
 * shrink outleft). */
#define NEXT_OUT(o) \
do { \
(*outbuf) += (o); \
(outleft) -= (o); \
} while (0)
/* NEXT(i, o): encoder side - consume i characters and commit o bytes. */
#define NEXT(i, o) \
do { \
NEXT_INCHAR(i); \
NEXT_OUT(o); \
} while (0)
/* REQUIRE_INBUF(n): return MBERR_TOOFEW from the enclosing function unless
 * at least n input bytes remain. */
#define REQUIRE_INBUF(n) \
do { \
if (inleft < (n)) \
return MBERR_TOOFEW; \
} while (0)
/* REQUIRE_OUTBUF(n): return MBERR_TOOSMALL from the enclosing function
 * unless at least n output bytes are available. */
#define REQUIRE_OUTBUF(n) \
do { \
if (outleft < (n)) \
return MBERR_TOOSMALL; \
} while (0)
/* INBYTEn: the n-th not-yet-consumed input byte of a decoder. No bounds
 * check is done here; pair with REQUIRE_INBUF. */
#define INBYTE1 ((*inbuf)[0])
#define INBYTE2 ((*inbuf)[1])
#define INBYTE3 ((*inbuf)[2])
#define INBYTE4 ((*inbuf)[3])
/* INCHARn: the current (1) or following (2) input character of an encoder,
 * read from the string data at *inpos. No bounds check is done here. */
#define INCHAR1 (PyUnicode_READ(kind, data, *inpos))
#define INCHAR2 (PyUnicode_READ(kind, data, *inpos + 1))
/* OUTCHAR(c): append one code point to the decoder's `writer`; returns
 * MBERR_EXCEPTION from the enclosing function when the write fails. */
#define OUTCHAR(c) \
do { \
if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \
return MBERR_EXCEPTION; \
} while (0)
/*
 * OUTCHAR2(c1, c2): append two code points to the decoder's `writer` in one
 * step. Returns MBERR_EXCEPTION from the enclosing function when the writer
 * cannot be prepared.
 *
 * Both arguments are captured in locals first and only the locals are used
 * afterwards, so each argument expression is evaluated exactly once (the
 * maximum passed to _PyUnicodeWriter_Prepare previously re-expanded the raw
 * `c2` argument, unparenthesized).
 */
#define OUTCHAR2(c1, c2)                                                 \
    do {                                                                 \
        Py_UCS4 _c1 = (c1);                                              \
        Py_UCS4 _c2 = (c2);                                              \
        if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0)   \
            return MBERR_EXCEPTION;                                      \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);   \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
        writer->pos += 2;                                                \
    } while (0)
/* OUTBYTEn(c): store c as the n-th pending output byte of an encoder. No
 * bounds check and no pointer advance; pair with REQUIRE_OUTBUF and
 * NEXT / NEXT_OUT. */
#define OUTBYTE1(c) \
do { ((*outbuf)[0]) = (c); } while (0)
#define OUTBYTE2(c) \
do { ((*outbuf)[1]) = (c); } while (0)
#define OUTBYTE3(c) \
do { ((*outbuf)[2]) = (c); } while (0)
#define OUTBYTE4(c) \
do { ((*outbuf)[3]) = (c); } while (0)
/* WRITEBYTEn(...): bounds-checked store of n output bytes - REQUIRE_OUTBUF(n)
 * followed by the stores. The output pointer is not advanced; callers still
 * use NEXT / NEXT_OUT afterwards. */
#define WRITEBYTE1(c1) \
do { \
REQUIRE_OUTBUF(1); \
(*outbuf)[0] = (c1); \
} while (0)
#define WRITEBYTE2(c1, c2) \
do { \
REQUIRE_OUTBUF(2); \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
} while (0)
#define WRITEBYTE3(c1, c2, c3) \
do { \
REQUIRE_OUTBUF(3); \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
} while (0)
#define WRITEBYTE4(c1, c2, c3, c4) \
do { \
REQUIRE_OUTBUF(4); \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4); \
} while (0)
/*
 * _TRYMAP_ENC(m, assi, val): look `val` up in encode-map row `m`. Evaluates
 * to true, with the entry stored in `assi`, when the row has a table, `val`
 * lies in bottom..top and the entry is not NOCHAR. Note that `assi` is
 * assigned as a side effect even when the entry turns out to be NOCHAR.
 */
#define _TRYMAP_ENC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != NOCHAR)
/* TRYMAP_ENC(charset, assi, uni): row = <charset>_encmap[uni >> 8], column =
 * uni & 0xff. Callers must make sure (uni >> 8) is a valid row index. */
#define TRYMAP_ENC(charset, assi, uni) \
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
/*
 * _TRYMAP_DEC(m, assi, val): decode-side counterpart; an entry equal to
 * UNIINV counts as "no mapping".
 */
#define _TRYMAP_DEC(m, assi, val) \
((m)->map != NULL && \
(val) >= (m)->bottom && \
(val)<= (m)->top && \
((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
/* TRYMAP_DEC(charset, assi, c1, c2): row = <charset>_decmap[c1], column = c2. */
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
/*
 * Mapping-list builders. BEGIN_MAPPINGS_LIST opens the static
 * _mapping_list[] array, each MAPPING_* line adds one struct dbcs_map entry
 * named after the stringified charset (NULL for a direction that is not
 * provided), and END_MAPPINGS_LIST appends the empty-name terminator and
 * points mapping_list at the array.
 */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST \
{"", NULL, NULL} }; \
static const struct dbcs_map *mapping_list = \
(const struct dbcs_map *)_mapping_list;
/*
 * Codec-list builders. BEGIN_CODECS_LIST opens the static _codec_list[]
 * array of MultibyteCodec entries; END_CODECS_LIST appends the empty-name
 * terminator (which getcodec's search loop stops at) and points codec_list
 * at the array.
 */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
/* All six callbacks: encode, encode_init, encode_reset, decode, decode_init,
 * decode_reset. */
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
enc##_encode_reset, \
enc##_decode, \
enc##_decode_init, \
enc##_decode_reset,
/* Only encode and decode; the init/reset slots are NULL. */
#define _STATELESS_METHODS(enc) \
enc##_encode, NULL, NULL, \
enc##_decode, NULL, NULL,
/* Entry with name, NULL config, NULL codec init, and all six callbacks. */
#define CODEC_STATEFUL(enc) { \
#enc, NULL, NULL, \
_STATEFUL_METHODS(enc) \
},
/* Entry with name, NULL config, NULL codec init, encode/decode only. */
#define CODEC_STATELESS(enc) { \
#enc, NULL, NULL, \
_STATELESS_METHODS(enc) \
},
/* Like CODEC_STATELESS, but with <enc>_codec_init as the codec init hook. */
#define CODEC_STATELESS_WINIT(enc) { \
#enc, NULL, \
enc##_codec_init, \
_STATELESS_METHODS(enc) \
},
#define END_CODECS_LIST \
{"", NULL,} }; \
static const MultibyteCodec *codec_list = \
(const MultibyteCodec *)_codec_list;
/*
 * Return the "__create_codec" attribute of the _multibytecodec module,
 * looking it up on first use and caching it in a function-local static.
 * Returns NULL when the import or the attribute lookup fails; nothing is
 * cached in that case, so the next call tries again.
 */
static PyObject *
getmultibytecodec(void)
{
    static PyObject *cofunc = NULL;
    PyObject *mod;

    /* Fast path: already resolved by an earlier call. */
    if (cofunc != NULL)
        return cofunc;

    mod = PyImport_ImportModuleNoBlock("_multibytecodec");
    if (mod == NULL)
        return NULL;

    cofunc = PyObject_GetAttrString(mod, "__create_codec");
    Py_DECREF(mod);
    return cofunc;
}
/*
 * Module-level getcodec(encoding): find the entry of codec_list whose name
 * equals `encoding`, wrap it in a capsule and pass that capsule to
 * _multibytecodec.__create_codec, returning whatever that call returns.
 *
 * Raises TypeError when `encoding` is not a str and LookupError when no
 * codec of that name is in codec_list.
 */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
    PyObject *codecobj, *r, *cofunc;
    const MultibyteCodec *codec;
    const char *enc;

    if (!PyUnicode_Check(encoding)) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding name must be a string.");
        return NULL;
    }

    enc = PyUnicode_AsUTF8(encoding);
    if (enc == NULL)
        return NULL;

    cofunc = getmultibytecodec();
    if (cofunc == NULL)
        return NULL;

    /* Linear search; the list ends with an entry whose name is empty. */
    codec = codec_list;
    while (codec->encoding[0] != '\0' && strcmp(codec->encoding, enc) != 0)
        codec++;

    if (codec->encoding[0] == '\0') {
        PyErr_SetString(PyExc_LookupError,
                        "no such codec is supported.");
        return NULL;
    }

    codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
    if (codecobj == NULL)
        return NULL;

    r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL);
    Py_DECREF(codecobj);
    return r;
}
/* Method table shared by every codec module built from this header: a single
 * one-argument function, getcodec. */
static struct PyMethodDef __methods[] = {
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
{NULL, NULL},
};
/*
 * Publish every entry of mapping_list on `module` as a capsule attribute
 * named "__map_<charset>".
 *
 * Returns 0 on success and -1 as soon as creating or adding a capsule
 * fails.
 */
static int
register_maps(PyObject *module)
{
    const struct dbcs_map *h;

    for (h = mapping_list; h->charset[0] != '\0'; h++) {
        /* Charset names are short compile-time strings produced by the
         * MAPPING_* macros, so they fit the fixed buffer. */
        char mhname[256] = "__map_";
        PyObject *capsule;

        strcpy(mhname + sizeof("__map_") - 1, h->charset);

        capsule = PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL);
        if (capsule == NULL)
            return -1;

        /* PyModule_AddObject takes over the reference only when it
         * succeeds; on failure the capsule is still ours to release. */
        if (PyModule_AddObject(module, mhname, capsule) < 0) {
            Py_DECREF(capsule);
            return -1;
        }
    }
    return 0;
}
#ifdef USING_BINARY_PAIR_SEARCH
/*
 * Binary-search `haystack` (haystacksize entries, sorted ascending by
 * uniseq) for the two-unit sequence (body, modifier).
 *
 * Returns the matching entry's DBCS code, or DBCINV when the sequence is
 * not present or the table is empty.
 */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
                const struct pair_encodemap *haystack, int haystacksize)
{
    int pos, min, max;
    /* Widen before shifting: `body` would otherwise be promoted to a signed
     * int, and shifting a value >= 0x8000 left by 16 overflows it. */
    Py_UCS4 value = ((Py_UCS4)body << 16) | modifier;

    /* With no entries there is nothing that may be read below. */
    if (haystacksize <= 0)
        return DBCINV;

    min = 0;
    max = haystacksize;

    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
        if (value < haystack[pos].uniseq) {
            if (max != pos) {
                max = pos;
                continue;
            }
        }
        else if (value > haystack[pos].uniseq) {
            if (min != pos) {
                min = pos;
                continue;
            }
        }
        /* Either an exact hit or the range can no longer shrink. */
        break;
    }

    if (value == haystack[pos].uniseq) {
        return haystack[pos].code;
    }
    return DBCINV;
}
#endif
#ifdef USING_IMPORTED_MAPS
/*
 * IMPORT_MAP(locale, charset, encmap, decmap): call importmap() for the
 * attribute "__map_<charset>" of module "_codecs_<locale>". `encmap` and
 * `decmap` are the addresses that receive the table pointers; either may be
 * NULL to skip that direction.
 */
#define IMPORT_MAP(locale, charset, encmap, decmap) \
importmap("_codecs_" #locale, "__map_" #charset, \
(const void**)encmap, (const void**)decmap)
static int
importmap(const char *modname, const char *symbol,
const void **encmap, const void **decmap)
{
PyObject *o, *mod;
mod = PyImport_ImportModule(modname);
if (mod == NULL)
return -1;
o = PyObject_GetAttrString(mod, symbol);
if (o == NULL)
goto errorexit;
else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
PyErr_SetString(PyExc_ValueError,
"map data must be a Capsule.");
goto errorexit;
}
else {
struct dbcs_map *map;
map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
if (encmap != NULL)
*encmap = map->encmap;
if (decmap != NULL)
*decmap = map->decmap;
Py_DECREF(o);
}
Py_DECREF(mod);
return 0;
errorexit:
Py_DECREF(mod);
return -1;
}
#endif
/*
 * I_AM_A_MODULE_FOR(loc): define the module object "_codecs_<loc>" (using
 * the shared __methods table) and its PyInit__codecs_<loc> entry point.
 *
 * The init function creates the module and registers the mapping capsules.
 * If registration fails the half-initialized module is released and NULL is
 * returned, so the failure is reported instead of a module being handed
 * back while an exception is pending.
 */
#define I_AM_A_MODULE_FOR(loc)                                          \
    static struct PyModuleDef __module = {                              \
        PyModuleDef_HEAD_INIT,                                          \
        "_codecs_"#loc,                                                 \
        NULL,                                                           \
        0,                                                              \
        __methods,                                                      \
        NULL,                                                           \
        NULL,                                                           \
        NULL,                                                           \
        NULL                                                            \
    };                                                                  \
    PyMODINIT_FUNC                                                      \
    PyInit__codecs_##loc(void)                                          \
    {                                                                   \
        PyObject *m = PyModule_Create(&__module);                       \
        if (m != NULL && register_maps(m) < 0) {                        \
            Py_DECREF(m);                                               \
            m = NULL;                                                   \
        }                                                               \
        return m;                                                       \
    }
#endif

View file

@ -0,0 +1,333 @@
/*[clinic input]
preserve
[clinic start generated code]*/
/* MultibyteCodec.encode(input, errors=None): docstring, method-table entry,
 * implementation prototype and the argument-parsing wrapper. */
PyDoc_STRVAR(_multibytecodec_MultibyteCodec_encode__doc__,
"encode($self, /, input, errors=None)\n"
"--\n"
"\n"
"Return an encoded string version of `input\'.\n"
"\n"
"\'errors\' may be given to set a different error handling scheme. Default is\n"
"\'strict\' meaning that encoding errors raise a UnicodeEncodeError. Other possible\n"
"values are \'ignore\', \'replace\' and \'xmlcharrefreplace\' as well as any other name\n"
"registered with codecs.register_error that can handle UnicodeEncodeErrors.");

#define _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF    \
    {"encode", (PyCFunction)_multibytecodec_MultibyteCodec_encode, METH_FASTCALL, _multibytecodec_MultibyteCodec_encode__doc__},

static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
                                           PyObject *input,
                                           const char *errors);

/* Unpack (input, errors) from the fastcall arguments and forward them. */
static PyObject *
_multibytecodec_MultibyteCodec_encode(MultibyteCodecObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const _keywords[] = {"input", "errors", NULL};
    static _PyArg_Parser _parser = {"O|z:encode", _keywords, 0};
    PyObject *input;
    const char *errors = NULL;

    /* Nothing needs releasing on a parse failure, so return directly. */
    if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
                           &input, &errors))
        return NULL;

    return _multibytecodec_MultibyteCodec_encode_impl(self, input, errors);
}
/* MultibyteCodec.decode(input, errors=None): docstring, method-table entry,
 * implementation prototype and the argument-parsing wrapper.
 *
 * The docstring no longer ends in a stray double quote. NOTE(review): this
 * file is Argument Clinic output, so the same correction presumably belongs
 * in the clinic input block it was generated from - confirm. */
PyDoc_STRVAR(_multibytecodec_MultibyteCodec_decode__doc__,
"decode($self, /, input, errors=None)\n"
"--\n"
"\n"
"Decodes \'input\'.\n"
"\n"
"\'errors\' may be given to set a different error handling scheme. Default is\n"
"\'strict\' meaning that encoding errors raise a UnicodeDecodeError. Other possible\n"
"values are \'ignore\' and \'replace\' as well as any other name registered with\n"
"codecs.register_error that is able to handle UnicodeDecodeErrors.");

#define _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF    \
    {"decode", (PyCFunction)_multibytecodec_MultibyteCodec_decode, METH_FASTCALL, _multibytecodec_MultibyteCodec_decode__doc__},

static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
                                           Py_buffer *input,
                                           const char *errors);

/* Unpack (input, errors), forward them, and always release the buffer that
 * the "y*" converter may have acquired. */
static PyObject *
_multibytecodec_MultibyteCodec_decode(MultibyteCodecObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    PyObject *return_value = NULL;
    static const char * const _keywords[] = {"input", "errors", NULL};
    static _PyArg_Parser _parser = {"y*|z:decode", _keywords, 0};
    Py_buffer input = {NULL, NULL};
    const char *errors = NULL;

    if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
                           &input, &errors)) {
        goto exit;
    }
    return_value = _multibytecodec_MultibyteCodec_decode_impl(self, &input, errors);

exit:
    /* Cleanup for input */
    if (input.obj) {
        PyBuffer_Release(&input);
    }

    return return_value;
}
/* MultibyteIncrementalEncoder.encode(input, final=False): docstring,
 * method-table entry, implementation prototype and wrapper. */
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_encode__doc__,
"encode($self, /, input, final=False)\n"
"--\n"
"\n");

#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF    \
    {"encode", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_encode, METH_FASTCALL, _multibytecodec_MultibyteIncrementalEncoder_encode__doc__},

static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
                                                        PyObject *input,
                                                        int final);

/* Unpack (input, final) from the fastcall arguments and forward them. */
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode(MultibyteIncrementalEncoderObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const _keywords[] = {"input", "final", NULL};
    static _PyArg_Parser _parser = {"O|i:encode", _keywords, 0};
    PyObject *input;
    int final = 0;

    /* Nothing needs releasing on a parse failure, so return directly. */
    if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
                           &input, &final))
        return NULL;

    return _multibytecodec_MultibyteIncrementalEncoder_encode_impl(self, input, final);
}
/* MultibyteIncrementalEncoder.reset(): docstring, method-table entry,
 * implementation prototype and a wrapper that forwards `self` unchanged. */
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_reset__doc__,
"reset($self, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF \
{"reset", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_reset, METH_NOARGS, _multibytecodec_MultibyteIncrementalEncoder_reset__doc__},
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self);
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset(MultibyteIncrementalEncoderObject *self, PyObject *Py_UNUSED(ignored))
{
return _multibytecodec_MultibyteIncrementalEncoder_reset_impl(self);
}
/* MultibyteIncrementalDecoder.decode(input, final=False): docstring,
 * method-table entry, implementation prototype and wrapper. */
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_decode__doc__,
"decode($self, /, input, final=False)\n"
"--\n"
"\n");

#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF    \
    {"decode", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_decode, METH_FASTCALL, _multibytecodec_MultibyteIncrementalDecoder_decode__doc__},

static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
                                                        Py_buffer *input,
                                                        int final);

/* Unpack (input, final), forward them, and always release the buffer that
 * the "y*" converter may have acquired. */
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode(MultibyteIncrementalDecoderObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const _keywords[] = {"input", "final", NULL};
    static _PyArg_Parser _parser = {"y*|i:decode", _keywords, 0};
    PyObject *return_value = NULL;
    Py_buffer input = {NULL, NULL};
    int final = 0;

    /* Only call the implementation when parsing succeeded; the cleanup
     * below runs either way. */
    if (_PyArg_ParseStack(args, nargs, kwnames, &_parser,
                          &input, &final)) {
        return_value = _multibytecodec_MultibyteIncrementalDecoder_decode_impl(self, &input, final);
    }

    /* Cleanup for input */
    if (input.obj) {
        PyBuffer_Release(&input);
    }
    return return_value;
}
/* MultibyteIncrementalDecoder.reset(): docstring, method-table entry,
 * implementation prototype and a wrapper that forwards `self` unchanged. */
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_reset__doc__,
"reset($self, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF \
{"reset", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_reset, METH_NOARGS, _multibytecodec_MultibyteIncrementalDecoder_reset__doc__},
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self);
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset(MultibyteIncrementalDecoderObject *self, PyObject *Py_UNUSED(ignored))
{
return _multibytecodec_MultibyteIncrementalDecoder_reset_impl(self);
}
/* MultibyteStreamReader.read(sizeobj=None): docstring, method-table entry,
 * implementation prototype and wrapper. */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_read__doc__,
"read($self, sizeobj=None, /)\n"
"--\n"
"\n");

#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF    \
    {"read", (PyCFunction)_multibytecodec_MultibyteStreamReader_read, METH_VARARGS, _multibytecodec_MultibyteStreamReader_read__doc__},

static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
                                                PyObject *sizeobj);

/* Accept zero or one positional argument; a missing one stays Py_None. */
static PyObject *
_multibytecodec_MultibyteStreamReader_read(MultibyteStreamReaderObject *self, PyObject *args)
{
    PyObject *sizeobj = Py_None;

    if (!PyArg_UnpackTuple(args, "read",
                           0, 1,
                           &sizeobj))
        return NULL;

    return _multibytecodec_MultibyteStreamReader_read_impl(self, sizeobj);
}
/* MultibyteStreamReader.readline(sizeobj=None): docstring, method-table
 * entry, implementation prototype and wrapper. */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_readline__doc__,
"readline($self, sizeobj=None, /)\n"
"--\n"
"\n");

#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF    \
    {"readline", (PyCFunction)_multibytecodec_MultibyteStreamReader_readline, METH_VARARGS, _multibytecodec_MultibyteStreamReader_readline__doc__},

static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
                                                    PyObject *sizeobj);

/* Accept zero or one positional argument; a missing one stays Py_None. */
static PyObject *
_multibytecodec_MultibyteStreamReader_readline(MultibyteStreamReaderObject *self, PyObject *args)
{
    PyObject *sizeobj = Py_None;

    if (!PyArg_UnpackTuple(args, "readline",
                           0, 1,
                           &sizeobj))
        return NULL;

    return _multibytecodec_MultibyteStreamReader_readline_impl(self, sizeobj);
}
/* MultibyteStreamReader.readlines(sizehintobj=None): docstring, method-table
 * entry, implementation prototype and wrapper. */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_readlines__doc__,
"readlines($self, sizehintobj=None, /)\n"
"--\n"
"\n");

#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF    \
    {"readlines", (PyCFunction)_multibytecodec_MultibyteStreamReader_readlines, METH_VARARGS, _multibytecodec_MultibyteStreamReader_readlines__doc__},

static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
                                                     PyObject *sizehintobj);

/* Accept zero or one positional argument; a missing one stays Py_None. */
static PyObject *
_multibytecodec_MultibyteStreamReader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
{
    PyObject *sizehintobj = Py_None;

    if (!PyArg_UnpackTuple(args, "readlines",
                           0, 1,
                           &sizehintobj))
        return NULL;

    return _multibytecodec_MultibyteStreamReader_readlines_impl(self, sizehintobj);
}
/* MultibyteStreamReader.reset(): docstring, method-table entry,
 * implementation prototype and a wrapper that forwards `self` unchanged. */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamReader_reset__doc__,
"reset($self, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF \
{"reset", (PyCFunction)_multibytecodec_MultibyteStreamReader_reset, METH_NOARGS, _multibytecodec_MultibyteStreamReader_reset__doc__},
static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self);
static PyObject *
_multibytecodec_MultibyteStreamReader_reset(MultibyteStreamReaderObject *self, PyObject *Py_UNUSED(ignored))
{
return _multibytecodec_MultibyteStreamReader_reset_impl(self);
}
/* MultibyteStreamWriter.write(strobj): docstring and method-table entry.
 * Registered as METH_O, so the named function is referenced directly and no
 * wrapper is generated here. */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamWriter_write__doc__,
"write($self, strobj, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF \
{"write", (PyCFunction)_multibytecodec_MultibyteStreamWriter_write, METH_O, _multibytecodec_MultibyteStreamWriter_write__doc__},
/* MultibyteStreamWriter.writelines(lines): docstring and method-table entry
 * (METH_O, no wrapper generated here). */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamWriter_writelines__doc__,
"writelines($self, lines, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF \
{"writelines", (PyCFunction)_multibytecodec_MultibyteStreamWriter_writelines, METH_O, _multibytecodec_MultibyteStreamWriter_writelines__doc__},
/* MultibyteStreamWriter.reset(): docstring, method-table entry,
 * implementation prototype and a wrapper that forwards `self` unchanged. */
PyDoc_STRVAR(_multibytecodec_MultibyteStreamWriter_reset__doc__,
"reset($self, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF \
{"reset", (PyCFunction)_multibytecodec_MultibyteStreamWriter_reset, METH_NOARGS, _multibytecodec_MultibyteStreamWriter_reset__doc__},
static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self);
static PyObject *
_multibytecodec_MultibyteStreamWriter_reset(MultibyteStreamWriterObject *self, PyObject *Py_UNUSED(ignored))
{
return _multibytecodec_MultibyteStreamWriter_reset_impl(self);
}
/* Module-level __create_codec(arg): docstring and method-table entry
 * (METH_O, no wrapper generated here). This is the attribute that
 * getmultibytecodec() looks up by name. */
PyDoc_STRVAR(_multibytecodec___create_codec__doc__,
"__create_codec($module, arg, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC___CREATE_CODEC_METHODDEF \
{"__create_codec", (PyCFunction)_multibytecodec___create_codec, METH_O, _multibytecodec___create_codec__doc__},
/*[clinic end generated code: output=134b9e36cb985939 input=a9049054013a1b77]*/

View file

@ -0,0 +1,54 @@
/* These routines may be quite inefficient, but they are used only to emulate
* old standards. */
/* Value the encode-side emulation macros return for a rejected character;
 * a definition made before this point takes precedence over the default. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
# define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif
/*
 * EMULATE_JISX0213_2000_ENCODE_BMP(assi, c): active only when `config`
 * (taken from the calling scope) equals (void *)2000. In that mode the ten
 * listed code points make the enclosing function return
 * EMULATE_JISX0213_2000_ENCODE_INVALID, and 0x9B1D is assigned the code
 * 0x8000 | 0x7d3b. Expands to an if / else-if with no final else, so the
 * caller can chain its regular lookup after it.
 */
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \
if (config == (void *)2000 && ( \
(c) == 0x9B1C || (c) == 0x4FF1 || \
(c) == 0x525D || (c) == 0x541E || \
(c) == 0x5653 || (c) == 0x59F8 || \
(c) == 0x5C5B || (c) == 0x5E77 || \
(c) == 0x7626 || (c) == 0x7E6B)) { \
return EMULATE_JISX0213_2000_ENCODE_INVALID; \
} \
else if (config == (void *)2000 && (c) == 0x9B1D) { \
(assi) = 0x8000 | 0x7d3b; \
}
/*
 * EMULATE_JISX0213_2000_ENCODE_EMP(assi, c): when `config` equals
 * (void *)2000 and `c` is 0x20B9F, return
 * EMULATE_JISX0213_2000_ENCODE_INVALID from the enclosing function. `assi`
 * is not referenced in the expansion.
 */
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \
if (config == (void *)2000 && (c) == 0x20B9F) { \
return EMULATE_JISX0213_2000_ENCODE_INVALID; \
}
/* Value the decode-side emulation macro returns for a rejected byte pair;
 * a definition made before this point takes precedence over the default. */
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
# define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif
/*
 * EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2): when `config` equals
 * (void *)2000 and (c1, c2) is one of the ten listed pairs, return
 * EMULATE_JISX0213_2000_DECODE_INVALID from the enclosing function. `assi`
 * is not referenced in the expansion.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
if (config == (void *)2000 && \
(((c1) == 0x2E && (c2) == 0x21) || \
((c1) == 0x2F && (c2) == 0x7E) || \
((c1) == 0x4F && (c2) == 0x54) || \
((c1) == 0x4F && (c2) == 0x7E) || \
((c1) == 0x74 && (c2) == 0x27) || \
((c1) == 0x7E && (c2) == 0x7A) || \
((c1) == 0x7E && (c2) == 0x7B) || \
((c1) == 0x7E && (c2) == 0x7C) || \
((c1) == 0x7E && (c2) == 0x7D) || \
((c1) == 0x7E && (c2) == 0x7E))) { \
return EMULATE_JISX0213_2000_DECODE_INVALID; \
}
/*
 * EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2): when `config` equals
 * (void *)2000 and the pair is (0x7D, 0x3B), emit 0x9B1D through OUTCHAR.
 * This mirrors the 0x9B1D case of EMULATE_JISX0213_2000_ENCODE_BMP. The
 * `writer` parameter is not referenced in the expansion itself.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) { \
OUTCHAR(0x9B1D); \
}
/*
 * EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2): same condition as
 * EMULATE_JISX0213_2000_DECODE_PLANE2, but the result 0x9B1D is assigned to
 * `assi` instead of being emitted.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) { \
(assi) = 0x9B1D; \
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,59 @@
/* Number of entries in jisx0213_pair_encmap. */
#define JISX0213_ENCPAIRS 46
/* With EXTERN_JISX0213_PAIR defined, only pointers are declared here and the
 * tables themselves are expected to come from elsewhere; otherwise the
 * tables are defined in place below. */
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
/*
 * Backing store for jisx0213_pair_decmap. Each non-U entry packs two 16-bit
 * code units as (first << 16 | second), e.g. 810234010 == 0x304B309A, the
 * same packing as the uniseq keys of jisx0213_pair_encmap. U marks a
 * position with no pair mapping.
 */
static const Py_UCS4 __jisx0213_pair_decmap[49] = {
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
39453441,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,48825061,48562921,
};
/*
 * Decode index with one {map, bottom, top} row per first-byte value (256
 * rows). Only rows 0x24, 0x25, 0x26 and 0x2B are populated; each points
 * into __jisx0213_pair_decmap and covers second-byte values bottom..top.
 * All other rows are {0,0,0}, i.e. have a NULL map.
 */
static const struct widedbcs_index jisx0213_pair_decmap[256] = {
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap
+0,119,123},{__jisx0213_pair_decmap+5,119,126},{__jisx0213_pair_decmap+13,120,
120},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap+14,68,102},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
};
/*
 * Encode table for two-unit sequences: {uniseq, code} with
 * uniseq == (first << 16 | second). Entries whose low 16 bits are zero give
 * the code of the first unit on its own. The entries are kept in ascending
 * uniseq order, which a binary search such as find_pairencmap relies on.
 */
static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
{0x00e60000,0x295c},{0x00e60300,0x2b44},{0x02540000,0x2b38},{0x02540300,0x2b48
},{0x02540301,0x2b49},{0x02590000,0x2b30},{0x02590300,0x2b4c},{0x02590301,
0x2b4d},{0x025a0000,0x2b43},{0x025a0300,0x2b4e},{0x025a0301,0x2b4f},{
0x028c0000,0x2b37},{0x028c0300,0x2b4a},{0x028c0301,0x2b4b},{0x02e50000,0x2b60
},{0x02e502e9,0x2b66},{0x02e90000,0x2b64},{0x02e902e5,0x2b65},{0x304b0000,
0x242b},{0x304b309a,0x2477},{0x304d0000,0x242d},{0x304d309a,0x2478},{
0x304f0000,0x242f},{0x304f309a,0x2479},{0x30510000,0x2431},{0x3051309a,0x247a
},{0x30530000,0x2433},{0x3053309a,0x247b},{0x30ab0000,0x252b},{0x30ab309a,
0x2577},{0x30ad0000,0x252d},{0x30ad309a,0x2578},{0x30af0000,0x252f},{
0x30af309a,0x2579},{0x30b10000,0x2531},{0x30b1309a,0x257a},{0x30b30000,0x2533
},{0x30b3309a,0x257b},{0x30bb0000,0x253b},{0x30bb309a,0x257c},{0x30c40000,
0x2544},{0x30c4309a,0x257d},{0x30c80000,0x2548},{0x30c8309a,0x257e},{
0x31f70000,0x2675},{0x31f7309a,0x2678},
};
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,136 @@
/*
* multibytecodec.h: Common Multibyte Codec Implementation
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#ifndef _PYTHON_MULTIBYTECODEC_H_
#define _PYTHON_MULTIBYTECODEC_H_
#ifdef __cplusplus
extern "C" {
#endif
/*
 * ucs2_t / DBCHAR: unsigned 16-bit unit types used by the codec tables
 * and state.
 *
 * uint16_t is a typedef, not a macro, so it cannot be detected with
 * #ifdef.  <stdint.h> defines UINT16_MAX exactly when it provides
 * uint16_t, so test that macro instead; fall back to unsigned short when
 * <stdint.h> has not been included or has no exact 16-bit type.
 */
#ifdef UINT16_MAX
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif
/*
 * MultibyteCodec_State: scratch storage handed to every codec callback
 * declared in this header.  The members are alternative views of the
 * same storage: a pointer, an int, 8 raw bytes, 4 ucs2_t units or
 * 2 Py_UCS4 units.
 *
 * NOTE(review): which view is used, and what it means, is presumably up
 * to each individual codec -- nothing in this header fixes it.
 * Keep `p` as the first member: a brace initialiser such as {0}
 * initialises the first member of a union.
 */
typedef union {
void *p;
int i;
unsigned char c[8];
ucs2_t u2[4];
Py_UCS4 u4[2];
} MultibyteCodec_State;
/*
 * mbcodec_init: codec-level initialisation callback; it receives the
 * codec's opaque `config` pointer and returns an int.
 * NOTE(review): the return convention is not visible in this header
 * (presumably 0 on success, non-zero on failure) -- confirm against the
 * callers.
 */
typedef int (*mbcodec_init)(const void *config);
/*
 * mbencode_func: encoder callback type.
 *
 * Parameter roles below are inferred from names and types only; confirm
 * them against a codec implementation before relying on them.
 *
 *   state      conversion state owned by the caller
 *   config     the codec's opaque configuration pointer
 *   kind, data description of the input text (presumably a PyUnicode
 *              kind constant and the matching character buffer)
 *   inpos      pointer to the current input position (non-const, so
 *              presumably advanced by the callback)
 *   inlen      input length
 *   outbuf     pointer to the output cursor (presumably advanced as
 *              bytes are produced)
 *   outleft    presumably the space still available at *outbuf
 *   flags      presumably a combination of MBENC_* bits
 *
 * NOTE(review): the Py_ssize_t result presumably is 0 on success, one of
 * the negative MBERR_* codes, or a positive value for an illegal
 * sequence -- confirm.
 */
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
const void *config,
int kind, void *data,
Py_ssize_t *inpos, Py_ssize_t inlen,
unsigned char **outbuf, Py_ssize_t outleft,
int flags);
/*
 * mbencodeinit_func: callback that receives an encoder's state and the
 * codec's opaque config pointer.
 * NOTE(review): presumably puts `state` into its initial encoding state
 * and returns 0 on success -- confirm against the codecs.
 */
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
const void *config);
/*
 * mbencodereset_func: encoder reset callback.  Besides the state and
 * config it is given an output cursor (`outbuf`) and `outleft`, so it is
 * able to emit bytes.
 * NOTE(review): presumably writes whatever sequence returns a stateful
 * encoding to its initial state, and uses the same return convention as
 * mbencode_func -- confirm.
 */
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
const void *config,
unsigned char **outbuf, Py_ssize_t outleft);
/*
 * mbdecode_func: decoder callback type.
 *
 * Parameter roles below are inferred from names and types only; confirm
 * them against a codec implementation.
 *
 *   state    conversion state owned by the caller
 *   config   the codec's opaque configuration pointer
 *   inbuf    pointer to the read cursor over the encoded bytes
 *            (presumably advanced as input is consumed)
 *   inleft   presumably the number of bytes available at *inbuf
 *   writer   _PyUnicodeWriter that receives the decoded text
 *
 * NOTE(review): the Py_ssize_t result presumably is 0 on success, one of
 * the negative MBERR_* codes, or a positive value for an illegal
 * sequence -- confirm.
 */
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
const void *config,
const unsigned char **inbuf, Py_ssize_t inleft,
_PyUnicodeWriter *writer);
/*
 * mbdecodeinit_func: callback that receives a decoder's state and the
 * codec's opaque config pointer.
 * NOTE(review): presumably puts `state` into its initial decoding state
 * and returns 0 on success -- confirm against the codecs.
 */
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
const void *config);
/*
 * mbdecodereset_func: decoder reset callback.  Unlike the encoder reset
 * it takes no output buffer, only the state and config.
 * NOTE(review): the meaning of the Py_ssize_t result is not visible
 * here -- confirm against the codecs.
 */
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
const void *config);
/*
 * MultibyteCodec: description of one codec -- its name, an opaque config
 * pointer, and the callbacks that implement it.
 *
 *   encoding   codec name string
 *   config     opaque pointer passed to every callback
 *   codecinit  codec-level initialisation callback
 *   encode / encinit / encreset   encoder callbacks
 *   decode / decinit / decreset   decoder callbacks
 *
 * NOTE(review): whether individual callback slots may be NULL is not
 * visible in this header -- check the users before calling through one
 * unconditionally.
 */
typedef struct {
const char *encoding;
const void *config;
mbcodec_init codecinit;
mbencode_func encode;
mbencodeinit_func encinit;
mbencodereset_func encreset;
mbdecode_func decode;
mbdecodeinit_func decinit;
mbdecodereset_func decreset;
} MultibyteCodec;
/*
 * MultibyteCodecObject: Python object (PyObject_HEAD) that carries a
 * pointer to a MultibyteCodec description.
 */
typedef struct {
PyObject_HEAD
MultibyteCodec *codec;
} MultibyteCodecObject;
/*
 * MultibyteCodec_Check(op): non-zero when the type of `op` is exactly
 * MultibyteCodec_Type (an identity comparison; subtypes do not match).
 *
 * Py_TYPE() is used instead of reading ->ob_type directly so that the
 * macro accepts any object pointer, not only a pointer whose pointee
 * has an `ob_type` member.  `op` is evaluated once.
 */
#define MultibyteCodec_Check(op) (Py_TYPE(op) == &MultibyteCodec_Type)
/*
 * _MultibyteStatefulCodec_HEAD: field list shared by every stateful
 * codec object in this header.  It expands to PyObject_HEAD followed by
 * the codec description pointer, the conversion state and the `errors`
 * slot.
 *
 * The expansion already ends in ';', so the macro is written inside a
 * struct body without a trailing semicolon.
 * NOTE(review): `errors` presumably holds either one of the ERROR_*
 * sentinels or a reference to a custom error handler -- confirm.
 */
#define _MultibyteStatefulCodec_HEAD \
PyObject_HEAD \
MultibyteCodec *codec; \
MultibyteCodec_State state; \
PyObject *errors;
/*
 * MultibyteStatefulCodecContext: struct consisting of exactly the common
 * stateful-codec fields (_MultibyteStatefulCodec_HEAD) and nothing else.
 */
typedef struct {
_MultibyteStatefulCodec_HEAD
} MultibyteStatefulCodecContext;
/*
 * MAXENCPENDING is not referenced anywhere in this header.
 * NOTE(review): presumably the maximum number of characters an encoder
 * may keep pending between calls -- confirm against its users.
 */
#define MAXENCPENDING 2
/*
 * _MultibyteStatefulEncoder_HEAD: the common stateful-codec fields plus
 * a `pending` object pointer.  The expansion ends in ';', so it is used
 * without a trailing semicolon.
 * NOTE(review): `pending` presumably holds input that has not been
 * encoded yet -- confirm.
 */
#define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \
PyObject *pending;
/*
 * MultibyteStatefulEncoderContext: struct consisting of exactly the
 * encoder field list (_MultibyteStatefulEncoder_HEAD).
 */
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteStatefulEncoderContext;
/* Capacity, in bytes, of the decoder's `pending` buffer declared below. */
#define MAXDECPENDING 8
/*
 * _MultibyteStatefulDecoder_HEAD: the common stateful-codec fields plus
 * a fixed MAXDECPENDING-byte `pending` buffer and its `pendingsize`
 * counter.  The expansion ends in ';', so it is used without a trailing
 * semicolon.
 * NOTE(review): `pending` presumably keeps the bytes of an incomplete
 * multibyte sequence between calls, with `pendingsize` giving how many
 * of them are valid -- confirm.
 */
#define _MultibyteStatefulDecoder_HEAD \
_MultibyteStatefulCodec_HEAD \
unsigned char pending[MAXDECPENDING]; \
Py_ssize_t pendingsize;
/*
 * MultibyteStatefulDecoderContext: struct consisting of exactly the
 * decoder field list (_MultibyteStatefulDecoder_HEAD).
 */
typedef struct {
_MultibyteStatefulDecoder_HEAD
} MultibyteStatefulDecoderContext;
/*
 * MultibyteIncrementalEncoderObject: declared with the same field list
 * as MultibyteStatefulEncoderContext (_MultibyteStatefulEncoder_HEAD and
 * nothing else).
 */
typedef struct {
_MultibyteStatefulEncoder_HEAD
} MultibyteIncrementalEncoderObject;
/*
 * MultibyteIncrementalDecoderObject: declared with the same field list
 * as MultibyteStatefulDecoderContext (_MultibyteStatefulDecoder_HEAD and
 * nothing else).
 */
typedef struct {
_MultibyteStatefulDecoder_HEAD
} MultibyteIncrementalDecoderObject;
/*
 * MultibyteStreamReaderObject: the decoder field list followed by a
 * `stream` object pointer.
 * NOTE(review): `stream` is presumably the underlying stream object the
 * reader pulls encoded bytes from -- confirm.
 */
typedef struct {
_MultibyteStatefulDecoder_HEAD
PyObject *stream;
} MultibyteStreamReaderObject;
/*
 * MultibyteStreamWriterObject: the encoder field list followed by a
 * `stream` object pointer.
 * NOTE(review): `stream` is presumably the underlying stream object the
 * writer pushes encoded bytes to -- confirm.
 */
typedef struct {
_MultibyteStatefulEncoder_HEAD
PyObject *stream;
} MultibyteStreamWriterObject;
/*
 * Status codes.  The named conditions below are all negative; positive
 * values are reserved for illegal sequences.
 * NOTE(review): a positive value presumably carries the length of the
 * offending input -- confirm against the codecs.
 */
#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */
#define MBERR_TOOFEW (-2) /* incomplete input buffer */
#define MBERR_INTERNAL (-3) /* internal runtime error */
#define MBERR_EXCEPTION (-4) /* an exception has been raised */
/*
 * Sentinel values for an `errors` slot.  They are small integers cast to
 * PyObject * and must never be dereferenced or reference-counted.  Each
 * expansion is fully parenthesised so it behaves as a single operand in
 * any expression.
 */
#define ERROR_STRICT    ((PyObject *)(1))
#define ERROR_IGNORE    ((PyObject *)(2))
#define ERROR_REPLACE   ((PyObject *)(3))

/*
 * ERROR_ISCUSTOM(p): true when `p` is not one of the three sentinels
 * above.  NULL also tests true here, because it compares below
 * ERROR_STRICT.  `p` is evaluated twice.
 */
#define ERROR_ISCUSTOM(p)   ((p) < ERROR_STRICT || ERROR_REPLACE < (p))

/*
 * ERROR_DECREF(p): Py_DECREF `p` only when it is a real object, i.e.
 * neither NULL nor one of the sentinels.
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, so the macro acts as one statement and is safe in an
 * unbraced if/else.  `p` is evaluated more than once; do not pass an
 * expression with side effects.
 */
#define ERROR_DECREF(p)                             \
    do {                                            \
        if ((p) != NULL && ERROR_ISCUSTOM(p)) {     \
            Py_DECREF(p);                           \
        }                                           \
    } while (0)
/*
 * Encoder flag bits.
 * NOTE(review): presumably what is passed as the `flags` argument of
 * mbencode_func -- confirm.
 */
#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
/* Highest flag value defined; currently the same as MBENC_FLUSH. */
#define MBENC_MAX MBENC_FLUSH
/*
 * Name string for capsule objects related to this module.
 * NOTE(review): presumably the PyCapsule name under which the codec
 * modules export their mapping tables -- confirm against the code that
 * creates and reads the capsules.
 */
#define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*"
#ifdef __cplusplus
}
#endif
#endif