mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-14 18:27:54 +00:00
We can now link even smaller Python binaries. For example, the hello.com program in the Python build directory is a compiled linked executable of hello.py which just prints hello world. Using decentralized sections, we can make that binary 1.9mb in size (noting that python.com is 6.3 megs!). This works for nontrivial programs too. For example, say we want an APE binary that's equivalent to python.com -m http.server. Our makefile now builds such a binary using the new launcher, and it's only 3.2mb in size, since Python sources get turned into ELF objects which tell our linker that we need things like native hashing algorithm code.
788 lines
22 KiB
C
788 lines
22 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Python 3 │
|
|
│ https://docs.python.org/3/license.html │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
/* clang-format off */
|
|
/*
|
|
* _codecs_jp.c: Codecs collection for Japanese encodings
|
|
*
|
|
* Written by Hye-Shik "Bourne to Macro" Chang <perky@FreeBSD.org>
|
|
*/
|
|
|
|
#define USING_BINARY_PAIR_SEARCH
|
|
#define EMPBASE 0x20000
|
|
|
|
#include "third_party/python/Modules/cjkcodecs/cjkcodecs.h"
|
|
#include "third_party/python/Modules/cjkcodecs/mappings_jp.inc"
|
|
#include "third_party/python/Modules/cjkcodecs/mappings_jisx0213_pair.inc"
|
|
#include "third_party/python/Modules/cjkcodecs/alg_jisx0201.inc"
|
|
#include "third_party/python/Include/yoink.h"
|
|
#include "third_party/python/Include/import.h"
|
|
#include "third_party/python/Modules/cjkcodecs/emu_jisx0213_2000.inc"
|
|
|
|
/*
 * Names this translation unit provides: the module itself, one entry per
 * "__map_*" attribute, and getcodec.
 * NOTE(review): PYTHON_PROVIDE comes from yoink.h; presumably it records
 * these names for the linker-driven import machinery -- confirm there.
 */
PYTHON_PROVIDE("_codecs_jp");
PYTHON_PROVIDE("_codecs_jp.__map_cp932ext");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0208");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0212");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_1_bmp");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_1_emp");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_2_bmp");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_2_emp");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_bmp");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_emp");
PYTHON_PROVIDE("_codecs_jp.__map_jisx0213_pair");
PYTHON_PROVIDE("_codecs_jp.__map_jisxcommon");
PYTHON_PROVIDE("_codecs_jp.getcodec");
|
|
|
|
/*
|
|
* CP932 codec
|
|
*/
|
|
|
|
ENCODER(cp932)
|
|
{
|
|
while (*inpos < inlen) {
|
|
Py_UCS4 c = INCHAR1;
|
|
DBCHAR code;
|
|
unsigned char c1, c2;
|
|
|
|
if (c <= 0x80) {
|
|
WRITEBYTE1((unsigned char)c);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
else if (c >= 0xff61 && c <= 0xff9f) {
|
|
WRITEBYTE1(c - 0xfec0);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
else if (c >= 0xf8f0 && c <= 0xf8f3) {
|
|
/* Windows compatibility */
|
|
REQUIRE_OUTBUF(1);
|
|
if (c == 0xf8f0)
|
|
OUTBYTE1(0xa0);
|
|
else
|
|
OUTBYTE1(c - 0xf8f1 + 0xfd);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
|
|
if (c > 0xFFFF)
|
|
return 1;
|
|
REQUIRE_OUTBUF(2);
|
|
|
|
if (TRYMAP_ENC(cp932ext, code, c)) {
|
|
OUTBYTE1(code >> 8);
|
|
OUTBYTE2(code & 0xff);
|
|
}
|
|
else if (TRYMAP_ENC(jisxcommon, code, c)) {
|
|
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
|
return 1;
|
|
|
|
/* JIS X 0208 */
|
|
c1 = code >> 8;
|
|
c2 = code & 0xff;
|
|
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
|
c1 = (c1 - 0x21) >> 1;
|
|
OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1);
|
|
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
|
|
}
|
|
else if (c >= 0xe000 && c < 0xe758) {
|
|
/* User-defined area */
|
|
c1 = (Py_UCS4)(c - 0xe000) / 188;
|
|
c2 = (Py_UCS4)(c - 0xe000) % 188;
|
|
OUTBYTE1(c1 + 0xf0);
|
|
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
|
|
}
|
|
else
|
|
return 1;
|
|
|
|
NEXT(1, 2);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
DECODER(cp932)
|
|
{
|
|
while (inleft > 0) {
|
|
unsigned char c = INBYTE1, c2;
|
|
Py_UCS4 decoded;
|
|
|
|
if (c <= 0x80) {
|
|
OUTCHAR(c);
|
|
NEXT_IN(1);
|
|
continue;
|
|
}
|
|
else if (c >= 0xa0 && c <= 0xdf) {
|
|
if (c == 0xa0)
|
|
OUTCHAR(0xf8f0); /* half-width katakana */
|
|
else
|
|
OUTCHAR(0xfec0 + c);
|
|
NEXT_IN(1);
|
|
continue;
|
|
}
|
|
else if (c >= 0xfd/* && c <= 0xff*/) {
|
|
/* Windows compatibility */
|
|
OUTCHAR(0xf8f1 - 0xfd + c);
|
|
NEXT_IN(1);
|
|
continue;
|
|
}
|
|
|
|
REQUIRE_INBUF(2);
|
|
c2 = INBYTE2;
|
|
|
|
if (TRYMAP_DEC(cp932ext, decoded, c, c2))
|
|
OUTCHAR(decoded);
|
|
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
|
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
|
return 1;
|
|
|
|
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
|
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
|
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
|
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
|
|
|
if (TRYMAP_DEC(jisx0208, decoded, c, c2))
|
|
OUTCHAR(decoded);
|
|
else
|
|
return 1;
|
|
}
|
|
else if (c >= 0xf0 && c <= 0xf9) {
|
|
if ((c2 >= 0x40 && c2 <= 0x7e) ||
|
|
(c2 >= 0x80 && c2 <= 0xfc))
|
|
OUTCHAR(0xe000 + 188 * (c - 0xf0) +
|
|
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
|
|
else
|
|
return 1;
|
|
}
|
|
else
|
|
return 1;
|
|
|
|
NEXT_IN(2);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* EUC-JIS-2004 codec
|
|
*/
|
|
|
|
ENCODER(euc_jis_2004)
|
|
{
|
|
while (*inpos < inlen) {
|
|
Py_UCS4 c = INCHAR1;
|
|
DBCHAR code;
|
|
Py_ssize_t insize;
|
|
|
|
if (c < 0x80) {
|
|
WRITEBYTE1(c);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
|
|
insize = 1;
|
|
|
|
if (c <= 0xFFFF) {
|
|
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
|
else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
|
|
if (code == MULTIC) {
|
|
if (inlen - *inpos < 2) {
|
|
if (flags & MBENC_FLUSH) {
|
|
code = find_pairencmap(
|
|
(ucs2_t)c, 0,
|
|
jisx0213_pair_encmap,
|
|
JISX0213_ENCPAIRS);
|
|
if (code == DBCINV)
|
|
return 1;
|
|
}
|
|
else
|
|
return MBERR_TOOFEW;
|
|
}
|
|
else {
|
|
Py_UCS4 c2 = INCHAR2;
|
|
code = find_pairencmap(
|
|
(ucs2_t)c, c2,
|
|
jisx0213_pair_encmap,
|
|
JISX0213_ENCPAIRS);
|
|
if (code == DBCINV) {
|
|
code = find_pairencmap(
|
|
(ucs2_t)c, 0,
|
|
jisx0213_pair_encmap,
|
|
JISX0213_ENCPAIRS);
|
|
if (code == DBCINV)
|
|
return 1;
|
|
} else
|
|
insize = 2;
|
|
}
|
|
}
|
|
}
|
|
else if (TRYMAP_ENC(jisxcommon, code, c))
|
|
;
|
|
else if (c >= 0xff61 && c <= 0xff9f) {
|
|
/* JIS X 0201 half-width katakana */
|
|
WRITEBYTE2(0x8e, c - 0xfec0);
|
|
NEXT(1, 2);
|
|
continue;
|
|
}
|
|
else if (c == 0xff3c)
|
|
/* F/W REVERSE SOLIDUS (see NOTES) */
|
|
code = 0x2140;
|
|
else if (c == 0xff5e)
|
|
/* F/W TILDE (see NOTES) */
|
|
code = 0x2232;
|
|
else
|
|
return 1;
|
|
}
|
|
else if (c >> 16 == EMPBASE >> 16) {
|
|
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
|
else if (TRYMAP_ENC(jisx0213_emp, code, c & 0xffff))
|
|
;
|
|
else
|
|
return insize;
|
|
}
|
|
else
|
|
return insize;
|
|
|
|
if (code & 0x8000) {
|
|
/* Codeset 2 */
|
|
WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
|
|
NEXT(insize, 3);
|
|
} else {
|
|
/* Codeset 1 */
|
|
WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
|
|
NEXT(insize, 2);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
DECODER(euc_jis_2004)
|
|
{
|
|
while (inleft > 0) {
|
|
unsigned char c = INBYTE1;
|
|
Py_UCS4 code, decoded;
|
|
|
|
if (c < 0x80) {
|
|
OUTCHAR(c);
|
|
NEXT_IN(1);
|
|
continue;
|
|
}
|
|
|
|
if (c == 0x8e) {
|
|
/* JIS X 0201 half-width katakana */
|
|
unsigned char c2;
|
|
|
|
REQUIRE_INBUF(2);
|
|
c2 = INBYTE2;
|
|
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
|
OUTCHAR(0xfec0 + c2);
|
|
NEXT_IN(2);
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else if (c == 0x8f) {
|
|
unsigned char c2, c3;
|
|
|
|
REQUIRE_INBUF(3);
|
|
c2 = INBYTE2 ^ 0x80;
|
|
c3 = INBYTE3 ^ 0x80;
|
|
|
|
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
|
|
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
|
|
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c2, c3))
|
|
OUTCHAR(decoded);
|
|
else if (TRYMAP_DEC(jisx0213_2_emp, code, c2, c3)) {
|
|
OUTCHAR(EMPBASE | code);
|
|
NEXT_IN(3);
|
|
continue;
|
|
}
|
|
else if (TRYMAP_DEC(jisx0212, decoded, c2, c3))
|
|
OUTCHAR(decoded);
|
|
else
|
|
return 1;
|
|
NEXT_IN(3);
|
|
}
|
|
else {
|
|
unsigned char c2;
|
|
|
|
REQUIRE_INBUF(2);
|
|
c ^= 0x80;
|
|
c2 = INBYTE2 ^ 0x80;
|
|
|
|
/* JIS X 0213 Plane 1 */
|
|
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
|
|
else if (c == 0x21 && c2 == 0x40)
|
|
OUTCHAR(0xff3c);
|
|
else if (c == 0x22 && c2 == 0x32)
|
|
OUTCHAR(0xff5e);
|
|
else if (TRYMAP_DEC(jisx0208, decoded, c, c2))
|
|
OUTCHAR(decoded);
|
|
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c, c2))
|
|
OUTCHAR(decoded);
|
|
else if (TRYMAP_DEC(jisx0213_1_emp, code, c, c2)) {
|
|
OUTCHAR(EMPBASE | code);
|
|
NEXT_IN(2);
|
|
continue;
|
|
}
|
|
else if (TRYMAP_DEC(jisx0213_pair, code, c, c2)) {
|
|
OUTCHAR2(code >> 16, code & 0xffff);
|
|
NEXT_IN(2);
|
|
continue;
|
|
}
|
|
else
|
|
return 1;
|
|
NEXT_IN(2);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* EUC-JP codec
|
|
*/
|
|
|
|
ENCODER(euc_jp)
|
|
{
|
|
while (*inpos < inlen) {
|
|
Py_UCS4 c = INCHAR1;
|
|
DBCHAR code;
|
|
|
|
if (c < 0x80) {
|
|
WRITEBYTE1((unsigned char)c);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
|
|
if (c > 0xFFFF)
|
|
return 1;
|
|
|
|
if (TRYMAP_ENC(jisxcommon, code, c))
|
|
;
|
|
else if (c >= 0xff61 && c <= 0xff9f) {
|
|
/* JIS X 0201 half-width katakana */
|
|
WRITEBYTE2(0x8e, c - 0xfec0);
|
|
NEXT(1, 2);
|
|
continue;
|
|
}
|
|
#ifndef STRICT_BUILD
|
|
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
|
|
code = 0x2140;
|
|
else if (c == 0xa5) { /* YEN SIGN */
|
|
WRITEBYTE1(0x5c);
|
|
NEXT(1, 1);
|
|
continue;
|
|
} else if (c == 0x203e) { /* OVERLINE */
|
|
WRITEBYTE1(0x7e);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
#endif
|
|
else
|
|
return 1;
|
|
|
|
if (code & 0x8000) {
|
|
/* JIS X 0212 */
|
|
WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80);
|
|
NEXT(1, 3);
|
|
} else {
|
|
/* JIS X 0208 */
|
|
WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80);
|
|
NEXT(1, 2);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
DECODER(euc_jp)
|
|
{
|
|
while (inleft > 0) {
|
|
unsigned char c = INBYTE1;
|
|
Py_UCS4 decoded;
|
|
|
|
if (c < 0x80) {
|
|
OUTCHAR(c);
|
|
NEXT_IN(1);
|
|
continue;
|
|
}
|
|
|
|
if (c == 0x8e) {
|
|
/* JIS X 0201 half-width katakana */
|
|
unsigned char c2;
|
|
|
|
REQUIRE_INBUF(2);
|
|
c2 = INBYTE2;
|
|
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
|
OUTCHAR(0xfec0 + c2);
|
|
NEXT_IN(2);
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else if (c == 0x8f) {
|
|
unsigned char c2, c3;
|
|
|
|
REQUIRE_INBUF(3);
|
|
c2 = INBYTE2;
|
|
c3 = INBYTE3;
|
|
/* JIS X 0212 */
|
|
if (TRYMAP_DEC(jisx0212, decoded, c2 ^ 0x80, c3 ^ 0x80)) {
|
|
OUTCHAR(decoded);
|
|
NEXT_IN(3);
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else {
|
|
unsigned char c2;
|
|
|
|
REQUIRE_INBUF(2);
|
|
c2 = INBYTE2;
|
|
/* JIS X 0208 */
|
|
#ifndef STRICT_BUILD
|
|
if (c == 0xa1 && c2 == 0xc0)
|
|
/* FULL-WIDTH REVERSE SOLIDUS */
|
|
OUTCHAR(0xff3c);
|
|
else
|
|
#endif
|
|
if (TRYMAP_DEC(jisx0208, decoded, c ^ 0x80, c2 ^ 0x80))
|
|
OUTCHAR(decoded);
|
|
else
|
|
return 1;
|
|
NEXT_IN(2);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* SHIFT_JIS codec
|
|
*/
|
|
|
|
ENCODER(shift_jis)
|
|
{
|
|
while (*inpos < inlen) {
|
|
Py_UCS4 c = INCHAR1;
|
|
DBCHAR code;
|
|
unsigned char c1, c2;
|
|
|
|
#ifdef STRICT_BUILD
|
|
JISX0201_R_ENCODE(c, code)
|
|
#else
|
|
if (c < 0x80)
|
|
code = c;
|
|
else if (c == 0x00a5)
|
|
code = 0x5c; /* YEN SIGN */
|
|
else if (c == 0x203e)
|
|
code = 0x7e; /* OVERLINE */
|
|
#endif
|
|
else JISX0201_K_ENCODE(c, code)
|
|
else if (c > 0xFFFF)
|
|
return 1;
|
|
else
|
|
code = NOCHAR;
|
|
|
|
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
|
REQUIRE_OUTBUF(1);
|
|
|
|
OUTBYTE1((unsigned char)code);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
|
|
REQUIRE_OUTBUF(2);
|
|
|
|
if (code == NOCHAR) {
|
|
if (TRYMAP_ENC(jisxcommon, code, c))
|
|
;
|
|
#ifndef STRICT_BUILD
|
|
else if (c == 0xff3c)
|
|
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
|
|
#endif
|
|
else
|
|
return 1;
|
|
|
|
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
|
return 1;
|
|
}
|
|
|
|
c1 = code >> 8;
|
|
c2 = code & 0xff;
|
|
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
|
c1 = (c1 - 0x21) >> 1;
|
|
OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1);
|
|
OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41);
|
|
NEXT(1, 2);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
DECODER(shift_jis)
|
|
{
|
|
while (inleft > 0) {
|
|
unsigned char c = INBYTE1;
|
|
Py_UCS4 decoded;
|
|
|
|
#ifdef STRICT_BUILD
|
|
JISX0201_R_DECODE(c, writer)
|
|
#else
|
|
if (c < 0x80)
|
|
OUTCHAR(c);
|
|
#endif
|
|
else JISX0201_K_DECODE(c, writer)
|
|
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
|
unsigned char c1, c2;
|
|
|
|
REQUIRE_INBUF(2);
|
|
c2 = INBYTE2;
|
|
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
|
return 1;
|
|
|
|
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
|
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
|
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
|
|
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
|
|
|
#ifndef STRICT_BUILD
|
|
if (c1 == 0x21 && c2 == 0x40) {
|
|
/* FULL-WIDTH REVERSE SOLIDUS */
|
|
OUTCHAR(0xff3c);
|
|
NEXT_IN(2);
|
|
continue;
|
|
}
|
|
#endif
|
|
if (TRYMAP_DEC(jisx0208, decoded, c1, c2)) {
|
|
OUTCHAR(decoded);
|
|
NEXT_IN(2);
|
|
continue;
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else
|
|
return 1;
|
|
|
|
NEXT_IN(1); /* JIS X 0201 */
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* SHIFT_JIS-2004 codec
|
|
*/
|
|
|
|
ENCODER(shift_jis_2004)
|
|
{
|
|
while (*inpos < inlen) {
|
|
Py_UCS4 c = INCHAR1;
|
|
DBCHAR code = NOCHAR;
|
|
int c1, c2;
|
|
Py_ssize_t insize;
|
|
|
|
JISX0201_ENCODE(c, code)
|
|
|
|
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
|
WRITEBYTE1((unsigned char)code);
|
|
NEXT(1, 1);
|
|
continue;
|
|
}
|
|
|
|
REQUIRE_OUTBUF(2);
|
|
insize = 1;
|
|
|
|
if (code == NOCHAR) {
|
|
if (c <= 0xffff) {
|
|
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
|
else if (TRYMAP_ENC(jisx0213_bmp, code, c)) {
|
|
if (code == MULTIC) {
|
|
if (inlen - *inpos < 2) {
|
|
if (flags & MBENC_FLUSH) {
|
|
code = find_pairencmap
|
|
((ucs2_t)c, 0,
|
|
jisx0213_pair_encmap,
|
|
JISX0213_ENCPAIRS);
|
|
if (code == DBCINV)
|
|
return 1;
|
|
}
|
|
else
|
|
return MBERR_TOOFEW;
|
|
}
|
|
else {
|
|
Py_UCS4 ch2 = INCHAR2;
|
|
code = find_pairencmap(
|
|
(ucs2_t)c, ch2,
|
|
jisx0213_pair_encmap,
|
|
JISX0213_ENCPAIRS);
|
|
if (code == DBCINV) {
|
|
code = find_pairencmap(
|
|
(ucs2_t)c, 0,
|
|
jisx0213_pair_encmap,
|
|
JISX0213_ENCPAIRS);
|
|
if (code == DBCINV)
|
|
return 1;
|
|
}
|
|
else
|
|
insize = 2;
|
|
}
|
|
}
|
|
}
|
|
else if (TRYMAP_ENC(jisxcommon, code, c)) {
|
|
/* abandon JIS X 0212 codes */
|
|
if (code & 0x8000)
|
|
return 1;
|
|
}
|
|
else
|
|
return 1;
|
|
}
|
|
else if (c >> 16 == EMPBASE >> 16) {
|
|
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
|
else if (TRYMAP_ENC(jisx0213_emp, code, c&0xffff))
|
|
;
|
|
else
|
|
return insize;
|
|
}
|
|
else
|
|
return insize;
|
|
}
|
|
|
|
c1 = code >> 8;
|
|
c2 = (code & 0xff) - 0x21;
|
|
|
|
if (c1 & 0x80) {
|
|
/* Plane 2 */
|
|
if (c1 >= 0xee)
|
|
c1 -= 0x87;
|
|
else if (c1 >= 0xac || c1 == 0xa8)
|
|
c1 -= 0x49;
|
|
else
|
|
c1 -= 0x43;
|
|
}
|
|
else {
|
|
/* Plane 1 */
|
|
c1 -= 0x21;
|
|
}
|
|
|
|
if (c1 & 1)
|
|
c2 += 0x5e;
|
|
c1 >>= 1;
|
|
OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1));
|
|
OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41));
|
|
|
|
NEXT(insize, 2);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
DECODER(shift_jis_2004)
|
|
{
|
|
while (inleft > 0) {
|
|
unsigned char c = INBYTE1;
|
|
|
|
JISX0201_DECODE(c, writer)
|
|
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
|
|
unsigned char c1, c2;
|
|
Py_UCS4 code, decoded;
|
|
|
|
REQUIRE_INBUF(2);
|
|
c2 = INBYTE2;
|
|
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
|
return 1;
|
|
|
|
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
|
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
|
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
|
|
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
|
|
|
if (c1 < 0x5e) { /* Plane 1 */
|
|
c1 += 0x21;
|
|
EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
|
|
c1, c2)
|
|
else if (TRYMAP_DEC(jisx0208, decoded, c1, c2))
|
|
OUTCHAR(decoded);
|
|
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c1, c2))
|
|
OUTCHAR(decoded);
|
|
else if (TRYMAP_DEC(jisx0213_1_emp, code, c1, c2))
|
|
OUTCHAR(EMPBASE | code);
|
|
else if (TRYMAP_DEC(jisx0213_pair, code, c1, c2))
|
|
OUTCHAR2(code >> 16, code & 0xffff);
|
|
else
|
|
return 1;
|
|
NEXT_IN(2);
|
|
}
|
|
else { /* Plane 2 */
|
|
if (c1 >= 0x67)
|
|
c1 += 0x07;
|
|
else if (c1 >= 0x63 || c1 == 0x5f)
|
|
c1 -= 0x37;
|
|
else
|
|
c1 -= 0x3d;
|
|
|
|
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
|
|
c1, c2)
|
|
else if (TRYMAP_DEC(jisx0213_2_bmp, decoded, c1, c2))
|
|
OUTCHAR(decoded);
|
|
else if (TRYMAP_DEC(jisx0213_2_emp, code, c1, c2)) {
|
|
OUTCHAR(EMPBASE | code);
|
|
NEXT_IN(2);
|
|
continue;
|
|
}
|
|
else
|
|
return 1;
|
|
NEXT_IN(2);
|
|
}
|
|
continue;
|
|
}
|
|
else
|
|
return 1;
|
|
|
|
NEXT_IN(1); /* JIS X 0201 */
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
BEGIN_MAPPINGS_LIST
|
|
MAPPING_DECONLY(jisx0208)
|
|
MAPPING_DECONLY(jisx0212)
|
|
MAPPING_ENCONLY(jisxcommon)
|
|
MAPPING_DECONLY(jisx0213_1_bmp)
|
|
MAPPING_DECONLY(jisx0213_2_bmp)
|
|
MAPPING_ENCONLY(jisx0213_bmp)
|
|
MAPPING_DECONLY(jisx0213_1_emp)
|
|
MAPPING_DECONLY(jisx0213_2_emp)
|
|
MAPPING_ENCONLY(jisx0213_emp)
|
|
MAPPING_ENCDEC(jisx0213_pair)
|
|
MAPPING_ENCDEC(cp932ext)
|
|
END_MAPPINGS_LIST
|
|
|
|
BEGIN_CODECS_LIST
|
|
CODEC_STATELESS(shift_jis)
|
|
CODEC_STATELESS(cp932)
|
|
CODEC_STATELESS(euc_jp)
|
|
CODEC_STATELESS(shift_jis_2004)
|
|
CODEC_STATELESS(euc_jis_2004)
|
|
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
|
|
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
|
|
END_CODECS_LIST
|
|
|
|
/* Module boilerplate for "_codecs_jp".
   NOTE(review): the macro is defined outside this file (cjkcodecs.h);
   presumably it emits the module definition and PyInit__codecs_jp, which
   the inittab entry in this file refers to -- confirm there. */
I_AM_A_MODULE_FOR(jp)
|
|
|
|
/* Init-table record pairing the module name with its init function; it is
   placed in the ".rodata.pytab.1" section. */
_Section(".rodata.pytab.1")
const struct _inittab _PyImport_Inittab__codecs_jp = {
    "_codecs_jp",
    PyInit__codecs_jp,
};
|