python-3.6.zip added from Github

README.cosmo contains the necessary links.
This commit is contained in:
ahgamut 2021-08-08 09:38:33 +05:30 committed by Justine Tunney
parent 75fc601ff5
commit 0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions

View file

@ -0,0 +1,53 @@
#!/usr/bin/env python3
""" Compare the output of two codecs.
(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
Licensed to PSF under a Contributor Agreement.
"""
import sys
def _encode_or_undefined(char, encoding):
    """Return *char* encoded with *encoding*, or '<undefined>' on UnicodeError."""
    try:
        return char.encode(encoding)
    except UnicodeError:
        return '<undefined>'


def _decode_or_undefined(data, encoding):
    """Return *data* decoded with *encoding*, or '<undefined>' on UnicodeError."""
    try:
        return data.decode(encoding)
    except UnicodeError:
        return '<undefined>'


def compare_codecs(encoding1, encoding2, max_codepoint=None):
    """Print every code point / byte on which two codecs disagree.

    The encoding direction is checked for all code points from 0 up to and
    including *max_codepoint*; the decoding direction is checked for the 256
    single-byte values.  A value that one codec cannot convert is compared
    as the marker string '<undefined>'.

    Parameters:
        encoding1, encoding2: codec names passed to str.encode/bytes.decode.
        max_codepoint: highest code point to encode; None (the default)
            means sys.maxunicode, i.e. the full range.

    Returns None; the report is written to standard output.
    """
    if max_codepoint is None:
        max_codepoint = sys.maxunicode

    print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
    mismatch = 0

    # Check encoding
    for i in range(max_codepoint + 1):
        u = chr(i)
        c1 = _encode_or_undefined(u, encoding1)
        c2 = _encode_or_undefined(u, encoding2)
        if c1 != c2:
            print(' * encoding mismatch for 0x%04X: %-14r != %r' % \
                  (i, c1, c2))
            mismatch += 1

    # Check decoding
    for i in range(256):
        c = bytes([i])
        u1 = _decode_or_undefined(c, encoding1)
        u2 = _decode_or_undefined(c, encoding2)
        if u1 != u2:
            print(' * decoding mismatch for 0x%04X: %-14r != %r' % \
                  (i, u1, u2))
            mismatch += 1

    if mismatch:
        print()
        print('Found %i mismatches' % mismatch)
    else:
        print('-> Codecs are identical.')
if __name__ == '__main__':
compare_codecs(sys.argv[1], sys.argv[2])

View file

@ -0,0 +1,68 @@
import os, string
# Maps an owner key to the encodings generated for it.  gencodecs() passes
# the key to TEMPLATE as $owner, so the generated module imports
# ``_codecs_<key>``, and writes one ``<encoding>.py`` file per listed name.
# Note: in this script the name ``codecs`` is this dict; the script itself
# only imports ``os`` and ``string``.
codecs = {
'cn': ('gb2312', 'gbk', 'gb18030', 'hz'),
'tw': ('big5', 'cp950'),
'hk': ('big5hkscs',),
'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
'euc_jis_2004', 'shift_jis_2004'),
'kr': ('cp949', 'euc_kr', 'johab'),
'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
'iso2022_kr'),
}
# Source text of one generated codec module.  Placeholders:
#   $encoding - lower-cased encoding name (module name and getcodec() key)
#   $ENCODING - upper-cased encoding name (header comment only)
#   $owner    - suffix of the ``_codecs_*`` module that provides the codec
# The class and function bodies must be indented here, otherwise the
# generated module cannot be compiled.
TEMPLATE = string.Template("""\
#
# $encoding.py: Python Unicode Codec for $ENCODING
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_$owner, codecs
import _multibytecodec as mbc
codec = _codecs_$owner.getcodec('$encoding')
class Codec(codecs.Codec):
    encode = codec.encode
    decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
                         codecs.IncrementalEncoder):
    codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
                         codecs.IncrementalDecoder):
    codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
    codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
    codec = codec
def getregentry():
    return codecs.CodecInfo(
        name='$encoding',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
""")
def gencodecs(prefix):
    """Write one generated codec module per configured encoding.

    For every (owner, encodings) entry of the module-level ``codecs`` table
    the TEMPLATE is filled in and written to ``<prefix>/<encoding>.py``.

    Parameters:
        prefix: directory that receives the generated files.
    """
    for loc, encodings in codecs.items():
        for enc in encodings:
            code = TEMPLATE.substitute(ENCODING=enc.upper(),
                                       encoding=enc.lower(),
                                       owner=loc)
            codecpath = os.path.join(prefix, enc + '.py')
            # Close the file deterministically instead of relying on
            # garbage collection of the anonymous file object.
            with open(codecpath, 'w') as f:
                f.write(code)
if __name__ == '__main__':
import sys
gencodecs(sys.argv[1])

View file

@ -0,0 +1,429 @@
""" Unicode Mapping Parser and Codec Generator.
This script parses Unicode mapping files as available from the Unicode
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
modules from them. The codecs use the standard character mapping codec
to actually apply the mapping.
Synopsis: gencodec.py dir codec_prefix
All files in dir are scanned and those producing non-empty mappings
will be written to <codec_prefix><mapname>.py with <mapname> being the
first part of the map's filename ('a' in a.b.c.txt) converted to
lowercase with hyphens replaced by underscores.
The tool also writes marshalled versions of the mapping tables to the
same location (with .mapping extension).
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright Guido van Rossum, 2000.
Table generation:
(c) Copyright Marc-Andre Lemburg, 2005.
Licensed to PSF under a Contributor Agreement.
"""#"
import re, os, marshal, codecs
# Maximum allowed size of charmap tables
MAX_TABLE_SIZE = 8192
# Standard undefined Unicode code point
UNI_UNDEFINED = chr(0xFFFE)
# Placeholder for a missing code point
MISSING_CODE = -1
# Matches one mapping line.  Group 1 is the encoded value (one or more
# '+'-joined 0x... codes), group 2 the Unicode value (0x... codes and/or
# <meta> tags joined by '+', possibly empty) and group 3 an optional
# trailing '#' comment.
# NOTE(review): the hex class in group 2 is [0-9a-fA-Z], which also accepts
# the letters G-Z; this looks like a typo for A-F -- confirm before changing.
mapRE = re.compile(r'((?:0x[0-9a-fA-F]+\+?)+)'
r'\s+'
r'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
r'\s*'
r'(#.+)?')
def parsecodes(codes, len=len, range=range):
    """ Converts code combinations to either a single code integer
        or a tuple of integers.

        *codes* is a string of hexadecimal codes joined by '+'.

        An empty string is returned as MISSING_CODE.  A string without
        '+' is converted directly, so a single token that is not a
        hexadecimal number raises ValueError.

        In a '+'-joined combination, tokens that are not hexadecimal
        numbers (meta-codes in angular brackets, e.g. <LR> and <RL>, or
        empty tokens) are ignored.  If exactly one code remains it is
        returned as an integer, otherwise the remaining codes are
        returned as a (possibly empty) tuple.

        The len and range parameters are kept for backward
        compatibility; range is no longer used.
    """
    if not codes:
        return MISSING_CODE
    parts = codes.split('+')
    if len(parts) == 1:
        return int(parts[0], 16)
    values = []
    for part in parts:
        try:
            value = int(part, 16)
        except ValueError:
            # meta-code or empty token
            continue
        if value != MISSING_CODE:
            values.append(value)
    if len(values) == 1:
        return values[0]
    return tuple(values)
def readmap(filename):
    """Parse a Unicode mapping file into an encoding -> Unicode dictionary.

    Each matching line contributes ``enc2uni[enc] = (uni, comment)`` where
    enc and uni are the values returned by parsecodes().  Blank lines,
    lines starting with '#' and lines not matched by mapRE are skipped.

    The code points 0x00-0x1F and 0x7F are pre-seeded as identity mappings
    with the comment 'CONTROL CHARACTER'; explicit entries in the file
    override them.

    If at least as many single-byte codes are identity-mapped as are left
    unmapped, the unmapped codes are set to (MISSING_CODE, "") and the
    extra key 'IDENTITY' is set to 256.

    Returns the dictionary.
    """
    # Read everything up front; the context manager closes the file even
    # if reading fails.
    with open(filename, 'r') as f:
        lines = f.readlines()
    enc2uni = {}
    identity = []
    unmapped = list(range(256))

    # UTC mapping tables per convention don't include the identity
    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
    # explicitly mapped to different characters or undefined
    for i in list(range(32)) + [127]:
        identity.append(i)
        unmapped.remove(i)
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            continue
        enc, uni, comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if comment is None:
            comment = ''
        else:
            comment = comment[1:].strip()
        # Book-keeping for the identity heuristic only applies to
        # single-byte codes.
        if not isinstance(enc, tuple) and enc < 256:
            if enc in unmapped:
                unmapped.remove(enc)
            if enc == uni:
                identity.append(enc)
        enc2uni[enc] = (uni, comment)

    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to MISSING_CODE for the rest
    if len(identity) >= len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (MISSING_CODE, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni
def hexrepr(t, precision=4):
    """Format an integer, or a sequence of integers, as hex literal text.

    None gives 'None'; an object without a length gives a single
    zero-padded '0x...' literal of at least *precision* digits; anything
    with a length gives a parenthesised, comma-separated list of such
    literals.  If an item cannot be formatted the problem is printed and
    the TypeError is re-raised.
    """
    if t is None:
        return 'None'
    try:
        len(t)
    except TypeError:
        # Not a sequence: format the single value.
        return '0x%0*X' % (precision, t)
    try:
        pieces = ['0x%0*X' % (precision, element) for element in t]
    except TypeError as why:
        print('* failed to convert %r: %s' % (t, why))
        raise
    return '(' + ', '.join(pieces) + ')'
def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
    """Return source lines defining *varname* as a mapping dictionary.

    If *map* has an 'IDENTITY' entry, the definition starts from
    codecs.make_identity_dict() and identity mappings below 256 are left
    out; the 'IDENTITY' entry is removed from *map* in place.  Tuple
    values are unpacked as (value, comment); the comment is emitted when
    *comments* is true.  *precisions* gives the hex digit counts for keys
    and values.  The definition is split into several update() calls
    every 4096 entries.
    """
    lines = []
    emit = lines.append

    identity = "IDENTITY" in map
    if identity:
        emit("%s = codecs.make_identity_dict(range(%d))" %
             (varname, map["IDENTITY"]))
        emit("%s.update({" % varname)
        splits = 1
        # Drop the marker so that the remaining keys can be sorted.
        del map["IDENTITY"]
    else:
        emit("%s = {" % varname)
        splits = 0

    mappings = sorted(map.items())
    count = 0
    key_precision, value_precision = precisions

    for mapkey, mapvalue in mappings:
        mapcomment = ''
        if isinstance(mapkey, tuple):
            mapkey, mapcomment = mapkey
        if isinstance(mapvalue, tuple):
            mapvalue, mapcomment = mapvalue
        if mapkey is None:
            continue
        if identity:
            if mapkey == mapvalue and mapkey < 256:
                # No need to include identity mappings, since these
                # are already set for the first 256 code points.
                continue
        key = hexrepr(mapkey, key_precision)
        value = hexrepr(mapvalue, value_precision)
        if not (mapcomment and comments):
            emit(' %s: %s,' % (key, value))
        else:
            emit(' %s: %s,\t# %s' % (key, value, mapcomment))
        count += 1
        if count == 4096:
            # Split the definition into parts to that the Python
            # parser doesn't dump core
            emit('}' if splits == 0 else '})')
            emit('%s.update({' % varname)
            count = 0
            splits += 1

    emit('}' if splits == 0 else '})')
    return lines
def python_tabledef_code(varname, map, comments=1, key_precision=2):
    """Return source lines defining *varname* as a decoding table.

    The table has one character per key from 0 to the largest key (at
    least 255); keys without a mapping use UNI_UNDEFINED.  Tuple values in
    *map* are unpacked as (value, comment).  If *map* has an 'IDENTITY'
    entry the first 256 positions start out as identity mappings and the
    entry is removed from *map* in place.

    Returns None when no table can be produced: the largest key exceeds
    MAX_TABLE_SIZE or a value maps to more than one code point.
    """
    l = []
    append = l.append
    append('%s = (' % varname)

    # Analyze map and create table dict
    table = {}
    maxkey = 255
    if 'IDENTITY' in map:
        for key in range(256):
            table[key] = (key, '')
        del map['IDENTITY']
    # Sort only after the string key 'IDENTITY' is gone: sorting items
    # with mixed str and int keys raises TypeError.
    mappings = sorted(map.items())
    for mapkey, mapvalue in mappings:
        mapcomment = ''
        if isinstance(mapkey, tuple):
            (mapkey, mapcomment) = mapkey
        if isinstance(mapvalue, tuple):
            (mapvalue, mapcomment) = mapvalue
        if mapkey == MISSING_CODE:
            continue
        table[mapkey] = (mapvalue, mapcomment)
        if mapkey > maxkey:
            maxkey = mapkey
    if maxkey > MAX_TABLE_SIZE:
        # Table too large
        return None

    # Create table code
    maxchar = 0
    for key in range(maxkey + 1):
        if key not in table:
            mapvalue = MISSING_CODE
            mapcomment = 'UNDEFINED'
        else:
            mapvalue, mapcomment = table[key]
        if mapvalue == MISSING_CODE:
            mapchar = UNI_UNDEFINED
        else:
            if isinstance(mapvalue, tuple):
                # 1-n mappings not supported
                return None
            else:
                mapchar = chr(mapvalue)
        maxchar = max(maxchar, ord(mapchar))
        if mapcomment and comments:
            append(' %a \t# %s -> %s' % (mapchar,
                                         hexrepr(key, key_precision),
                                         mapcomment))
        else:
            append(' %a' % mapchar)

    if maxchar < 256:
        append(' %a \t## Widen to UCS2 for optimization' % UNI_UNDEFINED)
    append(')')
    return l
def codegen(name, map, encodingname, comments=1):

    """ Returns Python source for the given map.

        name is the source file name quoted in the generated docstring,
        encodingname the codec name; underscores in it are replaced by
        hyphens for the name passed to CodecInfo.

        The generated module uses a decoding table when
        python_tabledef_code() can produce one, and falls back to
        decoding and encoding maps otherwise.

        Comments are included in the source, if comments is true (default).

    """
    # Generate code
    decoding_map_code = python_mapdef_code(
        'decoding_map',
        map,
        comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table',
        map,
        comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map',
        codecs.make_encoding_map(map),
        comments=comments,
        precisions=(4, 2))

    if decoding_table_code:
        suffix = 'table'
    else:
        suffix = 'map'

    # The method and class bodies below must be indented in the emitted
    # text, otherwise the generated module is not valid Python.
    l = [
        '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    def encode(self, input, errors='strict'):
        return codecs.charmap_encode(input, errors, encoding_%s)
    def decode(self, input, errors='strict'):
        return codecs.charmap_decode(input, errors, decoding_%s)
''' % (encodingname, name, suffix, suffix)]
    l.append('''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input, self.errors, encoding_%s)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input, self.errors, decoding_%s)[0]''' %
        (suffix, suffix))

    l.append('''
class StreamWriter(Codec, codecs.StreamWriter):
    pass
class StreamReader(Codec, codecs.StreamReader):
    pass
### encodings module API
def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
''' % encodingname.replace('_', '-'))

    # Add decoding table or map (with preference to the table)
    if not decoding_table_code:
        l.append('''
### Decoding Map
''')
        l.extend(decoding_map_code)
    else:
        l.append('''
### Decoding Table
''')
        l.extend(decoding_table_code)

    # Add encoding map
    if decoding_table_code:
        l.append('''
### Encoding table
encoding_table = codecs.charmap_build(decoding_table)
''')
    else:
        l.append('''
### Encoding Map
''')
        l.extend(encoding_map_code)

    # Final new-line
    l.append('')

    return '\n'.join(l).expandtabs()
def pymap(name,map,pyfile,encodingname,comments=1):
    """Generate the codec source for *map* with codegen() and write it to *pyfile*."""
    code = codegen(name,map,encodingname,comments)
    # The context manager closes the file even if the write fails.
    with open(pyfile,'w') as f:
        f.write(code)
def marshalmap(name,map,marshalfile):
    """Write *map* to *marshalfile* in marshal format.

    Every value of *map* must unpack into two items; it is stored as a
    2-tuple.  The *name* argument is not used.
    """
    d = {e: (u, c) for e, (u, c) in map.items()}
    # The context manager closes the file even if dumping fails.
    with open(marshalfile,'wb') as f:
        marshal.dump(d,f)
def convertdir(dir, dirprefix='', nameprefix='', comments=1):
    """Convert every mapping file in *dir* to a codec module and a marshal file.

    The codec name is *nameprefix* plus the part of the file name before
    the first '.', lower-cased and with '-' replaced by '_'.  Output goes
    to ``<dirprefix><name>.py`` and ``<dirprefix><name>.mapping``.
    Entries of *dir* that are not regular files are skipped.  A ValueError
    raised during conversion is reported and re-raised.
    """
    for mapname in os.listdir(dir):
        mappathname = os.path.join(dir, mapname)
        if os.path.isfile(mappathname):
            stem = os.path.split(mapname)[1].replace('-','_').split('.')[0]
            name = nameprefix + stem.lower()
            pytarget = dirprefix + (name + '.py')
            marshaltarget = dirprefix + (name + '.mapping')
            print('converting %s to %s and %s' % (mapname,
                                                  pytarget,
                                                  marshaltarget))
            try:
                map = readmap(os.path.join(dir,mapname))
                if map:
                    pymap(mappathname, map, pytarget, name, comments)
                    marshalmap(mappathname, map, marshaltarget)
                else:
                    print('* map is empty; skipping')
            except ValueError as why:
                print('* conversion failed: %s' % why)
                raise
def rewritepythondir(dir, dirprefix='', comments=1):
    """Regenerate codec modules from the ``*.mapping`` marshal files in *dir*.

    For every ``<name>.mapping`` file the map is loaded with marshal and
    written to ``<dirprefix><name>.py`` through pymap().  Other directory
    entries are ignored.  A ValueError raised during conversion is
    reported and processing continues with the next file.
    """
    mapnames = os.listdir(dir)
    for mapname in mapnames:
        if not mapname.endswith('.mapping'):
            continue
        name = mapname[:-len('.mapping')]
        codefile = name + '.py'
        print('converting %s to %s' % (mapname,
                                       dirprefix + codefile))
        try:
            # Close the marshal file deterministically instead of leaving
            # the anonymous file object to the garbage collector.
            with open(os.path.join(dir,mapname), 'rb') as f:
                map = marshal.load(f)
            if not map:
                print('* map is empty; skipping')
            else:
                pymap(mapname, map, dirprefix + codefile,name,comments)
        except ValueError as why:
            print('* conversion failed: %s' % why)
if __name__ == '__main__':
import sys
if 1:
convertdir(*sys.argv[1:])
else:
rewritepythondir(*sys.argv[1:])

View file

@ -0,0 +1,61 @@
"""This script generates a Python codec module from a Windows Code Page.
It uses the function MultiByteToWideChar to generate a decoding table.
"""
import ctypes
from ctypes import wintypes
from gencodec import codegen
import unicodedata
def genwinmap(codepage):
    """Build an encoding -> (code point, name) map for a Windows code page.

    Each of the 256 byte values is converted with the kernel32 function
    MultiByteToWideChar.  The name is taken from unicodedata; when the
    character has no name, the pre-seeded 'CONTROL CHARACTER' entry
    (bytes 0x00-0x1F and 0x7F) is used, or '' if there is none.
    """
    convert = ctypes.windll.kernel32.MultiByteToWideChar
    convert.argtypes = [wintypes.UINT, wintypes.DWORD,
                        wintypes.LPCSTR, ctypes.c_int,
                        wintypes.LPWSTR, ctypes.c_int]
    convert.restype = ctypes.c_int

    enc2uni = {}
    for control in list(range(32)) + [127]:
        enc2uni[control] = (control, 'CONTROL CHARACTER')

    for i in range(256):
        buf = ctypes.create_unicode_buffer(2)
        ret = convert(codepage, 0, bytes([i]), 1, buf, 2)
        assert ret == 1, "invalid code page"
        assert buf[1] == '\x00'
        char = buf[0]
        try:
            name = unicodedata.name(char)
        except ValueError:
            # Unnamed character: reuse the seeded comment if there is one.
            if i in enc2uni:
                name = enc2uni[i][1]
            else:
                name = ''
        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
def genwincodec(codepage):
    """Print the source of a codec module for Windows code page *codepage*.

    The module is generated with codegen() from the map returned by
    genwinmap(); everything up to and including the first '\"\"\"#\"'
    marker is replaced by a docstring naming the Windows version and the
    command used.
    """
    import platform
    map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", map, encodingname)
    # Replace first lines with our own docstring
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage)
    remainder = generated.split('"""#"', 1)[1]
    code = header + remainder
    print(code)
if __name__ == '__main__':
import sys
genwincodec(int(sys.argv[1]))

View file

@ -0,0 +1,7 @@
@rem Recreate some python charmap codecs from the Windows function
@rem MultiByteToWideChar.
@cd /d %~dp0
@rem Create the output directory only when it is missing, so that running
@rem the script again does not report an "already exists" error.
@if not exist build mkdir build
@rem Arabic DOS code page
c:\python30\python genwincodec.py 720 > build/cp720.py

View file

@ -0,0 +1,41 @@
""" List all available codec modules.
(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
Licensed to PSF under a Contributor Agreement.
"""
import os, codecs, encodings
_debug = 0
def listcodecs(dir):
    """Return the names of the ``*.py`` files in *dir* that are codecs.

    A name is kept unless codecs.lookup() raises LookupError for it.  Any
    other exception from the lookup still counts as a valid codec name;
    it is reported only when the module-level _debug flag is set.
    """
    found = []
    for entry in os.listdir(dir):
        stem, suffix = entry[:-3], entry[-3:]
        if suffix == '.py':
            # Check whether we've found a true codec
            try:
                codecs.lookup(stem)
            except LookupError:
                # Codec not found
                continue
            except Exception as reason:
                # Probably an error from importing the codec; still it's
                # a valid code name
                if _debug:
                    print('* problem importing codec %r: %s' % \
                          (stem, reason))
            found.append(stem)
    return found
if __name__ == '__main__':
names = listcodecs(encodings.__path__[0])
names.sort()
print('all_codecs = [')
for name in names:
print(' %r,' % name)
print(']')

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,431 @@
import re, sys
from unicodedata import ucd_3_2_0 as unicodedata
if sys.maxunicode == 65535:
raise RuntimeError("need UCS-4 Python")
def gen_category(cats):
    """Yield, in ascending order, each code point whose general category is in *cats*."""
    yield from (
        codepoint
        for codepoint in range(0, 0x110000)
        if unicodedata.category(chr(codepoint)) in cats
    )
def gen_bidirectional(cats):
    """Yield, in ascending order, each code point whose bidirectional class is in *cats*."""
    yield from (
        codepoint
        for codepoint in range(0, 0x110000)
        if unicodedata.bidirectional(chr(codepoint)) in cats
    )
def compact_set(l):
    """Return Python source text for a set holding the integers in *l*.

    *l* must be in ascending order.  A run of consecutive values is
    written as a range() once it is followed by a gap and has more than
    three members, or when it is the final run and has more than one
    member; all other values are listed individually.  An empty input
    gives "set()".
    """
    single = []
    ranges = []
    prev = None
    span = 0
    for e in l:
        if prev is None:
            prev = e
            span = 0
            continue
        if prev+span+1 != e:
            # The run starting at prev ended; flush it.
            if span > 2:
                ranges.append((prev,prev+span+1))
            else:
                single.extend(range(prev, prev+span+1))
            prev = e
            span = 0
        else:
            span += 1
    if prev is None:
        # Nothing was seen; do not emit a bogus None member.
        return "set()"
    if span:
        ranges.append((prev,prev+span+1))
    else:
        single.append(prev)
    if not single and len(ranges) == 1:
        ranges_src = "range(%d,%d)" % ranges[0]
    else:
        ranges_src = " + ".join("list(range(%d,%d))" % t for t in ranges)
    if not single:
        return "set(%s)" % ranges_src
    if not ranges_src:
        return "set(%r)" % (single,)
    return "set(%r + %s)" % (single, ranges_src)
############## Read the tables in the RFC #######################
# Parse rfc3454.txt into ``tables``: a list of (name, dict) pairs in file
# order.  Range and single-value lines map each code point to itself;
# "code; value;" lines map the code point to a list of code points, or to
# None when the value field is empty.
with open("rfc3454.txt") as f:
    data = f.readlines()

tables = []
curname = None
for l in data:
    l = l.strip()
    if not l:
        continue
    # Skip RFC page breaks
    if l.startswith(("Hoffman & Blanchet", "RFC 3454")):
        continue
    # Find start/end lines.  Raw string with an escaped dot so that only
    # dotted table names such as "A.1" or "C.1.2" are accepted.
    m = re.match(r"----- (Start|End) Table ([A-Z](\.[0-9])+) -----", l)
    if m:
        if m.group(1) == "Start":
            if curname:
                raise RuntimeError("Double Start", (curname, l))
            curname = m.group(2)
            table = {}
            tables.append((curname, table))
            continue
        else:
            if not curname:
                raise RuntimeError("End without start", l)
            if curname != m.group(2):
                raise RuntimeError("Unexpected end", l)
            curname = None
            continue
    if not curname:
        continue
    # Now we are in a table
    fields = l.split(";")
    if len(fields) > 1:
        # Drop comment field
        fields = fields[:-1]
    if len(fields) == 1:
        fields = fields[0].split("-")
        if len(fields) > 1:
            # range
            try:
                start, end = fields
            except ValueError:
                raise RuntimeError("Unpacking problem", l)
        else:
            start = end = fields[0]
        start = int(start, 16)
        end = int(end, 16)
        for i in range(start, end+1):
            table[i] = i
    else:
        code, value = fields
        value = value.strip()
        if value:
            value = [int(v, 16) for v in value.split(" ")]
        else:
            # table B.1
            value = None
        table[int(code, 16)] = value
########### Generate compact Python versions of the tables #############
# Each section below takes the next parsed table off the front of
# ``tables``, checks its name, and prints the matching part of the
# generated module.  The bodies of the emitted functions are indented so
# that the printed module is valid Python.
print("""# This file is generated by mkstringprep.py. DO NOT EDIT.
\"\"\"Library that exposes various tables found in the StringPrep RFC 3454.
There are two kinds of tables: sets, for which a member test is provided,
and mappings, for which a mapping function is provided.
\"\"\"
from unicodedata import ucd_3_2_0 as unicodedata
""")

print("assert unicodedata.unidata_version == %r" % (unicodedata.unidata_version,))

# A.1 is the table of unassigned characters
# XXX Plane 15 PUA is listed as unassigned in Python.
name, table = tables[0]
del tables[0]
assert name == "A.1"
table = set(table.keys())
Cn = set(gen_category(["Cn"]))

# FDD0..FDEF are process internal codes
Cn -= set(range(0xFDD0, 0xFDF0))
# not a character
Cn -= set(range(0xFFFE, 0x110000, 0x10000))
Cn -= set(range(0xFFFF, 0x110000, 0x10000))

# assert table == Cn

print("""
def in_table_a1(code):
    if unicodedata.category(code) != 'Cn': return False
    c = ord(code)
    if 0xFDD0 <= c < 0xFDF0: return False
    return (c & 0xFFFF) not in (0xFFFE, 0xFFFF)
""")

# B.1 cannot easily be derived
name, table = tables[0]
del tables[0]
assert name == "B.1"
table = sorted(table.keys())
print("""
b1_set = """ + compact_set(table) + """
def in_table_b1(code):
    return ord(code) in b1_set
""")

# B.2 and B.3 is case folding.
# It takes CaseFolding.txt into account, which is
# not available in the Python database. Since
# B.2 is derived from B.3, we process B.3 first.
# B.3 supposedly *is* CaseFolding-3.2.0.txt.

name, table_b2 = tables[0]
del tables[0]
assert name == "B.2"

name, table_b3 = tables[0]
del tables[0]
assert name == "B.3"

# B.3 is mostly Python's .lower, except for a number
# of special cases, e.g. considering canonical forms.
# NOTE(review): the loop below reads table_b2 although the comments talk
# about B.3; table_b3 is not used in the visible code.  Left unchanged
# because it determines the generated output -- confirm the intent.

b3_exceptions = {}

for k,v in table_b2.items():
    if list(map(ord, chr(k).lower())) != v:
        b3_exceptions[k] = "".join(map(chr,v))

b3 = sorted(b3_exceptions.items())

print("""
b3_exceptions = {""")
# Four entries per emitted line.
for i, kv in enumerate(b3):
    print("0x%x:%a," % kv, end=' ')
    if i % 4 == 3:
        print()
print("}")

print("""
def map_table_b3(code):
    r = b3_exceptions.get(ord(code))
    if r is not None: return r
    return code.lower()
""")
def map_table_b3(code):
    """Return the b3_exceptions entry for the character *code*, or its lower-case form if there is none."""
    override = b3_exceptions.get(ord(code))
    return code.lower() if override is None else override
# B.2 is case folding for NFKC. This is the same as B.3,
# except where NormalizeWithKC(Fold(a)) !=
# NormalizeWithKC(Fold(NormalizeWithKC(Fold(a))))
def map_table_b2(a):
    """Fold the character *a* with map_table_b3 and NFKC normalization.

    The B.3 folding of *a* is returned unless normalizing it, folding the
    result again and normalizing once more changes the normalized text; in
    that case the re-folded, normalized text is returned.
    """
    folded = map_table_b3(a)
    normalized = unicodedata.normalize("NFKC", folded)
    refolded = "".join([map_table_b3(ch) for ch in normalized])
    renormalized = unicodedata.normalize("NFKC", refolded)
    if normalized == renormalized:
        return folded
    return renormalized
# Each section below takes the next parsed table off the front of
# ``tables``, checks it where it can be derived from unicodedata, and
# prints the matching part of the generated module.  The bodies of the
# emitted functions are indented so that the printed module is valid
# Python.
specials = {}
for k,v in table_b2.items():
    if list(map(ord, map_table_b2(chr(k)))) != v:
        specials[k] = v

# B.3 should not add any additional special cases
assert specials == {}

print("""
def map_table_b2(a):
    al = map_table_b3(a)
    b = unicodedata.normalize("NFKC", al)
    bl = "".join([map_table_b3(ch) for ch in b])
    c = unicodedata.normalize("NFKC", bl)
    if b != c:
        return c
    else:
        return al
""")

# C.1.1 is a table with a single character
name, table = tables[0]
del tables[0]
assert name == "C.1.1"
assert table == {0x20:0x20}

print("""
def in_table_c11(code):
    return code == " "
""")

# C.1.2 is the rest of all space characters
name, table = tables[0]
del tables[0]
assert name == "C.1.2"

# table = set(table.keys())
# Zs = set(gen_category(["Zs"])) - {0x20}
# assert Zs == table

print("""
def in_table_c12(code):
    return unicodedata.category(code) == "Zs" and code != " "
def in_table_c11_c12(code):
    return unicodedata.category(code) == "Zs"
""")

# C.2.1 ASCII control characters
name, table_c21 = tables[0]
del tables[0]
assert name == "C.2.1"

Cc = set(gen_category(["Cc"]))
Cc_ascii = Cc & set(range(128))
table_c21 = set(table_c21.keys())
assert Cc_ascii == table_c21

print("""
def in_table_c21(code):
    return ord(code) < 128 and unicodedata.category(code) == "Cc"
""")

# C.2.2 Non-ASCII control characters. It also includes
# a number of characters in category Cf.
name, table_c22 = tables[0]
del tables[0]
assert name == "C.2.2"

Cc_nonascii = Cc - Cc_ascii
table_c22 = set(table_c22.keys())
assert len(Cc_nonascii - table_c22) == 0

specials = list(table_c22 - Cc_nonascii)
specials.sort()

print("""c22_specials = """ + compact_set(specials) + """
def in_table_c22(code):
    c = ord(code)
    if c < 128: return False
    if unicodedata.category(code) == "Cc": return True
    return c in c22_specials
def in_table_c21_c22(code):
    return unicodedata.category(code) == "Cc" or \\
           ord(code) in c22_specials
""")

# C.3 Private use
name, table = tables[0]
del tables[0]
assert name == "C.3"

Co = set(gen_category(["Co"]))
assert set(table.keys()) == Co

print("""
def in_table_c3(code):
    return unicodedata.category(code) == "Co"
""")

# C.4 Non-character code points, xFFFE, xFFFF
# plus process internal codes
name, table = tables[0]
del tables[0]
assert name == "C.4"

nonchar = set(range(0xFDD0,0xFDF0))
nonchar.update(range(0xFFFE,0x110000,0x10000))
nonchar.update(range(0xFFFF,0x110000,0x10000))
table = set(table.keys())
assert table == nonchar

print("""
def in_table_c4(code):
    c = ord(code)
    if c < 0xFDD0: return False
    if c < 0xFDF0: return True
    return (ord(code) & 0xFFFF) in (0xFFFE, 0xFFFF)
""")

# C.5 Surrogate codes
name, table = tables[0]
del tables[0]
assert name == "C.5"

Cs = set(gen_category(["Cs"]))
assert set(table.keys()) == Cs

print("""
def in_table_c5(code):
    return unicodedata.category(code) == "Cs"
""")

# C.6 Inappropriate for plain text
name, table = tables[0]
del tables[0]
assert name == "C.6"

table = sorted(table.keys())

print("""
c6_set = """ + compact_set(table) + """
def in_table_c6(code):
    return ord(code) in c6_set
""")

# C.7 Inappropriate for canonical representation
name, table = tables[0]
del tables[0]
assert name == "C.7"

table = sorted(table.keys())

print("""
c7_set = """ + compact_set(table) + """
def in_table_c7(code):
    return ord(code) in c7_set
""")

# C.8 Change display properties or are deprecated
name, table = tables[0]
del tables[0]
assert name == "C.8"

table = sorted(table.keys())

print("""
c8_set = """ + compact_set(table) + """
def in_table_c8(code):
    return ord(code) in c8_set
""")

# C.9 Tagging characters
name, table = tables[0]
del tables[0]
assert name == "C.9"

table = sorted(table.keys())

print("""
c9_set = """ + compact_set(table) + """
def in_table_c9(code):
    return ord(code) in c9_set
""")

# D.1 Characters with bidirectional property "R" or "AL"
name, table = tables[0]
del tables[0]
assert name == "D.1"

RandAL = set(gen_bidirectional(["R","AL"]))
assert set(table.keys()) == RandAL

print("""
def in_table_d1(code):
    return unicodedata.bidirectional(code) in ("R","AL")
""")

# D.2 Characters with bidirectional property "L"
name, table = tables[0]
del tables[0]
assert name == "D.2"

L = set(gen_bidirectional(["L"]))
assert set(table.keys()) == L

print("""
def in_table_d2(code):
    return unicodedata.bidirectional(code) == "L"
""")

View file

@ -0,0 +1,291 @@
#
# Name: CP1140
# Unicode version: 3.2
# Table version: 1.0
# Table format: Format A
# Date: 2005-10-25
# Authors: Marc-Andre Lemburg <mal@egenix.com>
#
# This encoding is a modified CP037 encoding (with added Euro
# currency sign).
#
# (c) Copyright Marc-Andre Lemburg, 2005.
# Licensed to PSF under a Contributor Agreement.
#
# Based on the file
# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT
# which is:
#
# Copyright (c) 2002 Unicode, Inc. All Rights reserved.
#
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
# No claims are made as to fitness for any particular purpose. No
# warranties of any kind are expressed or implied. The recipient
# agrees to determine applicability of information provided. If this
# file has been provided on optical media by Unicode, Inc., the sole
# remedy for any claim will be exchange of defective media within 90
# days of receipt.
#
# Unicode, Inc. hereby grants the right to freely use the information
# supplied in this file in the creation of products supporting the
# Unicode Standard, and to make copies of this file in any form for
# internal or external distribution as long as this notice remains
# attached.
#
0x00 0x0000 #NULL
0x01 0x0001 #START OF HEADING
0x02 0x0002 #START OF TEXT
0x03 0x0003 #END OF TEXT
0x04 0x009C #CONTROL
0x05 0x0009 #HORIZONTAL TABULATION
0x06 0x0086 #CONTROL
0x07 0x007F #DELETE
0x08 0x0097 #CONTROL
0x09 0x008D #CONTROL
0x0A 0x008E #CONTROL
0x0B 0x000B #VERTICAL TABULATION
0x0C 0x000C #FORM FEED
0x0D 0x000D #CARRIAGE RETURN
0x0E 0x000E #SHIFT OUT
0x0F 0x000F #SHIFT IN
0x10 0x0010 #DATA LINK ESCAPE
0x11 0x0011 #DEVICE CONTROL ONE
0x12 0x0012 #DEVICE CONTROL TWO
0x13 0x0013 #DEVICE CONTROL THREE
0x14 0x009D #CONTROL
0x15 0x0085 #CONTROL
0x16 0x0008 #BACKSPACE
0x17 0x0087 #CONTROL
0x18 0x0018 #CANCEL
0x19 0x0019 #END OF MEDIUM
0x1A 0x0092 #CONTROL
0x1B 0x008F #CONTROL
0x1C 0x001C #FILE SEPARATOR
0x1D 0x001D #GROUP SEPARATOR
0x1E 0x001E #RECORD SEPARATOR
0x1F 0x001F #UNIT SEPARATOR
0x20 0x0080 #CONTROL
0x21 0x0081 #CONTROL
0x22 0x0082 #CONTROL
0x23 0x0083 #CONTROL
0x24 0x0084 #CONTROL
0x25 0x000A #LINE FEED
0x26 0x0017 #END OF TRANSMISSION BLOCK
0x27 0x001B #ESCAPE
0x28 0x0088 #CONTROL
0x29 0x0089 #CONTROL
0x2A 0x008A #CONTROL
0x2B 0x008B #CONTROL
0x2C 0x008C #CONTROL
0x2D 0x0005 #ENQUIRY
0x2E 0x0006 #ACKNOWLEDGE
0x2F 0x0007 #BELL
0x30 0x0090 #CONTROL
0x31 0x0091 #CONTROL
0x32 0x0016 #SYNCHRONOUS IDLE
0x33 0x0093 #CONTROL
0x34 0x0094 #CONTROL
0x35 0x0095 #CONTROL
0x36 0x0096 #CONTROL
0x37 0x0004 #END OF TRANSMISSION
0x38 0x0098 #CONTROL
0x39 0x0099 #CONTROL
0x3A 0x009A #CONTROL
0x3B 0x009B #CONTROL
0x3C 0x0014 #DEVICE CONTROL FOUR
0x3D 0x0015 #NEGATIVE ACKNOWLEDGE
0x3E 0x009E #CONTROL
0x3F 0x001A #SUBSTITUTE
0x40 0x0020 #SPACE
0x41 0x00A0 #NO-BREAK SPACE
0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
0x43 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE
0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE
0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE
0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE
0x4A 0x00A2 #CENT SIGN
0x4B 0x002E #FULL STOP
0x4C 0x003C #LESS-THAN SIGN
0x4D 0x0028 #LEFT PARENTHESIS
0x4E 0x002B #PLUS SIGN
0x4F 0x007C #VERTICAL LINE
0x50 0x0026 #AMPERSAND
0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX
0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE
0x55 0x00ED #LATIN SMALL LETTER I WITH ACUTE
0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS
0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE
0x59 0x00DF #LATIN SMALL LETTER SHARP S (GERMAN)
0x5A 0x0021 #EXCLAMATION MARK
0x5B 0x0024 #DOLLAR SIGN
0x5C 0x002A #ASTERISK
0x5D 0x0029 #RIGHT PARENTHESIS
0x5E 0x003B #SEMICOLON
0x5F 0x00AC #NOT SIGN
0x60 0x002D #HYPHEN-MINUS
0x61 0x002F #SOLIDUS
0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x63 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE
0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE
0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE
0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE
0x6A 0x00A6 #BROKEN BAR
0x6B 0x002C #COMMA
0x6C 0x0025 #PERCENT SIGN
0x6D 0x005F #LOW LINE
0x6E 0x003E #GREATER-THAN SIGN
0x6F 0x003F #QUESTION MARK
0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE
0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE
0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS
0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE
0x79 0x0060 #GRAVE ACCENT
0x7A 0x003A #COLON
0x7B 0x0023 #NUMBER SIGN
0x7C 0x0040 #COMMERCIAL AT
0x7D 0x0027 #APOSTROPHE
0x7E 0x003D #EQUALS SIGN
0x7F 0x0022 #QUOTATION MARK
0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE
0x81 0x0061 #LATIN SMALL LETTER A
0x82 0x0062 #LATIN SMALL LETTER B
0x83 0x0063 #LATIN SMALL LETTER C
0x84 0x0064 #LATIN SMALL LETTER D
0x85 0x0065 #LATIN SMALL LETTER E
0x86 0x0066 #LATIN SMALL LETTER F
0x87 0x0067 #LATIN SMALL LETTER G
0x88 0x0068 #LATIN SMALL LETTER H
0x89 0x0069 #LATIN SMALL LETTER I
0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x8C 0x00F0 #LATIN SMALL LETTER ETH (ICELANDIC)
0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
0x8E 0x00FE #LATIN SMALL LETTER THORN (ICELANDIC)
0x8F 0x00B1 #PLUS-MINUS SIGN
0x90 0x00B0 #DEGREE SIGN
0x91 0x006A #LATIN SMALL LETTER J
0x92 0x006B #LATIN SMALL LETTER K
0x93 0x006C #LATIN SMALL LETTER L
0x94 0x006D #LATIN SMALL LETTER M
0x95 0x006E #LATIN SMALL LETTER N
0x96 0x006F #LATIN SMALL LETTER O
0x97 0x0070 #LATIN SMALL LETTER P
0x98 0x0071 #LATIN SMALL LETTER Q
0x99 0x0072 #LATIN SMALL LETTER R
0x9A 0x00AA #FEMININE ORDINAL INDICATOR
0x9B 0x00BA #MASCULINE ORDINAL INDICATOR
0x9C 0x00E6 #LATIN SMALL LIGATURE AE
0x9D 0x00B8 #CEDILLA
0x9E 0x00C6 #LATIN CAPITAL LIGATURE AE
#0x9F 0x00A4 #CURRENCY SIGN
0x9F 0x20AC # EURO SIGN
0xA0 0x00B5 #MICRO SIGN
0xA1 0x007E #TILDE
0xA2 0x0073 #LATIN SMALL LETTER S
0xA3 0x0074 #LATIN SMALL LETTER T
0xA4 0x0075 #LATIN SMALL LETTER U
0xA5 0x0076 #LATIN SMALL LETTER V
0xA6 0x0077 #LATIN SMALL LETTER W
0xA7 0x0078 #LATIN SMALL LETTER X
0xA8 0x0079 #LATIN SMALL LETTER Y
0xA9 0x007A #LATIN SMALL LETTER Z
0xAA 0x00A1 #INVERTED EXCLAMATION MARK
0xAB 0x00BF #INVERTED QUESTION MARK
0xAC 0x00D0 #LATIN CAPITAL LETTER ETH (ICELANDIC)
0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
0xAE 0x00DE #LATIN CAPITAL LETTER THORN (ICELANDIC)
0xAF 0x00AE #REGISTERED SIGN
0xB0 0x005E #CIRCUMFLEX ACCENT
0xB1 0x00A3 #POUND SIGN
0xB2 0x00A5 #YEN SIGN
0xB3 0x00B7 #MIDDLE DOT
0xB4 0x00A9 #COPYRIGHT SIGN
0xB5 0x00A7 #SECTION SIGN
0xB6 0x00B6 #PILCROW SIGN
0xB7 0x00BC #VULGAR FRACTION ONE QUARTER
0xB8 0x00BD #VULGAR FRACTION ONE HALF
0xB9 0x00BE #VULGAR FRACTION THREE QUARTERS
0xBA 0x005B #LEFT SQUARE BRACKET
0xBB 0x005D #RIGHT SQUARE BRACKET
0xBC 0x00AF #MACRON
0xBD 0x00A8 #DIAERESIS
0xBE 0x00B4 #ACUTE ACCENT
0xBF 0x00D7 #MULTIPLICATION SIGN
0xC0 0x007B #LEFT CURLY BRACKET
0xC1 0x0041 #LATIN CAPITAL LETTER A
0xC2 0x0042 #LATIN CAPITAL LETTER B
0xC3 0x0043 #LATIN CAPITAL LETTER C
0xC4 0x0044 #LATIN CAPITAL LETTER D
0xC5 0x0045 #LATIN CAPITAL LETTER E
0xC6 0x0046 #LATIN CAPITAL LETTER F
0xC7 0x0047 #LATIN CAPITAL LETTER G
0xC8 0x0048 #LATIN CAPITAL LETTER H
0xC9 0x0049 #LATIN CAPITAL LETTER I
0xCA 0x00AD #SOFT HYPHEN
0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
0xCC 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE
0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE
0xD0 0x007D #RIGHT CURLY BRACKET
0xD1 0x004A #LATIN CAPITAL LETTER J
0xD2 0x004B #LATIN CAPITAL LETTER K
0xD3 0x004C #LATIN CAPITAL LETTER L
0xD4 0x004D #LATIN CAPITAL LETTER M
0xD5 0x004E #LATIN CAPITAL LETTER N
0xD6 0x004F #LATIN CAPITAL LETTER O
0xD7 0x0050 #LATIN CAPITAL LETTER P
0xD8 0x0051 #LATIN CAPITAL LETTER Q
0xD9 0x0052 #LATIN CAPITAL LETTER R
0xDA 0x00B9 #SUPERSCRIPT ONE
0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX
0xDC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE
0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE
0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS
0xE0 0x005C #REVERSE SOLIDUS
0xE1 0x00F7 #DIVISION SIGN
0xE2 0x0053 #LATIN CAPITAL LETTER S
0xE3 0x0054 #LATIN CAPITAL LETTER T
0xE4 0x0055 #LATIN CAPITAL LETTER U
0xE5 0x0056 #LATIN CAPITAL LETTER V
0xE6 0x0057 #LATIN CAPITAL LETTER W
0xE7 0x0058 #LATIN CAPITAL LETTER X
0xE8 0x0059 #LATIN CAPITAL LETTER Y
0xE9 0x005A #LATIN CAPITAL LETTER Z
0xEA 0x00B2 #SUPERSCRIPT TWO
0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0xEC 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE
0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
0xEF 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE
0xF0 0x0030 #DIGIT ZERO
0xF1 0x0031 #DIGIT ONE
0xF2 0x0032 #DIGIT TWO
0xF3 0x0033 #DIGIT THREE
0xF4 0x0034 #DIGIT FOUR
0xF5 0x0035 #DIGIT FIVE
0xF6 0x0036 #DIGIT SIX
0xF7 0x0037 #DIGIT SEVEN
0xF8 0x0038 #DIGIT EIGHT
0xF9 0x0039 #DIGIT NINE
0xFA 0x00B3 #SUPERSCRIPT THREE
0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0xFC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE
0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
0xFF 0x009F #CONTROL

View file

@ -0,0 +1,258 @@
0x00 0x0000 #NULL (NUL)
0x01 0x0001 #START OF HEADING (SOH)
0x02 0x0002 #START OF TEXT (STX)
0x03 0x0003 #END OF TEXT (ETX)
0x04 0x009C #STRING TERMINATOR (ST)
0x05 0x0009 #CHARACTER TABULATION (HT)
0x06 0x0086 #START OF SELECTED AREA (SSA)
0x07 0x007F #DELETE (DEL)
0x08 0x0097 #END OF GUARDED AREA (EPA)
0x09 0x008D #REVERSE LINE FEED (RI)
0x0A 0x008E #SINGLE-SHIFT TWO (SS2)
0x0B 0x000B #LINE TABULATION (VT)
0x0C 0x000C #FORM FEED (FF)
0x0D 0x000D #CARRIAGE RETURN (CR)
0x0E 0x000E #SHIFT OUT (SO)
0x0F 0x000F #SHIFT IN (SI)
0x10 0x0010 #DATALINK ESCAPE (DLE)
0x11 0x0011 #DEVICE CONTROL ONE (DC1)
0x12 0x0012 #DEVICE CONTROL TWO (DC2)
0x13 0x0013 #DEVICE CONTROL THREE (DC3)
0x14 0x009D #OPERATING SYSTEM COMMAND (OSC)
0x15 0x0085 #NEXT LINE (NEL)
0x16 0x0008 #BACKSPACE (BS)
0x17 0x0087 #END OF SELECTED AREA (ESA)
0x18 0x0018 #CANCEL (CAN)
0x19 0x0019 #END OF MEDIUM (EM)
0x1A 0x0092 #PRIVATE USE TWO (PU2)
0x1B 0x008F #SINGLE-SHIFT THREE (SS3)
0x1C 0x001C #FILE SEPARATOR (IS4)
0x1D 0x001D #GROUP SEPARATOR (IS3)
0x1E 0x001E #RECORD SEPARATOR (IS2)
0x1F 0x001F #UNIT SEPARATOR (IS1)
0x20 0x0080 #PADDING CHARACTER (PAD)
0x21 0x0081 #HIGH OCTET PRESET (HOP)
0x22 0x0082 #BREAK PERMITTED HERE (BPH)
0x23 0x0083 #NO BREAK HERE (NBH)
0x24 0x0084 #INDEX (IND)
0x25 0x000A #LINE FEED (LF)
0x26 0x0017 #END OF TRANSMISSION BLOCK (ETB)
0x27 0x001B #ESCAPE (ESC)
0x28 0x0088 #CHARACTER TABULATION SET (HTS)
0x29 0x0089 #CHARACTER TABULATION WITH JUSTIFICATION (HTJ)
0x2A 0x008A #LINE TABULATION SET (VTS)
0x2B 0x008B #PARTIAL LINE FORWARD (PLD)
0x2C 0x008C #PARTIAL LINE BACKWARD (PLU)
0x2D 0x0005 #ENQUIRY (ENQ)
0x2E 0x0006 #ACKNOWLEDGE (ACK)
0x2F 0x0007 #BELL (BEL)
0x30 0x0090 #DEVICE CONTROL STRING (DCS)
0x31 0x0091 #PRIVATE USE ONE (PU1)
0x32 0x0016 #SYNCHRONOUS IDLE (SYN)
0x33 0x0093 #SET TRANSMIT STATE (STS)
0x34 0x0094 #CANCEL CHARACTER (CCH)
0x35 0x0095 #MESSAGE WAITING (MW)
0x36 0x0096 #START OF GUARDED AREA (SPA)
0x37 0x0004 #END OF TRANSMISSION (EOT)
0x38 0x0098 #START OF STRING (SOS)
0x39 0x0099 #SINGLE GRAPHIC CHARACTER INTRODUCER (SGCI)
0x3A 0x009A #SINGLE CHARACTER INTRODUCER (SCI)
0x3B 0x009B #CONTROL SEQUENCE INTRODUCER (CSI)
0x3C 0x0014 #DEVICE CONTROL FOUR (DC4)
0x3D 0x0015 #NEGATIVE ACKNOWLEDGE (NAK)
0x3E 0x009E #PRIVACY MESSAGE (PM)
0x3F 0x001A #SUBSTITUTE (SUB)
0x40 0x0020 #SPACE
0x41 0x00A0 #NO-BREAK SPACE
0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
0x43 0x007B #LEFT CURLY BRACKET
0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE
0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE
0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE
0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE
0x4A 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
0x4B 0x002E #FULL STOP
0x4C 0x003C #LESS-THAN SIGN
0x4D 0x0028 #LEFT PARENTHESIS
0x4E 0x002B #PLUS SIGN
0x4F 0x0021 #EXCLAMATION MARK
0x50 0x0026 #AMPERSAND
0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX
0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE
0x55 0x00ED #LATIN SMALL LETTER I WITH ACUTE
0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS
0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE
0x59 0x007E #TILDE
0x5A 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
0x5B 0x0024 #DOLLAR SIGN
0x5C 0x002A #ASTERISK
0x5D 0x0029 #RIGHT PARENTHESIS
0x5E 0x003B #SEMICOLON
0x5F 0x005E #CIRCUMFLEX ACCENT
0x60 0x002D #HYPHEN-MINUS
0x61 0x002F #SOLIDUS
0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x63 0x005B #LEFT SQUARE BRACKET
0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE
0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE
0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE
0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE
0x6A 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
0x6B 0x002C #COMMA
0x6C 0x0025 #PERCENT SIGN
0x6D 0x005F #LOW LINE
0x6E 0x003E #GREATER-THAN SIGN
0x6F 0x003F #QUESTION MARK
0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE
0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE
0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS
0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE
0x79 0x0060 #GRAVE ACCENT
0x7A 0x003A #COLON
0x7B 0x0023 #NUMBER SIGN
0x7C 0x00A7 #SECTION SIGN
0x7D 0x0027 #APOSTROPHE
0x7E 0x003D #EQUALS SIGN
0x7F 0x0022 #QUOTATION MARK
0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE
0x81 0x0061 #LATIN SMALL LETTER A
0x82 0x0062 #LATIN SMALL LETTER B
0x83 0x0063 #LATIN SMALL LETTER C
0x84 0x0064 #LATIN SMALL LETTER D
0x85 0x0065 #LATIN SMALL LETTER E
0x86 0x0066 #LATIN SMALL LETTER F
0x87 0x0067 #LATIN SMALL LETTER G
0x88 0x0068 #LATIN SMALL LETTER H
0x89 0x0069 #LATIN SMALL LETTER I
0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x8C 0x00F0 #LATIN SMALL LETTER ETH (Icelandic)
0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
0x8E 0x00FE #LATIN SMALL LETTER THORN (Icelandic)
0x8F 0x00B1 #PLUS-MINUS SIGN
0x90 0x00B0 #DEGREE SIGN
0x91 0x006A #LATIN SMALL LETTER J
0x92 0x006B #LATIN SMALL LETTER K
0x93 0x006C #LATIN SMALL LETTER L
0x94 0x006D #LATIN SMALL LETTER M
0x95 0x006E #LATIN SMALL LETTER N
0x96 0x006F #LATIN SMALL LETTER O
0x97 0x0070 #LATIN SMALL LETTER P
0x98 0x0071 #LATIN SMALL LETTER Q
0x99 0x0072 #LATIN SMALL LETTER R
0x9A 0x00AA #FEMININE ORDINAL INDICATOR
0x9B 0x00BA #MASCULINE ORDINAL INDICATOR
0x9C 0x00E6 #LATIN SMALL LETTER AE
0x9D 0x00B8 #CEDILLA
0x9E 0x00C6 #LATIN CAPITAL LETTER AE
0x9F 0x00A4 #CURRENCY SIGN
0xA0 0x00B5 #MICRO SIGN
0xA1 0x00DF #LATIN SMALL LETTER SHARP S (German)
0xA2 0x0073 #LATIN SMALL LETTER S
0xA3 0x0074 #LATIN SMALL LETTER T
0xA4 0x0075 #LATIN SMALL LETTER U
0xA5 0x0076 #LATIN SMALL LETTER V
0xA6 0x0077 #LATIN SMALL LETTER W
0xA7 0x0078 #LATIN SMALL LETTER X
0xA8 0x0079 #LATIN SMALL LETTER Y
0xA9 0x007A #LATIN SMALL LETTER Z
0xAA 0x00A1 #INVERTED EXCLAMATION MARK
0xAB 0x00BF #INVERTED QUESTION MARK
0xAC 0x00D0 #LATIN CAPITAL LETTER ETH (Icelandic)
0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
0xAE 0x00DE #LATIN CAPITAL LETTER THORN (Icelandic)
0xAF 0x00AE #REGISTERED SIGN
0xB0 0x00A2 #CENT SIGN
0xB1 0x00A3 #POUND SIGN
0xB2 0x00A5 #YEN SIGN
0xB3 0x00B7 #MIDDLE DOT
0xB4 0x00A9 #COPYRIGHT SIGN
0xB5 0x0040 #COMMERCIAL AT
0xB6 0x00B6 #PILCROW SIGN
0xB7 0x00BC #VULGAR FRACTION ONE QUARTER
0xB8 0x00BD #VULGAR FRACTION ONE HALF
0xB9 0x00BE #VULGAR FRACTION THREE QUARTERS
0xBA 0x00AC #NOT SIGN
0xBB 0x007C #VERTICAL LINE
0xBC 0x203E #OVERLINE
0xBD 0x00A8 #DIAERESIS
0xBE 0x00B4 #ACUTE ACCENT
0xBF 0x00D7 #MULTIPLICATION SIGN
0xC0 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
0xC1 0x0041 #LATIN CAPITAL LETTER A
0xC2 0x0042 #LATIN CAPITAL LETTER B
0xC3 0x0043 #LATIN CAPITAL LETTER C
0xC4 0x0044 #LATIN CAPITAL LETTER D
0xC5 0x0045 #LATIN CAPITAL LETTER E
0xC6 0x0046 #LATIN CAPITAL LETTER F
0xC7 0x0047 #LATIN CAPITAL LETTER G
0xC8 0x0048 #LATIN CAPITAL LETTER H
0xC9 0x0049 #LATIN CAPITAL LETTER I
0xCA 0x00AD #SOFT HYPHEN
0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
0xCC 0x00A6 #BROKEN BAR
0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE
0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE
0xD0 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
0xD1 0x004A #LATIN CAPITAL LETTER J
0xD2 0x004B #LATIN CAPITAL LETTER K
0xD3 0x004C #LATIN CAPITAL LETTER L
0xD4 0x004D #LATIN CAPITAL LETTER M
0xD5 0x004E #LATIN CAPITAL LETTER N
0xD6 0x004F #LATIN CAPITAL LETTER O
0xD7 0x0050 #LATIN CAPITAL LETTER P
0xD8 0x0051 #LATIN CAPITAL LETTER Q
0xD9 0x0052 #LATIN CAPITAL LETTER R
0xDA 0x00B9 #SUPERSCRIPT ONE
0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX
0xDC 0x007D #RIGHT CURLY BRACKET
0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE
0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE
0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS
0xE0 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
0xE1 0x00F7 #DIVISION SIGN
0xE2 0x0053 #LATIN CAPITAL LETTER S
0xE3 0x0054 #LATIN CAPITAL LETTER T
0xE4 0x0055 #LATIN CAPITAL LETTER U
0xE5 0x0056 #LATIN CAPITAL LETTER V
0xE6 0x0057 #LATIN CAPITAL LETTER W
0xE7 0x0058 #LATIN CAPITAL LETTER X
0xE8 0x0059 #LATIN CAPITAL LETTER Y
0xE9 0x005A #LATIN CAPITAL LETTER Z
0xEA 0x00B2 #SUPERSCRIPT TWO
0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0xEC 0x005C #REVERSE SOLIDUS
0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE
0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
0xEF 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE
0xF0 0x0030 #DIGIT ZERO
0xF1 0x0031 #DIGIT ONE
0xF2 0x0032 #DIGIT TWO
0xF3 0x0033 #DIGIT THREE
0xF4 0x0034 #DIGIT FOUR
0xF5 0x0035 #DIGIT FIVE
0xF6 0x0036 #DIGIT SIX
0xF7 0x0037 #DIGIT SEVEN
0xF8 0x0038 #DIGIT EIGHT
0xF9 0x0039 #DIGIT NINE
0xFA 0x00B3 #SUPERSCRIPT THREE
0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0xFC 0x005D #RIGHT SQUARE BRACKET
0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE
0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
0xFF 0x009F #APPLICATION PROGRAM COMMAND (APC)

View file

@ -0,0 +1,298 @@
#
# Name: KOI8-U (RFC2319) to Unicode
# Unicode version: 3.2
# Table version: 1.0
# Table format: Format A
# Date: 2005-10-25
# Authors: Marc-Andre Lemburg <mal@egenix.com>
#
# See RFC2319 for details. This encoding is a modified KOI8-R
# encoding.
#
# (c) Copyright Marc-Andre Lemburg, 2005.
# Licensed to PSF under a Contributor Agreement.
#
# Based on the file
# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
# which is:
#
# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
#
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
# No claims are made as to fitness for any particular purpose. No
# warranties of any kind are expressed or implied. The recipient
# agrees to determine applicability of information provided. If this
# file has been provided on optical media by Unicode, Inc., the sole
# remedy for any claim will be exchange of defective media within 90
# days of receipt.
#
# Unicode, Inc. hereby grants the right to freely use the information
# supplied in this file in the creation of products supporting the
# Unicode Standard, and to make copies of this file in any form for
# internal or external distribution as long as this notice remains
# attached.
#
0x00 0x0000 # NULL
0x01 0x0001 # START OF HEADING
0x02 0x0002 # START OF TEXT
0x03 0x0003 # END OF TEXT
0x04 0x0004 # END OF TRANSMISSION
0x05 0x0005 # ENQUIRY
0x06 0x0006 # ACKNOWLEDGE
0x07 0x0007 # BELL
0x08 0x0008 # BACKSPACE
0x09 0x0009 # HORIZONTAL TABULATION
0x0A 0x000A # LINE FEED
0x0B 0x000B # VERTICAL TABULATION
0x0C 0x000C # FORM FEED
0x0D 0x000D # CARRIAGE RETURN
0x0E 0x000E # SHIFT OUT
0x0F 0x000F # SHIFT IN
0x10 0x0010 # DATA LINK ESCAPE
0x11 0x0011 # DEVICE CONTROL ONE
0x12 0x0012 # DEVICE CONTROL TWO
0x13 0x0013 # DEVICE CONTROL THREE
0x14 0x0014 # DEVICE CONTROL FOUR
0x15 0x0015 # NEGATIVE ACKNOWLEDGE
0x16 0x0016 # SYNCHRONOUS IDLE
0x17 0x0017 # END OF TRANSMISSION BLOCK
0x18 0x0018 # CANCEL
0x19 0x0019 # END OF MEDIUM
0x1A 0x001A # SUBSTITUTE
0x1B 0x001B # ESCAPE
0x1C 0x001C # FILE SEPARATOR
0x1D 0x001D # GROUP SEPARATOR
0x1E 0x001E # RECORD SEPARATOR
0x1F 0x001F # UNIT SEPARATOR
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
0x23 0x0023 # NUMBER SIGN
0x24 0x0024 # DOLLAR SIGN
0x25 0x0025 # PERCENT SIGN
0x26 0x0026 # AMPERSAND
0x27 0x0027 # APOSTROPHE
0x28 0x0028 # LEFT PARENTHESIS
0x29 0x0029 # RIGHT PARENTHESIS
0x2A 0x002A # ASTERISK
0x2B 0x002B # PLUS SIGN
0x2C 0x002C # COMMA
0x2D 0x002D # HYPHEN-MINUS
0x2E 0x002E # FULL STOP
0x2F 0x002F # SOLIDUS
0x30 0x0030 # DIGIT ZERO
0x31 0x0031 # DIGIT ONE
0x32 0x0032 # DIGIT TWO
0x33 0x0033 # DIGIT THREE
0x34 0x0034 # DIGIT FOUR
0x35 0x0035 # DIGIT FIVE
0x36 0x0036 # DIGIT SIX
0x37 0x0037 # DIGIT SEVEN
0x38 0x0038 # DIGIT EIGHT
0x39 0x0039 # DIGIT NINE
0x3A 0x003A # COLON
0x3B 0x003B # SEMICOLON
0x3C 0x003C # LESS-THAN SIGN
0x3D 0x003D # EQUALS SIGN
0x3E 0x003E # GREATER-THAN SIGN
0x3F 0x003F # QUESTION MARK
0x40 0x0040 # COMMERCIAL AT
0x41 0x0041 # LATIN CAPITAL LETTER A
0x42 0x0042 # LATIN CAPITAL LETTER B
0x43 0x0043 # LATIN CAPITAL LETTER C
0x44 0x0044 # LATIN CAPITAL LETTER D
0x45 0x0045 # LATIN CAPITAL LETTER E
0x46 0x0046 # LATIN CAPITAL LETTER F
0x47 0x0047 # LATIN CAPITAL LETTER G
0x48 0x0048 # LATIN CAPITAL LETTER H
0x49 0x0049 # LATIN CAPITAL LETTER I
0x4A 0x004A # LATIN CAPITAL LETTER J
0x4B 0x004B # LATIN CAPITAL LETTER K
0x4C 0x004C # LATIN CAPITAL LETTER L
0x4D 0x004D # LATIN CAPITAL LETTER M
0x4E 0x004E # LATIN CAPITAL LETTER N
0x4F 0x004F # LATIN CAPITAL LETTER O
0x50 0x0050 # LATIN CAPITAL LETTER P
0x51 0x0051 # LATIN CAPITAL LETTER Q
0x52 0x0052 # LATIN CAPITAL LETTER R
0x53 0x0053 # LATIN CAPITAL LETTER S
0x54 0x0054 # LATIN CAPITAL LETTER T
0x55 0x0055 # LATIN CAPITAL LETTER U
0x56 0x0056 # LATIN CAPITAL LETTER V
0x57 0x0057 # LATIN CAPITAL LETTER W
0x58 0x0058 # LATIN CAPITAL LETTER X
0x59 0x0059 # LATIN CAPITAL LETTER Y
0x5A 0x005A # LATIN CAPITAL LETTER Z
0x5B 0x005B # LEFT SQUARE BRACKET
0x5C 0x005C # REVERSE SOLIDUS
0x5D 0x005D # RIGHT SQUARE BRACKET
0x5E 0x005E # CIRCUMFLEX ACCENT
0x5F 0x005F # LOW LINE
0x60 0x0060 # GRAVE ACCENT
0x61 0x0061 # LATIN SMALL LETTER A
0x62 0x0062 # LATIN SMALL LETTER B
0x63 0x0063 # LATIN SMALL LETTER C
0x64 0x0064 # LATIN SMALL LETTER D
0x65 0x0065 # LATIN SMALL LETTER E
0x66 0x0066 # LATIN SMALL LETTER F
0x67 0x0067 # LATIN SMALL LETTER G
0x68 0x0068 # LATIN SMALL LETTER H
0x69 0x0069 # LATIN SMALL LETTER I
0x6A 0x006A # LATIN SMALL LETTER J
0x6B 0x006B # LATIN SMALL LETTER K
0x6C 0x006C # LATIN SMALL LETTER L
0x6D 0x006D # LATIN SMALL LETTER M
0x6E 0x006E # LATIN SMALL LETTER N
0x6F 0x006F # LATIN SMALL LETTER O
0x70 0x0070 # LATIN SMALL LETTER P
0x71 0x0071 # LATIN SMALL LETTER Q
0x72 0x0072 # LATIN SMALL LETTER R
0x73 0x0073 # LATIN SMALL LETTER S
0x74 0x0074 # LATIN SMALL LETTER T
0x75 0x0075 # LATIN SMALL LETTER U
0x76 0x0076 # LATIN SMALL LETTER V
0x77 0x0077 # LATIN SMALL LETTER W
0x78 0x0078 # LATIN SMALL LETTER X
0x79 0x0079 # LATIN SMALL LETTER Y
0x7A 0x007A # LATIN SMALL LETTER Z
0x7B 0x007B # LEFT CURLY BRACKET
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x007E # TILDE
0x7F 0x007F # DELETE
0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL
0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL
0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT
0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT
0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT
0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x8B 0x2580 # UPPER HALF BLOCK
0x8C 0x2584 # LOWER HALF BLOCK
0x8D 0x2588 # FULL BLOCK
0x8E 0x258C # LEFT HALF BLOCK
0x8F 0x2590 # RIGHT HALF BLOCK
0x90 0x2591 # LIGHT SHADE
0x91 0x2592 # MEDIUM SHADE
0x92 0x2593 # DARK SHADE
0x93 0x2320 # TOP HALF INTEGRAL
0x94 0x25A0 # BLACK SQUARE
0x95 0x2219 # BULLET OPERATOR
0x96 0x221A # SQUARE ROOT
0x97 0x2248 # ALMOST EQUAL TO
0x98 0x2264 # LESS-THAN OR EQUAL TO
0x99 0x2265 # GREATER-THAN OR EQUAL TO
0x9A 0x00A0 # NO-BREAK SPACE
0x9B 0x2321 # BOTTOM HALF INTEGRAL
0x9C 0x00B0 # DEGREE SIGN
0x9D 0x00B2 # SUPERSCRIPT TWO
0x9E 0x00B7 # MIDDLE DOT
0x9F 0x00F7 # DIVISION SIGN
0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL
0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL
0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0xA3 0x0451 # CYRILLIC SMALL LETTER IO
#0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0xA4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE
0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT
#0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0xA6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
#0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0xA7 0x0457 # CYRILLIC SMALL LETTER YI (UKRAINIAN)
0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT
0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT
0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
#0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0xAD 0x0491 # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT
0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO
#0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0xB4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
#0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0xB6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
#0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0xB7 0x0407 # CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
#0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0xBD 0x0490 # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0xBF 0x00A9 # COPYRIGHT SIGN
0xC0 0x044E # CYRILLIC SMALL LETTER YU
0xC1 0x0430 # CYRILLIC SMALL LETTER A
0xC2 0x0431 # CYRILLIC SMALL LETTER BE
0xC3 0x0446 # CYRILLIC SMALL LETTER TSE
0xC4 0x0434 # CYRILLIC SMALL LETTER DE
0xC5 0x0435 # CYRILLIC SMALL LETTER IE
0xC6 0x0444 # CYRILLIC SMALL LETTER EF
0xC7 0x0433 # CYRILLIC SMALL LETTER GHE
0xC8 0x0445 # CYRILLIC SMALL LETTER HA
0xC9 0x0438 # CYRILLIC SMALL LETTER I
0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I
0xCB 0x043A # CYRILLIC SMALL LETTER KA
0xCC 0x043B # CYRILLIC SMALL LETTER EL
0xCD 0x043C # CYRILLIC SMALL LETTER EM
0xCE 0x043D # CYRILLIC SMALL LETTER EN
0xCF 0x043E # CYRILLIC SMALL LETTER O
0xD0 0x043F # CYRILLIC SMALL LETTER PE
0xD1 0x044F # CYRILLIC SMALL LETTER YA
0xD2 0x0440 # CYRILLIC SMALL LETTER ER
0xD3 0x0441 # CYRILLIC SMALL LETTER ES
0xD4 0x0442 # CYRILLIC SMALL LETTER TE
0xD5 0x0443 # CYRILLIC SMALL LETTER U
0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE
0xD7 0x0432 # CYRILLIC SMALL LETTER VE
0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN
0xD9 0x044B # CYRILLIC SMALL LETTER YERU
0xDA 0x0437 # CYRILLIC SMALL LETTER ZE
0xDB 0x0448 # CYRILLIC SMALL LETTER SHA
0xDC 0x044D # CYRILLIC SMALL LETTER E
0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA
0xDE 0x0447 # CYRILLIC SMALL LETTER CHE
0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN
0xE0 0x042E # CYRILLIC CAPITAL LETTER YU
0xE1 0x0410 # CYRILLIC CAPITAL LETTER A
0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE
0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE
0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE
0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE
0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF
0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE
0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA
0xE9 0x0418 # CYRILLIC CAPITAL LETTER I
0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I
0xEB 0x041A # CYRILLIC CAPITAL LETTER KA
0xEC 0x041B # CYRILLIC CAPITAL LETTER EL
0xED 0x041C # CYRILLIC CAPITAL LETTER EM
0xEE 0x041D # CYRILLIC CAPITAL LETTER EN
0xEF 0x041E # CYRILLIC CAPITAL LETTER O
0xF0 0x041F # CYRILLIC CAPITAL LETTER PE
0xF1 0x042F # CYRILLIC CAPITAL LETTER YA
0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER
0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES
0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE
0xF5 0x0423 # CYRILLIC CAPITAL LETTER U
0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE
0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE
0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN
0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU
0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE
0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA
0xFC 0x042D # CYRILLIC CAPITAL LETTER E
0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA
0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE
0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN

View file

@ -0,0 +1,284 @@
#
# Name: TIS-620
# Unicode version: 3.2
# Table version: 1.0
# Table format: Format A
# Date: 2005-10-25
# Authors: Marc-Andre Lemburg <mal@egenix.com>
#
# According to
# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the
# TIS-620 is identical to ISO_8859-11 with the 0xA0
# (no-break space) mapping removed.
#
# (c) Copyright Marc-Andre Lemburg, 2005.
# Licensed to PSF under a Contributor Agreement.
#
# Based on the file
# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT
# which is:
#
# Copyright (c) 2002 Unicode, Inc. All Rights reserved.
#
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
# No claims are made as to fitness for any particular purpose. No
# warranties of any kind are expressed or implied. The recipient
# agrees to determine applicability of information provided. If this
# file has been provided on optical media by Unicode, Inc., the sole
# remedy for any claim will be exchange of defective media within 90
# days of receipt.
#
# Unicode, Inc. hereby grants the right to freely use the information
# supplied in this file in the creation of products supporting the
# Unicode Standard, and to make copies of this file in any form for
# internal or external distribution as long as this notice remains
# attached.
#
0x00 0x0000 # NULL
0x01 0x0001 # START OF HEADING
0x02 0x0002 # START OF TEXT
0x03 0x0003 # END OF TEXT
0x04 0x0004 # END OF TRANSMISSION
0x05 0x0005 # ENQUIRY
0x06 0x0006 # ACKNOWLEDGE
0x07 0x0007 # BELL
0x08 0x0008 # BACKSPACE
0x09 0x0009 # HORIZONTAL TABULATION
0x0A 0x000A # LINE FEED
0x0B 0x000B # VERTICAL TABULATION
0x0C 0x000C # FORM FEED
0x0D 0x000D # CARRIAGE RETURN
0x0E 0x000E # SHIFT OUT
0x0F 0x000F # SHIFT IN
0x10 0x0010 # DATA LINK ESCAPE
0x11 0x0011 # DEVICE CONTROL ONE
0x12 0x0012 # DEVICE CONTROL TWO
0x13 0x0013 # DEVICE CONTROL THREE
0x14 0x0014 # DEVICE CONTROL FOUR
0x15 0x0015 # NEGATIVE ACKNOWLEDGE
0x16 0x0016 # SYNCHRONOUS IDLE
0x17 0x0017 # END OF TRANSMISSION BLOCK
0x18 0x0018 # CANCEL
0x19 0x0019 # END OF MEDIUM
0x1A 0x001A # SUBSTITUTE
0x1B 0x001B # ESCAPE
0x1C 0x001C # FILE SEPARATOR
0x1D 0x001D # GROUP SEPARATOR
0x1E 0x001E # RECORD SEPARATOR
0x1F 0x001F # UNIT SEPARATOR
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
0x23 0x0023 # NUMBER SIGN
0x24 0x0024 # DOLLAR SIGN
0x25 0x0025 # PERCENT SIGN
0x26 0x0026 # AMPERSAND
0x27 0x0027 # APOSTROPHE
0x28 0x0028 # LEFT PARENTHESIS
0x29 0x0029 # RIGHT PARENTHESIS
0x2A 0x002A # ASTERISK
0x2B 0x002B # PLUS SIGN
0x2C 0x002C # COMMA
0x2D 0x002D # HYPHEN-MINUS
0x2E 0x002E # FULL STOP
0x2F 0x002F # SOLIDUS
0x30 0x0030 # DIGIT ZERO
0x31 0x0031 # DIGIT ONE
0x32 0x0032 # DIGIT TWO
0x33 0x0033 # DIGIT THREE
0x34 0x0034 # DIGIT FOUR
0x35 0x0035 # DIGIT FIVE
0x36 0x0036 # DIGIT SIX
0x37 0x0037 # DIGIT SEVEN
0x38 0x0038 # DIGIT EIGHT
0x39 0x0039 # DIGIT NINE
0x3A 0x003A # COLON
0x3B 0x003B # SEMICOLON
0x3C 0x003C # LESS-THAN SIGN
0x3D 0x003D # EQUALS SIGN
0x3E 0x003E # GREATER-THAN SIGN
0x3F 0x003F # QUESTION MARK
0x40 0x0040 # COMMERCIAL AT
0x41 0x0041 # LATIN CAPITAL LETTER A
0x42 0x0042 # LATIN CAPITAL LETTER B
0x43 0x0043 # LATIN CAPITAL LETTER C
0x44 0x0044 # LATIN CAPITAL LETTER D
0x45 0x0045 # LATIN CAPITAL LETTER E
0x46 0x0046 # LATIN CAPITAL LETTER F
0x47 0x0047 # LATIN CAPITAL LETTER G
0x48 0x0048 # LATIN CAPITAL LETTER H
0x49 0x0049 # LATIN CAPITAL LETTER I
0x4A 0x004A # LATIN CAPITAL LETTER J
0x4B 0x004B # LATIN CAPITAL LETTER K
0x4C 0x004C # LATIN CAPITAL LETTER L
0x4D 0x004D # LATIN CAPITAL LETTER M
0x4E 0x004E # LATIN CAPITAL LETTER N
0x4F 0x004F # LATIN CAPITAL LETTER O
0x50 0x0050 # LATIN CAPITAL LETTER P
0x51 0x0051 # LATIN CAPITAL LETTER Q
0x52 0x0052 # LATIN CAPITAL LETTER R
0x53 0x0053 # LATIN CAPITAL LETTER S
0x54 0x0054 # LATIN CAPITAL LETTER T
0x55 0x0055 # LATIN CAPITAL LETTER U
0x56 0x0056 # LATIN CAPITAL LETTER V
0x57 0x0057 # LATIN CAPITAL LETTER W
0x58 0x0058 # LATIN CAPITAL LETTER X
0x59 0x0059 # LATIN CAPITAL LETTER Y
0x5A 0x005A # LATIN CAPITAL LETTER Z
0x5B 0x005B # LEFT SQUARE BRACKET
0x5C 0x005C # REVERSE SOLIDUS
0x5D 0x005D # RIGHT SQUARE BRACKET
0x5E 0x005E # CIRCUMFLEX ACCENT
0x5F 0x005F # LOW LINE
0x60 0x0060 # GRAVE ACCENT
0x61 0x0061 # LATIN SMALL LETTER A
0x62 0x0062 # LATIN SMALL LETTER B
0x63 0x0063 # LATIN SMALL LETTER C
0x64 0x0064 # LATIN SMALL LETTER D
0x65 0x0065 # LATIN SMALL LETTER E
0x66 0x0066 # LATIN SMALL LETTER F
0x67 0x0067 # LATIN SMALL LETTER G
0x68 0x0068 # LATIN SMALL LETTER H
0x69 0x0069 # LATIN SMALL LETTER I
0x6A 0x006A # LATIN SMALL LETTER J
0x6B 0x006B # LATIN SMALL LETTER K
0x6C 0x006C # LATIN SMALL LETTER L
0x6D 0x006D # LATIN SMALL LETTER M
0x6E 0x006E # LATIN SMALL LETTER N
0x6F 0x006F # LATIN SMALL LETTER O
0x70 0x0070 # LATIN SMALL LETTER P
0x71 0x0071 # LATIN SMALL LETTER Q
0x72 0x0072 # LATIN SMALL LETTER R
0x73 0x0073 # LATIN SMALL LETTER S
0x74 0x0074 # LATIN SMALL LETTER T
0x75 0x0075 # LATIN SMALL LETTER U
0x76 0x0076 # LATIN SMALL LETTER V
0x77 0x0077 # LATIN SMALL LETTER W
0x78 0x0078 # LATIN SMALL LETTER X
0x79 0x0079 # LATIN SMALL LETTER Y
0x7A 0x007A # LATIN SMALL LETTER Z
0x7B 0x007B # LEFT CURLY BRACKET
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x007E # TILDE
0x7F 0x007F # DELETE
0x80 0x0080 # <control>
0x81 0x0081 # <control>
0x82 0x0082 # <control>
0x83 0x0083 # <control>
0x84 0x0084 # <control>
0x85 0x0085 # <control>
0x86 0x0086 # <control>
0x87 0x0087 # <control>
0x88 0x0088 # <control>
0x89 0x0089 # <control>
0x8A 0x008A # <control>
0x8B 0x008B # <control>
0x8C 0x008C # <control>
0x8D 0x008D # <control>
0x8E 0x008E # <control>
0x8F 0x008F # <control>
0x90 0x0090 # <control>
0x91 0x0091 # <control>
0x92 0x0092 # <control>
0x93 0x0093 # <control>
0x94 0x0094 # <control>
0x95 0x0095 # <control>
0x96 0x0096 # <control>
0x97 0x0097 # <control>
0x98 0x0098 # <control>
0x99 0x0099 # <control>
0x9A 0x009A # <control>
0x9B 0x009B # <control>
0x9C 0x009C # <control>
0x9D 0x009D # <control>
0x9E 0x009E # <control>
0x9F 0x009F # <control>
#0xA0 0x00A0 # NO-BREAK SPACE
0xA1 0x0E01 # THAI CHARACTER KO KAI
0xA2 0x0E02 # THAI CHARACTER KHO KHAI
0xA3 0x0E03 # THAI CHARACTER KHO KHUAT
0xA4 0x0E04 # THAI CHARACTER KHO KHWAI
0xA5 0x0E05 # THAI CHARACTER KHO KHON
0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG
0xA7 0x0E07 # THAI CHARACTER NGO NGU
0xA8 0x0E08 # THAI CHARACTER CHO CHAN
0xA9 0x0E09 # THAI CHARACTER CHO CHING
0xAA 0x0E0A # THAI CHARACTER CHO CHANG
0xAB 0x0E0B # THAI CHARACTER SO SO
0xAC 0x0E0C # THAI CHARACTER CHO CHOE
0xAD 0x0E0D # THAI CHARACTER YO YING
0xAE 0x0E0E # THAI CHARACTER DO CHADA
0xAF 0x0E0F # THAI CHARACTER TO PATAK
0xB0 0x0E10 # THAI CHARACTER THO THAN
0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO
0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO
0xB3 0x0E13 # THAI CHARACTER NO NEN
0xB4 0x0E14 # THAI CHARACTER DO DEK
0xB5 0x0E15 # THAI CHARACTER TO TAO
0xB6 0x0E16 # THAI CHARACTER THO THUNG
0xB7 0x0E17 # THAI CHARACTER THO THAHAN
0xB8 0x0E18 # THAI CHARACTER THO THONG
0xB9 0x0E19 # THAI CHARACTER NO NU
0xBA 0x0E1A # THAI CHARACTER BO BAIMAI
0xBB 0x0E1B # THAI CHARACTER PO PLA
0xBC 0x0E1C # THAI CHARACTER PHO PHUNG
0xBD 0x0E1D # THAI CHARACTER FO FA
0xBE 0x0E1E # THAI CHARACTER PHO PHAN
0xBF 0x0E1F # THAI CHARACTER FO FAN
0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO
0xC1 0x0E21 # THAI CHARACTER MO MA
0xC2 0x0E22 # THAI CHARACTER YO YAK
0xC3 0x0E23 # THAI CHARACTER RO RUA
0xC4 0x0E24 # THAI CHARACTER RU
0xC5 0x0E25 # THAI CHARACTER LO LING
0xC6 0x0E26 # THAI CHARACTER LU
0xC7 0x0E27 # THAI CHARACTER WO WAEN
0xC8 0x0E28 # THAI CHARACTER SO SALA
0xC9 0x0E29 # THAI CHARACTER SO RUSI
0xCA 0x0E2A # THAI CHARACTER SO SUA
0xCB 0x0E2B # THAI CHARACTER HO HIP
0xCC 0x0E2C # THAI CHARACTER LO CHULA
0xCD 0x0E2D # THAI CHARACTER O ANG
0xCE 0x0E2E # THAI CHARACTER HO NOKHUK
0xCF 0x0E2F # THAI CHARACTER PAIYANNOI
0xD0 0x0E30 # THAI CHARACTER SARA A
0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT
0xD2 0x0E32 # THAI CHARACTER SARA AA
0xD3 0x0E33 # THAI CHARACTER SARA AM
0xD4 0x0E34 # THAI CHARACTER SARA I
0xD5 0x0E35 # THAI CHARACTER SARA II
0xD6 0x0E36 # THAI CHARACTER SARA UE
0xD7 0x0E37 # THAI CHARACTER SARA UEE
0xD8 0x0E38 # THAI CHARACTER SARA U
0xD9 0x0E39 # THAI CHARACTER SARA UU
0xDA 0x0E3A # THAI CHARACTER PHINTHU
0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT
0xE0 0x0E40 # THAI CHARACTER SARA E
0xE1 0x0E41 # THAI CHARACTER SARA AE
0xE2 0x0E42 # THAI CHARACTER SARA O
0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN
0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI
0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO
0xE6 0x0E46 # THAI CHARACTER MAIYAMOK
0xE7 0x0E47 # THAI CHARACTER MAITAIKHU
0xE8 0x0E48 # THAI CHARACTER MAI EK
0xE9 0x0E49 # THAI CHARACTER MAI THO
0xEA 0x0E4A # THAI CHARACTER MAI TRI
0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA
0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT
0xED 0x0E4D # THAI CHARACTER NIKHAHIT
0xEE 0x0E4E # THAI CHARACTER YAMAKKAN
0xEF 0x0E4F # THAI CHARACTER FONGMAN
0xF0 0x0E50 # THAI DIGIT ZERO
0xF1 0x0E51 # THAI DIGIT ONE
0xF2 0x0E52 # THAI DIGIT TWO
0xF3 0x0E53 # THAI DIGIT THREE
0xF4 0x0E54 # THAI DIGIT FOUR
0xF5 0x0E55 # THAI DIGIT FIVE
0xF6 0x0E56 # THAI DIGIT SIX
0xF7 0x0E57 # THAI DIGIT SEVEN
0xF8 0x0E58 # THAI DIGIT EIGHT
0xF9 0x0E59 # THAI DIGIT NINE
0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU
0xFB 0x0E5B # THAI CHARACTER KHOMUT