Add character properties checking and a comment about problematic characters

This commit is contained in:
Vladimir 'phcoder' Serbinenko 2010-03-23 23:19:27 +01:00
parent 832d13705c
commit 521bf3018f
2 changed files with 42 additions and 0 deletions

View file

@ -16,6 +16,25 @@
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Current problems with Unicode rendering:
- B and BN bidi type characters (ignored)
- Mc type characters with combining class 0 (treated as non-combining)
- Mn type characters with combining class 0 (treated as non-combining)
- Me type characters with combining class 0 (treated as non-combining)
- Cf type characters (ignored)
- Cc type characters (ignored)
- Line-breaking rules (e.g. Zs type characters)
- Indic languages
- Arabic shaping
- Zl and Zp characters
- Combining characters of types 7, 8, 9, 21, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 84, 91, 103, 107, 118, 122, 129, 130, 132,
218, 224, 226, 233, 234
- Private use surrogates
- Private use characters (not really a problem)
*/
/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
Return the number of characters converted. DEST must be able to hold

View file

@ -45,12 +45,35 @@ for line in infile:
print ("UnicodeData.txt uses combination type %d. Conflict." \
% curcombtype)
raise
# Warn when the general category field (sp[2]) is not one of the
# two-letter categories this script knows about.
if sp[2] not in ("Lu", "Ll", "Lt", "Lm", "Lo",
                 "Me", "Mc", "Mn",
                 "Nd", "Nl", "No",
                 "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
                 "Sm", "Sc", "Sk", "So",
                 "Zs", "Zl", "Zp",
                 "Cc", "Cf", "Cs", "Co"):
    print ("WARNING: Unknown type %s" % sp[2])
# Mark characters (Me/Mc/Mn) whose combining class is 0 are given a
# synthetic class (253/254/255) in place of 0.
if curcombtype == 0:
    if sp[2] == "Me":
        curcombtype = 253
    elif sp[2] == "Mc":
        curcombtype = 254
    elif sp[2] == "Mn":
        curcombtype = 255
# Warn about combining classes outside the recognised set: 2..6 are
# never recognised, and from 37 upwards only the values listed below
# (including the synthetic 253..255 assigned above) are.
if 2 <= curcombtype <= 6 \
   or (curcombtype >= 37
       and curcombtype not in (84, 91, 103, 107, 118, 122, 129, 130, 132,
                               202, 214, 216, 218, 220, 222, 224, 226,
                               228, 230, 232, 233, 234, 240,
                               253, 254, 255)):
    print ("WARNING: Unknown combining type %d" % curcombtype)
if lastcode + 1 != curcode or curbiditype != lastbiditype \
or curcombtype != lastcombtype or curmirrortype != lastmirrortype:
if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0 or \