Add character properties checking and a comment about problematic characters
This commit is contained in:
parent
832d13705c
commit
521bf3018f
2 changed files with 42 additions and 0 deletions
|
@ -16,6 +16,25 @@
|
|||
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
Current problems with Unicode rendering:
|
||||
- B and BN bidi type characters (ignored)
|
||||
- Mc type characters with combining class 0 (treated as non-combining)
|
||||
- Mn type characters with combining class 0 (treated as non-combining)
|
||||
- Me type characters with combining class 0 (treated as non-combining)
|
||||
- Cf type characters (ignored)
|
||||
- Cc type characters (ignored)
|
||||
- Line-breaking rules (e.g. Zs type characters)
|
||||
- Indic languages
|
||||
- Arabic shaping
|
||||
- Zl and Zp characters
|
||||
- Combining characters with combining classes 7, 8, 9, 21, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 84, 91, 103, 107, 118, 122, 129, 130, 132,
|
||||
218, 224, 226, 233, 234
|
||||
- Private use surrogates
|
||||
- Private use characters (not really a problem)
|
||||
*/
|
||||
|
||||
/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
|
||||
bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
|
||||
Return the number of characters converted. DEST must be able to hold
|
||||
|
|
|
@ -45,12 +45,35 @@ for line in infile:
|
|||
print ("UnicodeData.txt uses combination type %d. Conflict." \
|
||||
% curcombtype)
|
||||
raise
|
||||
if sp[2] != "Lu" and sp[2] != "Ll" and sp[2] != "Lt" and sp[2] != "Lm" \
|
||||
and sp[2] != "Lo"\
|
||||
and sp[2] != "Me" and sp[2] != "Mc" and sp[2] != "Mn" \
|
||||
and sp[2] != "Nd" and sp[2] != "Nl" and sp[2] != "No" \
|
||||
and sp[2] != "Pc" and sp[2] != "Pd" and sp[2] != "Ps" \
|
||||
and sp[2] != "Pe" and sp[2] != "Pi" and sp[2] != "Pf" \
|
||||
and sp[2] != "Po" \
|
||||
and sp[2] != "Sm" and sp[2] != "Sc" and sp[2] != "Sk" \
|
||||
and sp[2] != "So"\
|
||||
and sp[2] != "Zs" and sp[2] != "Zl" and sp[2] != "Zp" \
|
||||
and sp[2] != "Cc" and sp[2] != "Cf" and sp[2] != "Cs" \
|
||||
and sp[2] != "Co":
|
||||
print ("WARNING: Unknown type %s" % sp[2])
|
||||
if curcombtype == 0 and sp[2] == "Me":
|
||||
curcombtype = 253
|
||||
if curcombtype == 0 and sp[2] == "Mc":
|
||||
curcombtype = 254
|
||||
if curcombtype == 0 and sp[2] == "Mn":
|
||||
curcombtype = 255
|
||||
if (curcombtype >= 2 and curcombtype <= 6) \
|
||||
or (curcombtype >= 37 and curcombtype != 84 and curcombtype != 91 and curcombtype != 103 and curcombtype != 107 and curcombtype != 118 and curcombtype != 122 and curcombtype != 129 and curcombtype != 130 and curcombtype != 132 and curcombtype != 202 and \
|
||||
curcombtype != 214 and curcombtype != 216 and \
|
||||
curcombtype != 218 and curcombtype != 220 and \
|
||||
curcombtype != 222 and curcombtype != 224 and curcombtype != 226 and curcombtype != 228 and \
|
||||
curcombtype != 230 and curcombtype != 232 and curcombtype != 233 and \
|
||||
curcombtype != 234 and \
|
||||
curcombtype != 240 and curcombtype != 253 and \
|
||||
curcombtype != 254 and curcombtype != 255):
|
||||
print ("WARNING: Unknown combining type %d" % curcombtype)
|
||||
if lastcode + 1 != curcode or curbiditype != lastbiditype \
|
||||
or curcombtype != lastcombtype or curmirrortype != lastmirrortype:
|
||||
if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0 or \
|
||||
|
|
Loading…
Reference in a new issue