Import data for arabic joining computation
This commit is contained in:
parent
e83200d891
commit
03e4ef0293
4 changed files with 445 additions and 9 deletions
|
@ -13,7 +13,7 @@ echo timestamp > stamp-h.in
|
||||||
|
|
||||||
python util/import_gcry.py lib/libgcrypt/ .
|
python util/import_gcry.py lib/libgcrypt/ .
|
||||||
|
|
||||||
python util/import_unicode.py unicode/UnicodeData.txt unicode/BidiMirroring.txt unidata.c
|
python util/import_unicode.py unicode/UnicodeData.txt unicode/BidiMirroring.txt unicode/ArabicShaping.txt unidata.c
|
||||||
|
|
||||||
for rmk in conf/*.rmk ${GRUB_CONTRIB}/*/conf/*.rmk; do
|
for rmk in conf/*.rmk ${GRUB_CONTRIB}/*/conf/*.rmk; do
|
||||||
if test -e $rmk ; then
|
if test -e $rmk ; then
|
||||||
|
|
|
@ -36,6 +36,7 @@ struct grub_unicode_compact_range
|
||||||
grub_uint8_t bidi_type:5;
|
grub_uint8_t bidi_type:5;
|
||||||
grub_uint8_t comb_type;
|
grub_uint8_t comb_type;
|
||||||
grub_uint8_t bidi_mirror:1;
|
grub_uint8_t bidi_mirror:1;
|
||||||
|
grub_uint8_t join_type:3;
|
||||||
} __attribute__ ((packed));
|
} __attribute__ ((packed));
|
||||||
|
|
||||||
enum grub_bidi_type
|
enum grub_bidi_type
|
||||||
|
@ -61,6 +62,16 @@ enum grub_bidi_type
|
||||||
GRUB_BIDI_TYPE_ON
|
GRUB_BIDI_TYPE_ON
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum grub_join_type
|
||||||
|
{
|
||||||
|
GRUB_JOIN_TYPE_NONJOINING = 0,
|
||||||
|
GRUB_JOIN_TYPE_LEFT = 1,
|
||||||
|
GRUB_JOIN_TYPE_RIGHT = 2,
|
||||||
|
GRUB_JOIN_TYPE_DUAL = 3,
|
||||||
|
GRUB_JOIN_TYPE_CAUSING = 4,
|
||||||
|
GRUB_JOIN_TYPE_TRANSPARENT = 5
|
||||||
|
};
|
||||||
|
|
||||||
enum grub_comb_type
|
enum grub_comb_type
|
||||||
{
|
{
|
||||||
GRUB_UNICODE_COMB_NONE = 0,
|
GRUB_UNICODE_COMB_NONE = 0,
|
||||||
|
@ -103,7 +114,7 @@ struct grub_unicode_glyph
|
||||||
{
|
{
|
||||||
grub_uint32_t base;
|
grub_uint32_t base;
|
||||||
grub_uint16_t variant:9;
|
grub_uint16_t variant:9;
|
||||||
grub_uint8_t attributes:1;
|
grub_uint8_t attributes:5;
|
||||||
grub_size_t ncomb;
|
grub_size_t ncomb;
|
||||||
struct grub_unicode_combining {
|
struct grub_unicode_combining {
|
||||||
grub_uint32_t code;
|
grub_uint32_t code;
|
||||||
|
@ -115,12 +126,30 @@ struct grub_unicode_glyph
|
||||||
};
|
};
|
||||||
|
|
||||||
#define GRUB_UNICODE_GLYPH_ATTRIBUTE_MIRROR 0x1
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTE_MIRROR 0x1
|
||||||
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT 1
|
||||||
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED 0x2
|
||||||
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED \
|
||||||
|
(GRUB_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED \
|
||||||
|
<< GRUB_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT)
|
||||||
|
/* Set iff the corresponding joining flags come from ZWJ or ZWNJ. */
|
||||||
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT 0x8
|
||||||
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED_EXPLICIT \
|
||||||
|
(GRUB_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT \
|
||||||
|
<< GRUB_UNICODE_GLYPH_ATTRIBUTES_JOIN_LEFT_TO_RIGHT_SHIFT)
|
||||||
|
#define GRUB_UNICODE_GLYPH_ATTRIBUTES_JOIN \
|
||||||
|
(GRUB_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED \
|
||||||
|
| GRUB_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED \
|
||||||
|
| GRUB_UNICODE_GLYPH_ATTRIBUTE_LEFT_JOINED_EXPLICIT \
|
||||||
|
| GRUB_UNICODE_GLYPH_ATTRIBUTE_RIGHT_JOINED_EXPLICIT)
|
||||||
|
|
||||||
#define GRUB_UNICODE_COMBINING_GRAPHEME_JOINER 0x34f
|
#define GRUB_UNICODE_COMBINING_GRAPHEME_JOINER 0x34f
|
||||||
#define GRUB_UNICODE_VARIATION_SELECTOR_1 0xfe00
|
#define GRUB_UNICODE_VARIATION_SELECTOR_1 0xfe00
|
||||||
#define GRUB_UNICODE_VARIATION_SELECTOR_16 0xfe0f
|
#define GRUB_UNICODE_VARIATION_SELECTOR_16 0xfe0f
|
||||||
#define GRUB_UNICODE_VARIATION_SELECTOR_17 0xe0100
|
#define GRUB_UNICODE_VARIATION_SELECTOR_17 0xe0100
|
||||||
#define GRUB_UNICODE_VARIATION_SELECTOR_256 0xe01ef
|
#define GRUB_UNICODE_VARIATION_SELECTOR_256 0xe01ef
|
||||||
#define GRUB_UNICODE_HEBREW_WAW 0x05d5
|
#define GRUB_UNICODE_HEBREW_WAW 0x05d5
|
||||||
|
#define GRUB_UNICODE_ZWNJ 0x200c
|
||||||
|
#define GRUB_UNICODE_ZWJ 0x200d
|
||||||
|
|
||||||
extern struct grub_unicode_compact_range grub_unicode_compact[];
|
extern struct grub_unicode_compact_range grub_unicode_compact[];
|
||||||
extern struct grub_unicode_bidi_pair grub_unicode_bidi_pairs[];
|
extern struct grub_unicode_bidi_pair grub_unicode_bidi_pairs[];
|
||||||
|
|
373
unicode/ArabicShaping.txt
Normal file
373
unicode/ArabicShaping.txt
Normal file
|
@ -0,0 +1,373 @@
|
||||||
|
# ArabicShaping-5.2.0.txt
|
||||||
|
# Date: 2009-08-17, 11:11:00 PDT [KW]
|
||||||
|
#
|
||||||
|
# This file is a normative contributory data file in the
|
||||||
|
# Unicode Character Database.
|
||||||
|
#
|
||||||
|
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||||
|
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||||
|
#
|
||||||
|
# This file defines the shaping classes for Arabic, Syriac, and N'Ko
|
||||||
|
# positional shaping, repeating in machine readable form the
|
||||||
|
# information exemplified in Tables 8-3, 8-7, 8-8, 8-11, 8-12,
|
||||||
|
# 8-13, and 13-5 of The Unicode Standard, Version 5.2.
|
||||||
|
#
|
||||||
|
# See sections 8.2, 8.3, and 13.5 of The Unicode Standard, Version 5.2
|
||||||
|
# for more information.
|
||||||
|
#
|
||||||
|
# Each line contains four fields, separated by a semicolon.
|
||||||
|
#
|
||||||
|
# Field 0: the code point, in 4-digit hexadecimal
|
||||||
|
# form, of an Arabic, Syriac, or N'Ko character.
|
||||||
|
#
|
||||||
|
# Field 1: gives a short schematic name for that character,
|
||||||
|
# abbreviated from the normative Unicode character name.
|
||||||
|
#
|
||||||
|
# Field 2: defines the joining type (property name: Joining_Type)
|
||||||
|
# R Right_Joining
|
||||||
|
# L Left_Joining
|
||||||
|
# D Dual_Joining
|
||||||
|
# C Join_Causing
|
||||||
|
# U Non_Joining
|
||||||
|
# T Transparent
|
||||||
|
# See Section 8.2, Arabic for more information on these types.
|
||||||
|
#
|
||||||
|
# Field 3: defines the joining group (property name: Joining_Group)
|
||||||
|
#
|
||||||
|
# The values of the joining group are based schematically on character
|
||||||
|
# names. Where a schematic character name consists of two or more parts separated
|
||||||
|
# by spaces, the formal Joining_Group property value, as specified in
|
||||||
|
# PropertyValueAliases.txt, consists of the same name parts joined by
|
||||||
|
# underscores. Hence, the entry:
|
||||||
|
#
|
||||||
|
# 0629; TEH MARBUTA; R; TEH MARBUTA
|
||||||
|
#
|
||||||
|
# corresponds to [Joining_Group = Teh_Marbuta].
|
||||||
|
#
|
||||||
|
# Note: For historical reasons, the property value [Joining_Group = Hamza_On_Heh_Goal]
|
||||||
|
# is anachronistically named. It used to apply to both of the following characters
|
||||||
|
# in earlier versions of the standard:
|
||||||
|
#
|
||||||
|
# U+06C2 ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
|
||||||
|
# U+06C3 ARABIC LETTER TEH MARBUTA GOAL
|
||||||
|
#
|
||||||
|
# However, it currently applies only to U+06C3, and *not* to U+06C2.
|
||||||
|
# To avoid destabilizing existing Joining_Group property aliases, the
|
||||||
|
# value Hamza_On_Heh_Goal has not been changed, despite the fact that it
|
||||||
|
# no longer applies to Hamza On Heh Goal, but only to Teh Marbuta Goal.
|
||||||
|
#
|
||||||
|
# When other cursive scripts are added to the Unicode Standard in
|
||||||
|
# the future, the joining group value of all its letters will default
|
||||||
|
# to jg=No_Joining_Group in this data file. Other, more specific
|
||||||
|
# joining group values will be defined only if an explicit proposal
|
||||||
|
# to define those values exactly has been approved by the UTC. This
|
||||||
|
# is the convention exemplified by the N'Ko script. Only the Arabic
|
||||||
|
# and Syriac scripts currently have explicit joining group values defined.
|
||||||
|
#
|
||||||
|
# Note: Code points that are not explicitly listed in this file are
|
||||||
|
# either of joining type T or U:
|
||||||
|
#
|
||||||
|
# - Those that not explicitly listed that are of General Category Mn, Me, or Cf
|
||||||
|
# have joining type T.
|
||||||
|
# - All others not explicitly listed have joining type U.
|
||||||
|
#
|
||||||
|
# For an explicit listing of characters of joining type T, see
|
||||||
|
# the derived property file DerivedJoiningType.txt.
|
||||||
|
#
|
||||||
|
# There are currently no characters of joining type L defined in Unicode.
|
||||||
|
#
|
||||||
|
# #############################################################
|
||||||
|
|
||||||
|
# Unicode; Schematic Name; Joining Type; Joining Group
|
||||||
|
|
||||||
|
# Arabic characters
|
||||||
|
|
||||||
|
0600; ARABIC NUMBER SIGN; U; No_Joining_Group
|
||||||
|
0601; ARABIC SIGN SANAH; U; No_Joining_Group
|
||||||
|
0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group
|
||||||
|
0603; ARABIC SIGN SAFHA; U; No_Joining_Group
|
||||||
|
0608; ARABIC RAY; U; No_Joining_Group
|
||||||
|
060B; AFGHANI SIGN; U; No_Joining_Group
|
||||||
|
0621; HAMZA; U; No_Joining_Group
|
||||||
|
0622; MADDA ON ALEF; R; ALEF
|
||||||
|
0623; HAMZA ON ALEF; R; ALEF
|
||||||
|
0624; HAMZA ON WAW; R; WAW
|
||||||
|
0625; HAMZA UNDER ALEF; R; ALEF
|
||||||
|
0626; HAMZA ON YEH; D; YEH
|
||||||
|
0627; ALEF; R; ALEF
|
||||||
|
0628; BEH; D; BEH
|
||||||
|
0629; TEH MARBUTA; R; TEH MARBUTA
|
||||||
|
062A; TEH; D; BEH
|
||||||
|
062B; THEH; D; BEH
|
||||||
|
062C; JEEM; D; HAH
|
||||||
|
062D; HAH; D; HAH
|
||||||
|
062E; KHAH; D; HAH
|
||||||
|
062F; DAL; R; DAL
|
||||||
|
0630; THAL; R; DAL
|
||||||
|
0631; REH; R; REH
|
||||||
|
0632; ZAIN; R; REH
|
||||||
|
0633; SEEN; D; SEEN
|
||||||
|
0634; SHEEN; D; SEEN
|
||||||
|
0635; SAD; D; SAD
|
||||||
|
0636; DAD; D; SAD
|
||||||
|
0637; TAH; D; TAH
|
||||||
|
0638; ZAH; D; TAH
|
||||||
|
0639; AIN; D; AIN
|
||||||
|
063A; GHAIN; D; AIN
|
||||||
|
063B; KEHEH WITH 2 DOTS ABOVE; D; GAF
|
||||||
|
063C; KEHEH WITH 3 DOTS BELOW; D; GAF
|
||||||
|
063D; FARSI YEH WITH INVERTED V; D; FARSI YEH
|
||||||
|
063E; FARSI YEH WITH 2 DOTS ABOVE; D; FARSI YEH
|
||||||
|
063F; FARSI YEH WITH 3 DOTS ABOVE; D; FARSI YEH
|
||||||
|
0640; TATWEEL; C; No_Joining_Group
|
||||||
|
0641; FEH; D; FEH
|
||||||
|
0642; QAF; D; QAF
|
||||||
|
0643; KAF; D; KAF
|
||||||
|
0644; LAM; D; LAM
|
||||||
|
0645; MEEM; D; MEEM
|
||||||
|
0646; NOON; D; NOON
|
||||||
|
0647; HEH; D; HEH
|
||||||
|
0648; WAW; R; WAW
|
||||||
|
0649; ALEF MAKSURA; D; YEH
|
||||||
|
064A; YEH; D; YEH
|
||||||
|
066E; DOTLESS BEH; D; BEH
|
||||||
|
066F; DOTLESS QAF; D; QAF
|
||||||
|
0671; HAMZAT WASL ON ALEF; R; ALEF
|
||||||
|
0672; WAVY HAMZA ON ALEF; R; ALEF
|
||||||
|
0673; WAVY HAMZA UNDER ALEF; R; ALEF
|
||||||
|
0674; HIGH HAMZA; U; No_Joining_Group
|
||||||
|
0675; HIGH HAMZA ALEF; R; ALEF
|
||||||
|
0676; HIGH HAMZA WAW; R; WAW
|
||||||
|
0677; HIGH HAMZA WAW WITH DAMMA; R; WAW
|
||||||
|
0678; HIGH HAMZA YEH; D; YEH
|
||||||
|
0679; TEH WITH SMALL TAH; D; BEH
|
||||||
|
067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH
|
||||||
|
067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH
|
||||||
|
067C; TEH WITH RING; D; BEH
|
||||||
|
067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH
|
||||||
|
067E; TEH WITH 3 DOTS BELOW; D; BEH
|
||||||
|
067F; TEH WITH 4 DOTS ABOVE; D; BEH
|
||||||
|
0680; BEH WITH 4 DOTS BELOW; D; BEH
|
||||||
|
0681; HAMZA ON HAH; D; HAH
|
||||||
|
0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH
|
||||||
|
0683; HAH WITH MIDDLE 2 DOTS; D; HAH
|
||||||
|
0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH
|
||||||
|
0685; HAH WITH 3 DOTS ABOVE; D; HAH
|
||||||
|
0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH
|
||||||
|
0687; HAH WITH MIDDLE 4 DOTS; D; HAH
|
||||||
|
0688; DAL WITH SMALL TAH; R; DAL
|
||||||
|
0689; DAL WITH RING; R; DAL
|
||||||
|
068A; DAL WITH DOT BELOW; R; DAL
|
||||||
|
068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL
|
||||||
|
068C; DAL WITH 2 DOTS ABOVE; R; DAL
|
||||||
|
068D; DAL WITH 2 DOTS BELOW; R; DAL
|
||||||
|
068E; DAL WITH 3 DOTS ABOVE; R; DAL
|
||||||
|
068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL
|
||||||
|
0690; DAL WITH 4 DOTS ABOVE; R; DAL
|
||||||
|
0691; REH WITH SMALL TAH; R; REH
|
||||||
|
0692; REH WITH SMALL V; R; REH
|
||||||
|
0693; REH WITH RING; R; REH
|
||||||
|
0694; REH WITH DOT BELOW; R; REH
|
||||||
|
0695; REH WITH SMALL V BELOW; R; REH
|
||||||
|
0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH
|
||||||
|
0697; REH WITH 2 DOTS ABOVE; R; REH
|
||||||
|
0698; REH WITH 3 DOTS ABOVE; R; REH
|
||||||
|
0699; REH WITH 4 DOTS ABOVE; R; REH
|
||||||
|
069A; SEEN WITH DOT BELOW AND DOT ABOVE; D; SEEN
|
||||||
|
069B; SEEN WITH 3 DOTS BELOW; D; SEEN
|
||||||
|
069C; SEEN WITH 3 DOTS BELOW AND 3 DOTS ABOVE; D; SEEN
|
||||||
|
069D; SAD WITH 2 DOTS BELOW; D; SAD
|
||||||
|
069E; SAD WITH 3 DOTS ABOVE; D; SAD
|
||||||
|
069F; TAH WITH 3 DOTS ABOVE; D; TAH
|
||||||
|
06A0; AIN WITH 3 DOTS ABOVE; D; AIN
|
||||||
|
06A1; DOTLESS FEH; D; FEH
|
||||||
|
06A2; FEH WITH DOT MOVED BELOW; D; FEH
|
||||||
|
06A3; FEH WITH DOT BELOW; D; FEH
|
||||||
|
06A4; FEH WITH 3 DOTS ABOVE; D; FEH
|
||||||
|
06A5; FEH WITH 3 DOTS BELOW; D; FEH
|
||||||
|
06A6; FEH WITH 4 DOTS ABOVE; D; FEH
|
||||||
|
06A7; QAF WITH DOT ABOVE; D; QAF
|
||||||
|
06A8; QAF WITH 3 DOTS ABOVE; D; QAF
|
||||||
|
06A9; KEHEH; D; GAF
|
||||||
|
06AA; SWASH KAF; D; SWASH KAF
|
||||||
|
06AB; KAF WITH RING; D; GAF
|
||||||
|
06AC; KAF WITH DOT ABOVE; D; KAF
|
||||||
|
06AD; KAF WITH 3 DOTS ABOVE; D; KAF
|
||||||
|
06AE; KAF WITH 3 DOTS BELOW; D; KAF
|
||||||
|
06AF; GAF; D; GAF
|
||||||
|
06B0; GAF WITH RING; D; GAF
|
||||||
|
06B1; GAF WITH 2 DOTS ABOVE; D; GAF
|
||||||
|
06B2; GAF WITH 2 DOTS BELOW; D; GAF
|
||||||
|
06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF
|
||||||
|
06B4; GAF WITH 3 DOTS ABOVE; D; GAF
|
||||||
|
06B5; LAM WITH SMALL V; D; LAM
|
||||||
|
06B6; LAM WITH DOT ABOVE; D; LAM
|
||||||
|
06B7; LAM WITH 3 DOTS ABOVE; D; LAM
|
||||||
|
06B8; LAM WITH 3 DOTS BELOW; D; LAM
|
||||||
|
06B9; NOON WITH DOT BELOW; D; NOON
|
||||||
|
06BA; DOTLESS NOON; D; NOON
|
||||||
|
06BB; DOTLESS NOON WITH SMALL TAH; D; NOON
|
||||||
|
06BC; NOON WITH RING; D; NOON
|
||||||
|
06BD; NYA; D; NYA
|
||||||
|
06BE; KNOTTED HEH; D; KNOTTED HEH
|
||||||
|
06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH
|
||||||
|
06C0; HAMZA ON HEH; R; TEH MARBUTA
|
||||||
|
06C1; HEH GOAL; D; HEH GOAL
|
||||||
|
06C2; HAMZA ON HEH GOAL; D; HEH GOAL
|
||||||
|
06C3; TEH MARBUTA GOAL; R; HAMZA ON HEH GOAL
|
||||||
|
06C4; WAW WITH RING; R; WAW
|
||||||
|
06C5; WAW WITH BAR; R; WAW
|
||||||
|
06C6; WAW WITH SMALL V; R; WAW
|
||||||
|
06C7; WAW WITH DAMMA; R; WAW
|
||||||
|
06C8; WAW WITH ALEF ABOVE; R; WAW
|
||||||
|
06C9; WAW WITH INVERTED SMALL V; R; WAW
|
||||||
|
06CA; WAW WITH 2 DOTS ABOVE; R; WAW
|
||||||
|
06CB; WAW WITH 3 DOTS ABOVE; R; WAW
|
||||||
|
06CC; FARSI YEH; D; FARSI YEH
|
||||||
|
06CD; YEH WITH TAIL; R; YEH WITH TAIL
|
||||||
|
06CE; FARSI YEH WITH SMALL V; D; FARSI YEH
|
||||||
|
06CF; WAW WITH DOT ABOVE; R; WAW
|
||||||
|
06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH
|
||||||
|
06D1; YEH WITH 3 DOTS BELOW; D; YEH
|
||||||
|
06D2; YEH BARREE; R; YEH BARREE
|
||||||
|
06D3; HAMZA ON YEH BARREE; R; YEH BARREE
|
||||||
|
06D5; AE; R; TEH MARBUTA
|
||||||
|
06DD; ARABIC END OF AYAH; U; No_Joining_Group
|
||||||
|
06EE; DAL WITH INVERTED V; R; DAL
|
||||||
|
06EF; REH WITH INVERTED V; R; REH
|
||||||
|
06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
|
||||||
|
06FB; DAD WITH DOT BELOW; D; SAD
|
||||||
|
06FC; GHAIN WITH DOT BELOW; D; AIN
|
||||||
|
06FF; HEH WITH INVERTED V; D; KNOTTED HEH
|
||||||
|
|
||||||
|
# Syriac characters
|
||||||
|
|
||||||
|
0710; ALAPH; R; ALAPH
|
||||||
|
0712; BETH; D; BETH
|
||||||
|
0713; GAMAL; D; GAMAL
|
||||||
|
0714; GAMAL GARSHUNI; D; GAMAL
|
||||||
|
0715; DALATH; R; DALATH RISH
|
||||||
|
0716; DOTLESS DALATH RISH; R; DALATH RISH
|
||||||
|
0717; HE; R; HE
|
||||||
|
0718; WAW; R; SYRIAC WAW
|
||||||
|
0719; ZAIN; R; ZAIN
|
||||||
|
071A; HETH; D; HETH
|
||||||
|
071B; TETH; D; TETH
|
||||||
|
071C; TETH GARSHUNI; D; TETH
|
||||||
|
071D; YUDH; D; YUDH
|
||||||
|
071E; YUDH HE; R; YUDH HE
|
||||||
|
071F; KAPH; D; KAPH
|
||||||
|
0720; LAMADH; D; LAMADH
|
||||||
|
0721; MIM; D; MIM
|
||||||
|
0722; NUN; D; NUN
|
||||||
|
0723; SEMKATH; D; SEMKATH
|
||||||
|
0724; FINAL SEMKATH; D; FINAL SEMKATH
|
||||||
|
0725; E; D; E
|
||||||
|
0726; PE; D; PE
|
||||||
|
0727; REVERSED PE; D; REVERSED PE
|
||||||
|
0728; SADHE; R; SADHE
|
||||||
|
0729; QAPH; D; QAPH
|
||||||
|
072A; RISH; R; DALATH RISH
|
||||||
|
072B; SHIN; D; SHIN
|
||||||
|
072C; TAW; R; TAW
|
||||||
|
072D; PERSIAN BHETH; D; BETH
|
||||||
|
072E; PERSIAN GHAMAL; D; GAMAL
|
||||||
|
072F; PERSIAN DHALATH; R; DALATH RISH
|
||||||
|
074D; SOGDIAN ZHAIN; R; ZHAIN
|
||||||
|
074E; SOGDIAN KHAPH; D; KHAPH
|
||||||
|
074F; SOGDIAN FE; D; FE
|
||||||
|
|
||||||
|
# Arabic supplement characters
|
||||||
|
|
||||||
|
0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH
|
||||||
|
0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH
|
||||||
|
0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH
|
||||||
|
0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH
|
||||||
|
0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH
|
||||||
|
0755; BEH WITH INVERTED SMALL V BELOW; D; BEH
|
||||||
|
0756; BEH WITH SMALL V; D; BEH
|
||||||
|
0757; HAH WITH 2 DOTS ABOVE; D; HAH
|
||||||
|
0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH
|
||||||
|
0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL
|
||||||
|
075A; DAL WITH INVERTED SMALL V BELOW; R; DAL
|
||||||
|
075B; REH WITH STROKE; R; REH
|
||||||
|
075C; SEEN WITH 4 DOTS ABOVE; D; SEEN
|
||||||
|
075D; AIN WITH 2 DOTS ABOVE; D; AIN
|
||||||
|
075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN
|
||||||
|
075F; AIN WITH 2 DOTS VERTICALLY ABOVE; D; AIN
|
||||||
|
0760; FEH WITH 2 DOTS BELOW; D; FEH
|
||||||
|
0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH
|
||||||
|
0762; KEHEH WITH DOT ABOVE; D; GAF
|
||||||
|
0763; KEHEH WITH 3 DOTS ABOVE; D; GAF
|
||||||
|
0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF
|
||||||
|
0765; MEEM WITH DOT ABOVE; D; MEEM
|
||||||
|
0766; MEEM WITH DOT BELOW; D; MEEM
|
||||||
|
0767; NOON WITH 2 DOTS BELOW; D; NOON
|
||||||
|
0768; NOON WITH SMALL TAH; D; NOON
|
||||||
|
0769; NOON WITH SMALL V; D; NOON
|
||||||
|
076A; LAM WITH BAR; D; LAM
|
||||||
|
076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH
|
||||||
|
076C; REH WITH HAMZA ABOVE; R; REH
|
||||||
|
076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN
|
||||||
|
076E; HAH WITH SMALL TAH BELOW; D; HAH
|
||||||
|
076F; HAH WITH SMALL TAH AND 2 DOTS; D; HAH
|
||||||
|
0770; SEEN WITH SMALL TAH AND 2 DOTS; D; SEEN
|
||||||
|
0771; REH WITH SMALL TAH AND 2 DOTS; R; REH
|
||||||
|
0772; HAH WITH SMALL TAH ABOVE; D; HAH
|
||||||
|
0773; ALEF WITH DIGIT TWO ABOVE; R; ALEF
|
||||||
|
0774; ALEF WITH DIGIT THREE ABOVE; R; ALEF
|
||||||
|
0775; FARSI YEH WITH DIGIT TWO ABOVE; D; FARSI YEH
|
||||||
|
0776; FARSI YEH WITH DIGIT THREE ABOVE; D; FARSI YEH
|
||||||
|
0777; YEH WITH DIGIT FOUR BELOW; D; YEH
|
||||||
|
0778; WAW WITH DIGIT TWO ABOVE; R; WAW
|
||||||
|
0779; WAW WITH DIGIT THREE ABOVE; R; WAW
|
||||||
|
077A; YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE
|
||||||
|
077B; YEH BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE
|
||||||
|
077C; HAH WITH DIGIT FOUR BELOW; D; HAH
|
||||||
|
077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN
|
||||||
|
077E; SEEN WITH INVERTED V; D; SEEN
|
||||||
|
077F; KAF WITH 2 DOTS ABOVE; D; KAF
|
||||||
|
|
||||||
|
# N'Ko Characters
|
||||||
|
|
||||||
|
07CA; NKO A; D; No_Joining_Group
|
||||||
|
07CB; NKO EE; D; No_Joining_Group
|
||||||
|
07CC; NKO I; D; No_Joining_Group
|
||||||
|
07CD; NKO E; D; No_Joining_Group
|
||||||
|
07CE; NKO U; D; No_Joining_Group
|
||||||
|
07CF; NKO OO; D; No_Joining_Group
|
||||||
|
07D0; NKO O; D; No_Joining_Group
|
||||||
|
07D1; NKO DAGBASINNA; D; No_Joining_Group
|
||||||
|
07D2; NKO N; D; No_Joining_Group
|
||||||
|
07D3; NKO BA; D; No_Joining_Group
|
||||||
|
07D4; NKO PA; D; No_Joining_Group
|
||||||
|
07D5; NKO TA; D; No_Joining_Group
|
||||||
|
07D6; NKO JA; D; No_Joining_Group
|
||||||
|
07D7; NKO CHA; D; No_Joining_Group
|
||||||
|
07D8; NKO DA; D; No_Joining_Group
|
||||||
|
07D9; NKO RA; D; No_Joining_Group
|
||||||
|
07DA; NKO RRA; D; No_Joining_Group
|
||||||
|
07DB; NKO SA; D; No_Joining_Group
|
||||||
|
07DC; NKO GBA; D; No_Joining_Group
|
||||||
|
07DD; NKO FA; D; No_Joining_Group
|
||||||
|
07DE; NKO KA; D; No_Joining_Group
|
||||||
|
07DF; NKO LA; D; No_Joining_Group
|
||||||
|
07E0; NKO NA WOLOSO; D; No_Joining_Group
|
||||||
|
07E1; NKO MA; D; No_Joining_Group
|
||||||
|
07E2; NKO NYA; D; No_Joining_Group
|
||||||
|
07E3; NKO NA; D; No_Joining_Group
|
||||||
|
07E4; NKO HA; D; No_Joining_Group
|
||||||
|
07E5; NKO WA; D; No_Joining_Group
|
||||||
|
07E6; NKO YA; D; No_Joining_Group
|
||||||
|
07E7; NKO NYA WOLOSO; D; No_Joining_Group
|
||||||
|
07E8; NKO JONA JA; D; No_Joining_Group
|
||||||
|
07E9; NKO JONA CHA; D; No_Joining_Group
|
||||||
|
07EA; NKO JONA RA; D; No_Joining_Group
|
||||||
|
07FA; NKO LAJANYALAN; C; No_Joining_Group
|
||||||
|
|
||||||
|
# Other
|
||||||
|
|
||||||
|
200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
|
||||||
|
200D; ZERO WIDTH JOINER; C; No_Joining_Group
|
||||||
|
|
||||||
|
# EOF
|
|
@ -24,8 +24,33 @@ import datetime
|
||||||
if len (sys.argv) < 3:
|
if len (sys.argv) < 3:
|
||||||
print ("Usage: %s SOURCE DESTINATION" % sys.argv[0])
|
print ("Usage: %s SOURCE DESTINATION" % sys.argv[0])
|
||||||
exit (0)
|
exit (0)
|
||||||
|
infile = open (sys.argv[3], "r")
|
||||||
|
joining = {}
|
||||||
|
for line in infile:
|
||||||
|
line = re.sub ("#.*$", "", line)
|
||||||
|
line = line.replace ("\n", "")
|
||||||
|
line = line.replace (" ", "")
|
||||||
|
if len (line) == 0 or line[0] == '\n':
|
||||||
|
continue
|
||||||
|
sp = line.split (";")
|
||||||
|
curcode = int (sp[0], 16)
|
||||||
|
if sp[2] == "U":
|
||||||
|
joining[curcode] = "GRUB_JOIN_TYPE_NONJOINING"
|
||||||
|
elif sp[2] == "L":
|
||||||
|
joining[curcode] = "GRUB_JOIN_TYPE_LEFT"
|
||||||
|
elif sp[2] == "R":
|
||||||
|
joining[curcode] = "GRUB_JOIN_TYPE_RIGHT"
|
||||||
|
elif sp[2] == "D":
|
||||||
|
joining[curcode] = "GRUB_JOIN_TYPE_DUAL"
|
||||||
|
elif sp[2] == "C":
|
||||||
|
joining[curcode] = "GRUB_JOIN_TYPE_CAUSING"
|
||||||
|
else:
|
||||||
|
print ("Unknown joining type '%s'" % sp[2])
|
||||||
|
exit (1)
|
||||||
|
infile.close ()
|
||||||
|
|
||||||
infile = open (sys.argv[1], "r")
|
infile = open (sys.argv[1], "r")
|
||||||
outfile = open (sys.argv[3], "w")
|
outfile = open (sys.argv[4], "w")
|
||||||
outfile.write ("#include <grub/unicode.h>\n")
|
outfile.write ("#include <grub/unicode.h>\n")
|
||||||
outfile.write ("\n")
|
outfile.write ("\n")
|
||||||
outfile.write ("struct grub_unicode_compact_range grub_unicode_compact[] = {\n")
|
outfile.write ("struct grub_unicode_compact_range grub_unicode_compact[] = {\n")
|
||||||
|
@ -74,23 +99,32 @@ for line in infile:
|
||||||
curcombtype != 240 and curcombtype != 253 and \
|
curcombtype != 240 and curcombtype != 253 and \
|
||||||
curcombtype != 254 and curcombtype != 255):
|
curcombtype != 254 and curcombtype != 255):
|
||||||
print ("WARNING: Unknown combining type %d" % curcombtype)
|
print ("WARNING: Unknown combining type %d" % curcombtype)
|
||||||
|
if curcode in joining:
|
||||||
|
curjoin = joining[curcode]
|
||||||
|
elif sp[2] == "Me" or sp[2] == "Mn" or sp[2] == "Cf":
|
||||||
|
curjoin = "GRUB_JOIN_TYPE_TRANSPARENT"
|
||||||
|
else:
|
||||||
|
curjoin = "GRUB_JOIN_TYPE_NONJOINING"
|
||||||
if lastcode + 1 != curcode or curbiditype != lastbiditype \
|
if lastcode + 1 != curcode or curbiditype != lastbiditype \
|
||||||
or curcombtype != lastcombtype or curmirrortype != lastmirrortype:
|
or curcombtype != lastcombtype or curmirrortype != lastmirrortype \
|
||||||
|
or curjoin != lastjoin:
|
||||||
if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0 or \
|
if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0 or \
|
||||||
lastmirrortype):
|
lastmirrortype):
|
||||||
outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d},\n" \
|
outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d, %s},\n" \
|
||||||
% (begincode, lastcode, lastbiditype, \
|
% (begincode, lastcode, lastbiditype, \
|
||||||
lastcombtype, lastmirrortype)))
|
lastcombtype, lastmirrortype, \
|
||||||
|
lastjoin)))
|
||||||
begincode = curcode
|
begincode = curcode
|
||||||
lastcode = curcode
|
lastcode = curcode
|
||||||
|
lastjoin = curjoin
|
||||||
lastbiditype = curbiditype
|
lastbiditype = curbiditype
|
||||||
lastcombtype = curcombtype
|
lastcombtype = curcombtype
|
||||||
lastmirrortype = curmirrortype
|
lastmirrortype = curmirrortype
|
||||||
if lastbiditype != "L" or lastcombtype != 0 or lastmirrortype:
|
if lastbiditype != "L" or lastcombtype != 0 or lastmirrortype:
|
||||||
outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d},\n" \
|
outfile.write (("{0x%x, 0x%x, GRUB_BIDI_TYPE_%s, %d, %d, %s},\n" \
|
||||||
% (begincode, lastcode, lastbiditype, lastcombtype, \
|
% (begincode, lastcode, lastbiditype, lastcombtype, \
|
||||||
lastmirrortype)))
|
lastmirrortype, lastjoin)))
|
||||||
outfile.write ("{0, 0, 0, 0, 0},\n")
|
outfile.write ("{0, 0, 0, 0, 0, 0},\n")
|
||||||
|
|
||||||
outfile.write ("};\n")
|
outfile.write ("};\n")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue