Numerous EWTS->Unicode and especially EWTS->TMW improvements.
Fixed ordering of Unicode wowels; e.g., [ku+A] gives the correct Unicode now. EWTS->TMW looks better for some wacky wowels like, I'm guessing here, [ku+A]. EWTS->TMW should now give errors any time the full input isn't used. Previously, wacky wowels like [kai+-i] would lead to some droppage. EWTS->TMW->Unicode testing is now in effect. This found a ton of EWTS->TMW bugs, most or all of which are fixed now. TMW->Unicode is improved/fixed for { \u5350,\u534D,\u0F88+k,\u0F88+kh,U }. (Why U? "\u0f75" is discouraged in favor of "\u0f71\u0f74".) NOTE: TMW_RTF_TO_THDL_WYLIETest is still disabled for the nightly builds' sake, but I ran it in my sandbox and it passed.
This commit is contained in:
parent
36122778b4
commit
6d419fe641
19 changed files with 1014 additions and 547 deletions
|
@ -18,9 +18,80 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text;
|
||||
|
||||
/** This is where basic, static knowledge of THDL's Extended Wylie is housed.
|
||||
/** This is where basic, static knowledge of THDL's Extended Wylie is
|
||||
* housed. <p>TODO(dchandler): tibwn.ini has all this, yes? So
|
||||
* extend TibetanMachineWeb if necessary and use a bunch of HashMaps
|
||||
* there! This is needless duplication.
|
||||
* @see TibetanMachineWeb */
|
||||
public interface THDLWylieConstants {
|
||||
// TODO(DLC)[EWTS->Tibetan]: what about U+2638, mentioned in Section
|
||||
// 9.11 "Tibetan" of the Unicode 4.0.1 standard? Why doesn't EWTS
|
||||
// mention it? (Because TMW has no glyph for it, I bet.) Do we
|
||||
// handle it well?
|
||||
/** The EWTS standard mentions this character specifically. See
|
||||
* http://www.symbols.com/encyclopedia/15/155.html to learn about
|
||||
* its meaning as it relates to Buddhism.
|
||||
*/
|
||||
public static final char SAUVASTIKA = '\u534d';
|
||||
/** The EWTS standard mentions this character specifically. See
|
||||
* http://www.symbols.com/encyclopedia/15/151.html to learn about
|
||||
* its meaning as it relates to Buddhism.
|
||||
*/
|
||||
public static final char SWASTIKA = '\u5350';
|
||||
/** EWTS has some glyphs not specified by Unicode in the
|
||||
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
|
||||
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
|
||||
* 2, 2005.) */
|
||||
public static final char PUA_MIN = '\uf021';
|
||||
/** EWTS has some glyphs not specified by Unicode in the
|
||||
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
|
||||
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
|
||||
* 2, 2005.) */
|
||||
public static final char PUA_MAX = '\uf0ff';
|
||||
/**
|
||||
* the Wylie for U+0F3E
|
||||
*/
|
||||
public static final String U0F3E = "}";
|
||||
/**
|
||||
* the Wylie for U+0F3F
|
||||
*/
|
||||
public static final String U0F3F = "{";
|
||||
/**
|
||||
* the Wylie for U+0F86
|
||||
*/
|
||||
public static final String U0F86 = "\\u0F86";
|
||||
/**
|
||||
* the Wylie for U+0F87
|
||||
*/
|
||||
public static final String U0F87 = "\\u0F87";
|
||||
/**
|
||||
* the Wylie for U+0FC6
|
||||
*/
|
||||
public static final String U0FC6 = "\\u0FC6";
|
||||
/**
|
||||
* the Wylie for U+0F18
|
||||
*/
|
||||
public static final String U0F18 = "\\u0F18";
|
||||
/**
|
||||
* the Wylie for U+0F19
|
||||
*/
|
||||
public static final String U0F19 = "\\u0F19";
|
||||
/**
|
||||
* the Wylie for U+0F84
|
||||
*/
|
||||
public static final String U0F84 = "?";
|
||||
/**
|
||||
* the Wylie for U+0F7F
|
||||
*/
|
||||
public static final String U0F7F = "H";
|
||||
/**
|
||||
* the Wylie for U+0F35
|
||||
*/
|
||||
public static final String U0F35 = "~X";
|
||||
/**
|
||||
* the Wylie for U+0F37
|
||||
*/
|
||||
public static final String U0F37 = "X";
|
||||
/**
|
||||
* the Wylie for U+0F82
|
||||
*/
|
||||
|
@ -32,7 +103,7 @@ public interface THDLWylieConstants {
|
|||
/**
|
||||
* the Wylie for bindu/anusvara (U+0F7E)
|
||||
*/
|
||||
public static final char BINDU = 'M';
|
||||
public static final String BINDU = "M";
|
||||
/**
|
||||
* the Wylie for tsheg
|
||||
*/
|
||||
|
@ -64,31 +135,51 @@ public interface THDLWylieConstants {
|
|||
*/
|
||||
public static final String WYLIE_TSA_PHRU = "^";
|
||||
/**
|
||||
* the Wylie for achung
|
||||
* the Wylie for achung, \u0f60
|
||||
*/
|
||||
public static final char ACHUNG_character = '\'';
|
||||
/**
|
||||
* the Wylie for achung
|
||||
* the Wylie for achung, \u0f60
|
||||
*/
|
||||
public static final String ACHUNG
|
||||
= new String(new char[] { ACHUNG_character });
|
||||
/**
|
||||
* the Wylie for the 28th of the 30 consonants, sa:
|
||||
* the Wylie for the 28th of the 30 consonants, sa, \u0f66:
|
||||
*/
|
||||
public static final String SA = "s";
|
||||
/**
|
||||
* the Wylie for the consonant ra:
|
||||
* the Wylie for the consonant ra, \u0f62:
|
||||
*/
|
||||
public static final String RA = "r";
|
||||
/**
|
||||
* the Wylie for the 16th of the 30 consonants, ma:
|
||||
* the Wylie for the 16th of the 30 consonants, ma, \u0f58:
|
||||
*/
|
||||
public static final String MA = "m";
|
||||
/**
|
||||
* the Wylie for the 4th of the 30 consonants, nga:
|
||||
* the Wylie for \u0f56:
|
||||
*/
|
||||
public static final String BA = "b";
|
||||
/**
|
||||
* the Wylie for \u0f51:
|
||||
*/
|
||||
public static final String DA = "d";
|
||||
/**
|
||||
* the Wylie for \u0f42:
|
||||
*/
|
||||
public static final String GA = "g";
|
||||
/**
|
||||
* the Wylie for \u0f63:
|
||||
*/
|
||||
public static final String LA = "l";
|
||||
/**
|
||||
* the Wylie for the 4th of the 30 consonants, nga, \u0f44:
|
||||
*/
|
||||
public static final String NGA = "ng";
|
||||
/**
|
||||
* the Wylie for \u0f53:
|
||||
*/
|
||||
public static final String NA = "n";
|
||||
/**
|
||||
* the Wylie for achen
|
||||
*/
|
||||
public static final String ACHEN = "a";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue