Numerous EWTS->Unicode and especially EWTS->TMW improvements.
Fixed ordering of Unicode wowels; for example, [ku+A] now gives the correct Unicode. EWTS->TMW looks better for some wacky wowels like, I'm guessing here, [ku+A]. EWTS->TMW should now give errors any time the full input isn't used; previously, wacky wowels like [kai+-i] would cause part of the input to be dropped. EWTS->TMW->Unicode testing is now in effect. This found a ton of EWTS->TMW bugs, most or all of which are fixed now. TMW->Unicode is improved/fixed for { \u5350, \u534D, \u0F88+k, \u0F88+kh, U }. (Why U? "\u0f75" is discouraged in favor of "\u0f71\u0f74".) NOTE: TMW_RTF_TO_THDL_WYLIETest is still disabled for the nightly builds' sake, but I ran it in my sandbox and it passed.
This commit is contained in:
parent
36122778b4
commit
6d419fe641
19 changed files with 1014 additions and 547 deletions
|
@ -1377,7 +1377,7 @@ public void paste(int offset)
|
|||
if (TibetanMachineWeb.isPunc(val)) { //punctuation
|
||||
val = TibetanMachineWeb.getWylieForPunc(val);
|
||||
|
||||
if (val.charAt(0) == TibetanMachineWeb.BINDU)
|
||||
if (val.startsWith(THDLWylieConstants.BINDU))
|
||||
putBindu();
|
||||
|
||||
else {
|
||||
|
|
|
@ -242,8 +242,8 @@
|
|||
\f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 K+S+MA\fs28\i0\b0\ul0\cf0 font 2; ord 54\par
|
||||
\f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 K+S+YA\fs28\i0\b0\ul0\cf0 font 2; ord 55\par
|
||||
\f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 K+S+VA\fs28\i0\b0\ul0\cf0 font 2; ord 56\par
|
||||
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=59 character=;/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
|
||||
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=60 character=</> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
|
||||
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F88+k to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
|
||||
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F88+kh to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
|
||||
\f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 KH+KHA\fs28\i0\b0\ul0\cf0 font 2; ord 59\par
|
||||
\f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 KH+NA\fs28\i0\b0\ul0\cf0 font 2; ord 60\par
|
||||
\f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 KH+LA\fs28\i0\b0\ul0\cf0 font 2; ord 61\par
|
||||
|
@ -812,8 +812,8 @@
|
|||
\f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F13 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 94\par
|
||||
\f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie < to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 95\par
|
||||
\f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie > to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 96\par
|
||||
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=97 character=a/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
|
||||
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=98 character=b/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
|
||||
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u5350 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
|
||||
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u534D to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
|
||||
\f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF038 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 99\par
|
||||
\f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF037 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 100\par
|
||||
\f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 o\fs28\i0\b0\ul0\cf0 font 5; ord 101\par
|
||||
|
|
|
@ -242,8 +242,8 @@
|
|||
\f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 k+s+ma\fs28\i0\b0\ul0\cf0 font 2; ord 54\par
|
||||
\f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 k+s+ya\fs28\i0\b0\ul0\cf0 font 2; ord 55\par
|
||||
\f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 k+s+wa\fs28\i0\b0\ul0\cf0 font 2; ord 56\par
|
||||
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=59 character=;/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
|
||||
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=60 character=</> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
|
||||
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0\\u0F88+k\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
|
||||
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0\\u0F88+kh\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
|
||||
\f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 kh+kha\fs28\i0\b0\ul0\cf0 font 2; ord 59\par
|
||||
\f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 kh+na\fs28\i0\b0\ul0\cf0 font 2; ord 60\par
|
||||
\f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 kh+la\fs28\i0\b0\ul0\cf0 font 2; ord 61\par
|
||||
|
@ -812,8 +812,8 @@
|
|||
\f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0\\u0F13\fs28\i0\b0\ul0\cf0 font 5; ord 94\par
|
||||
\f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 <\fs28\i0\b0\ul0\cf0 font 5; ord 95\par
|
||||
\f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 >\fs28\i0\b0\ul0\cf0 font 5; ord 96\par
|
||||
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=97 character=a/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
|
||||
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=98 character=b/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
|
||||
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0\\u5350\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
|
||||
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0\\u534D\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
|
||||
\f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0\\uF038\fs28\i0\b0\ul0\cf0 font 5; ord 99\par
|
||||
\f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0\\uF037\fs28\i0\b0\ul0\cf0 font 5; ord 100\par
|
||||
\f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 X\fs28\i0\b0\ul0\cf0 font 5; ord 101\par
|
||||
|
|
|
@ -18,9 +18,80 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text;
|
||||
|
||||
/** This is where basic, static knowledge of THDL's Extended Wylie is housed.
|
||||
/** This is where basic, static knowledge of THDL's Extended Wylie is
|
||||
* housed. <p>TODO(dchandler): tibwn.ini has all this, yes? So
|
||||
* extend TibetanMachineWeb if necessary and use a bunch of HashMaps
|
||||
* there! This is needless duplication.
|
||||
* @see TibetanMachineWeb */
|
||||
public interface THDLWylieConstants {
|
||||
// TODO(DLC)[EWTS->Tibetan]: what about U+2638, mentioned in Section
|
||||
// 9.11 "Tibetan" of the Unicode 4.0.1 standard? Why doesn't EWTS
|
||||
// mention it? (Because TMW has no glyph for it, I bet.) Do we
|
||||
// handle it well?
|
||||
/** The EWTS standard mentions this character specifically. See
|
||||
* http://www.symbols.com/encyclopedia/15/155.html to learn about
|
||||
* its meaning as relates to Buddhism.
|
||||
*/
|
||||
public static final char SAUVASTIKA = '\u534d';
|
||||
/** The EWTS standard mentions this character specifically. See
|
||||
* http://www.symbols.com/encyclopedia/15/151.html to learn about
|
||||
* its meaning as relates to Buddhism.
|
||||
*/
|
||||
public static final char SWASTIKA = '\u5350';
|
||||
/** EWTS has some glyphs not specified by Unicode in the
|
||||
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
|
||||
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
|
||||
* 2, 2005.) */
|
||||
public static final char PUA_MIN = '\uf021';
|
||||
/** EWTS has some glyphs not specified by Unicode in the
|
||||
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
|
||||
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
|
||||
* 2, 2005.) */
|
||||
public static final char PUA_MAX = '\uf0ff';
|
||||
/**
|
||||
* the Wylie for U+0F3E
|
||||
*/
|
||||
public static final String U0F3E = "}";
|
||||
/**
|
||||
* the Wylie for U+0F3F
|
||||
*/
|
||||
public static final String U0F3F = "{";
|
||||
/**
|
||||
* the Wylie for U+0F86
|
||||
*/
|
||||
public static final String U0F86 = "\\u0F86";
|
||||
/**
|
||||
* the Wylie for U+0F87
|
||||
*/
|
||||
public static final String U0F87 = "\\u0F87";
|
||||
/**
|
||||
* the Wylie for U+0FC6
|
||||
*/
|
||||
public static final String U0FC6 = "\\u0FC6";
|
||||
/**
|
||||
* the Wylie for U+0F18
|
||||
*/
|
||||
public static final String U0F18 = "\\u0F18";
|
||||
/**
|
||||
* the Wylie for U+0F19
|
||||
*/
|
||||
public static final String U0F19 = "\\u0F19";
|
||||
/**
|
||||
* the Wylie for U+0F84
|
||||
*/
|
||||
public static final String U0F84 = "?";
|
||||
/**
|
||||
* the Wylie for U+0F7F
|
||||
*/
|
||||
public static final String U0F7F = "H";
|
||||
/**
|
||||
* the Wylie for U+0F35
|
||||
*/
|
||||
public static final String U0F35 = "~X";
|
||||
/**
|
||||
* the Wylie for U+0F37
|
||||
*/
|
||||
public static final String U0F37 = "X";
|
||||
/**
|
||||
* the Wylie for U+0F82
|
||||
*/
|
||||
|
@ -32,7 +103,7 @@ public interface THDLWylieConstants {
|
|||
/**
|
||||
* the Wylie for bindu/anusvara (U+0F7E)
|
||||
*/
|
||||
public static final char BINDU = 'M';
|
||||
public static final String BINDU = "M";
|
||||
/**
|
||||
* the Wylie for tsheg
|
||||
*/
|
||||
|
@ -64,31 +135,51 @@ public interface THDLWylieConstants {
|
|||
*/
|
||||
public static final String WYLIE_TSA_PHRU = "^";
|
||||
/**
|
||||
* the Wylie for achung
|
||||
* the Wylie for achung, \u0f60
|
||||
*/
|
||||
public static final char ACHUNG_character = '\'';
|
||||
/**
|
||||
* the Wylie for achung
|
||||
* the Wylie for achung, \u0f60
|
||||
*/
|
||||
public static final String ACHUNG
|
||||
= new String(new char[] { ACHUNG_character });
|
||||
/**
|
||||
* the Wylie for the 28th of the 30 consonants, sa:
|
||||
* the Wylie for the 28th of the 30 consonants, sa, \u0f66:
|
||||
*/
|
||||
public static final String SA = "s";
|
||||
/**
|
||||
* the Wylie for the consonant ra:
|
||||
* the Wylie for the consonant ra, \u0f62:
|
||||
*/
|
||||
public static final String RA = "r";
|
||||
/**
|
||||
* the Wylie for the 16th of the 30 consonants, ma:
|
||||
* the Wylie for the 16th of the 30 consonants, ma, \u0f58:
|
||||
*/
|
||||
public static final String MA = "m";
|
||||
/**
|
||||
* the Wylie for the 4th of the 30 consonants, nga:
|
||||
* the Wylie for \u0f56:
|
||||
*/
|
||||
public static final String BA = "b";
|
||||
/**
|
||||
* the Wylie for \u0f51:
|
||||
*/
|
||||
public static final String DA = "d";
|
||||
/**
|
||||
* the Wylie for \u0f42:
|
||||
*/
|
||||
public static final String GA = "g";
|
||||
/**
|
||||
* the Wylie for \u0f63:
|
||||
*/
|
||||
public static final String LA = "l";
|
||||
/**
|
||||
* the Wylie for the 4th of the 30 consonants, nga, \u0f44:
|
||||
*/
|
||||
public static final String NGA = "ng";
|
||||
/**
|
||||
* the Wylie for \u0f53:
|
||||
*/
|
||||
public static final String NA = "n";
|
||||
/**
|
||||
* the Wylie for achen
|
||||
*/
|
||||
public static final String ACHEN = "a";
|
||||
|
|
|
@ -418,7 +418,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
chars.clear();
|
||||
|
||||
if (next.equals(String.valueOf(BINDU))) {
|
||||
if (next.equals(BINDU)) {
|
||||
if (glyphs.isEmpty())
|
||||
dc = null;
|
||||
else
|
||||
|
@ -560,11 +560,11 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* or null */
|
||||
public static void getBindu(List list, DuffCode dc) {
|
||||
if (null == dc) {
|
||||
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||
list.add(TibetanMachineWeb.getGlyph(BINDU));
|
||||
} else {
|
||||
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
|
||||
list.add(dc);
|
||||
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
|
||||
list.add(TibetanMachineWeb.getGlyph(BINDU));
|
||||
} else {
|
||||
list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
|
||||
}
|
||||
|
|
|
@ -1347,12 +1347,26 @@ public static boolean isKnownHashKey(String hashKey) {
|
|||
* @see DuffCode
|
||||
*/
|
||||
public static DuffCode getGlyph(String hashKey) {
|
||||
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
||||
DuffCode dc = maybeGetGlyph(hashKey);
|
||||
if (null == dc)
|
||||
throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
|
||||
return dc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a glyph for this hash key if possible; returns null
|
||||
* otherwise.
|
||||
* @see #getGlyph(String)
|
||||
*/
|
||||
public static DuffCode maybeGetGlyph(String hashKey) {
|
||||
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
|
||||
if (null == dc)
|
||||
return null;
|
||||
return dc[TMW];
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Gets the half height character for this hash key.
|
||||
* @param hashKey the key you want a half height glyph for; see {@link
|
||||
|
@ -1783,6 +1797,8 @@ private static final String Unicode_tab = "\t";
|
|||
= new DuffCode[] { new DuffCode(1, (char)58) };
|
||||
private static final DuffCode[] tmwFor0F73
|
||||
= new DuffCode[] { new DuffCode(4, (char)106), new DuffCode(1, (char)109) };
|
||||
private static final DuffCode[] tmwFor0F75
|
||||
= new DuffCode[] { new DuffCode(10, (char)126) };
|
||||
private static final DuffCode[] tmwFor0F76
|
||||
= new DuffCode[] { new DuffCode(8, (char)71), new DuffCode(8, (char)87) };
|
||||
private static final DuffCode[] tmwFor0F77
|
||||
|
@ -1840,6 +1856,8 @@ private static final String Unicode_tab = "\t";
|
|||
return tmwFor0F6A;
|
||||
} else if ('\u0F73' == ch) {
|
||||
return tmwFor0F73;
|
||||
} else if ('\u0F75' == ch) {
|
||||
return tmwFor0F75;
|
||||
} else if ('\u0F76' == ch) {
|
||||
return tmwFor0F76;
|
||||
} else if ('\u0F77' == ch) {
|
||||
|
|
|
@ -927,6 +927,15 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1
|
|||
// nyi.zla editor's mark. This is NOT \u0F82, although it looks very similar.
|
||||
\uF03A~91,5~~9,89~~~~~~~none
|
||||
|
||||
// yungs.drung (reversed):
|
||||
\u5350~97,5~~9,97~~~~~~~5350
|
||||
// yungs.drung (standard):
|
||||
\u534D~98,5~~9,98~~~~~~~534D
|
||||
|
||||
// utsama ka:
|
||||
\u0F88+k~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
|
||||
// utsama kha:
|
||||
\u0F88+kh~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
|
||||
|
||||
<?ToWylie?>
|
||||
M~238,1~~8,90~~~~~~~0F7E
|
||||
|
@ -1069,13 +1078,13 @@ A~204,2~~4,109~~~~~~~0F71
|
|||
A~205,2~~4,110~~~~~~~0F71
|
||||
A~206,2~~4,111~~~~~~~0F71
|
||||
A~207,2~~4,112~~~~~~~0F71
|
||||
U~211,2~~4,113~~~~~~~0F75
|
||||
U~212,2~~4,114~~~~~~~0F75
|
||||
U~213,2~~4,115~~~~~~~0F75
|
||||
U~214,2~~4,116~~~~~~~0F75
|
||||
U~215,2~~4,117~~~~~~~0F75
|
||||
U~216,2~~4,118~~~~~~~0F75
|
||||
U~217,2~~4,119~~~~~~~0F75
|
||||
U~211,2~~4,113~~~~~~~0F71,0F74
|
||||
U~212,2~~4,114~~~~~~~0F71,0F74
|
||||
U~213,2~~4,115~~~~~~~0F71,0F74
|
||||
U~214,2~~4,116~~~~~~~0F71,0F74
|
||||
U~215,2~~4,117~~~~~~~0F71,0F74
|
||||
U~216,2~~4,118~~~~~~~0F71,0F74
|
||||
U~217,2~~4,119~~~~~~~0F71,0F74
|
||||
u~224,2~~4,120~~~~~~~0F74
|
||||
u~225,2~~4,121~~~~~~~0F74
|
||||
u~226,2~~4,122~~~~~~~0F74
|
||||
|
@ -1090,13 +1099,13 @@ A~204,3~~6,109~~~~~~~0F71
|
|||
A~205,3~~6,110~~~~~~~0F71
|
||||
A~206,3~~6,111~~~~~~~0F71
|
||||
A~207,3~~6,112~~~~~~~0F71
|
||||
U~211,3~~6,113~~~~~~~0F75
|
||||
U~212,3~~6,114~~~~~~~0F75
|
||||
U~213,3~~6,115~~~~~~~0F75
|
||||
U~214,3~~6,116~~~~~~~0F75
|
||||
U~215,3~~6,117~~~~~~~0F75
|
||||
U~216,3~~6,118~~~~~~~0F75
|
||||
U~217,3~~6,119~~~~~~~0F75
|
||||
U~211,3~~6,113~~~~~~~0F71,0F74
|
||||
U~212,3~~6,114~~~~~~~0F71,0F74
|
||||
U~213,3~~6,115~~~~~~~0F71,0F74
|
||||
U~214,3~~6,116~~~~~~~0F71,0F74
|
||||
U~215,3~~6,117~~~~~~~0F71,0F74
|
||||
U~216,3~~6,118~~~~~~~0F71,0F74
|
||||
U~217,3~~6,119~~~~~~~0F71,0F74
|
||||
u~224,3~~6,120~~~~~~~0F74
|
||||
u~225,3~~6,121~~~~~~~0F74
|
||||
u~226,3~~6,122~~~~~~~0F74
|
||||
|
@ -1111,13 +1120,13 @@ A~204,4~~8,109~~~~~~~0F71
|
|||
A~205,4~~8,110~~~~~~~0F71
|
||||
A~206,4~~8,111~~~~~~~0F71
|
||||
A~207,4~~8,112~~~~~~~0F71
|
||||
U~211,4~~8,113~~~~~~~0F75
|
||||
U~212,4~~8,114~~~~~~~0F75
|
||||
U~213,4~~8,115~~~~~~~0F75
|
||||
U~214,4~~8,116~~~~~~~0F75
|
||||
U~215,4~~8,117~~~~~~~0F75
|
||||
U~216,4~~8,118~~~~~~~0F75
|
||||
U~217,4~~8,119~~~~~~~0F75
|
||||
U~211,4~~8,113~~~~~~~0F71,0F74
|
||||
U~212,4~~8,114~~~~~~~0F71,0F74
|
||||
U~213,4~~8,115~~~~~~~0F71,0F74
|
||||
U~214,4~~8,116~~~~~~~0F71,0F74
|
||||
U~215,4~~8,117~~~~~~~0F71,0F74
|
||||
U~216,4~~8,118~~~~~~~0F71,0F74
|
||||
U~217,4~~8,119~~~~~~~0F71,0F74
|
||||
u~224,4~~8,120~~~~~~~0F74
|
||||
u~225,4~~8,121~~~~~~~0F74
|
||||
u~226,4~~8,122~~~~~~~0F74
|
||||
|
@ -1131,13 +1140,13 @@ A~163,1~~10,116~~~~~~~0F71
|
|||
A~164,1~~10,117~~~~~~~0F71
|
||||
A~211,1~~10,118~~~~~~~0F71
|
||||
A~212,1~~10,119~~~~~~~0F71
|
||||
U~213,1~~10,120~~~~~~~0F75
|
||||
U~214,1~~10,121~~~~~~~0F75
|
||||
U~215,1~~10,122~~~~~~~0F75
|
||||
U~216,1~~10,123~~~~~~~0F75
|
||||
U~217,1~~10,124~~~~~~~0F75
|
||||
U~218,1~~10,125~~~~~~~0F75
|
||||
U~219,1~~10,126~~~~~~~0F75
|
||||
U~213,1~~10,120~~~~~~~0F71,0F74
|
||||
U~214,1~~10,121~~~~~~~0F71,0F74
|
||||
U~215,1~~10,122~~~~~~~0F71,0F74
|
||||
U~216,1~~10,123~~~~~~~0F71,0F74
|
||||
U~217,1~~10,124~~~~~~~0F71,0F74
|
||||
U~218,1~~10,125~~~~~~~0F71,0F74
|
||||
U~219,1~~10,126~~~~~~~0F71,0F74
|
||||
|
||||
// ra.mgo:
|
||||
r~173,4~~8,66~~~~~~~0F62
|
||||
|
@ -1191,13 +1200,3 @@ r~176,4~~8,71~~~~~~~0FB2
|
|||
\tmw8070~67,5~~9,70~~~~~~~none
|
||||
\tmw8071~68,5~~9,71~~~~~~~none
|
||||
\tmw8072~69,5~~9,72~~~~~~~none
|
||||
|
||||
// yungs.drung (reversed):
|
||||
\tmw8097~97,5~~9,97~~~~~~~5350
|
||||
// yungs.drung (standard):
|
||||
\tmw8098~98,5~~9,98~~~~~~~534D
|
||||
|
||||
// utsama ka:
|
||||
\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
|
||||
// utsama kha:
|
||||
\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
|
||||
|
|
|
@ -298,7 +298,7 @@ public class UnicodeUtils implements UnicodeConstants {
|
|||
characters will appear as themselves. */
|
||||
public static String unicodeCodepointToString(char cp,
|
||||
boolean shortenIfPossible) {
|
||||
return unicodeCodepointToString(cp, shortenIfPossible, "\\u");
|
||||
return unicodeCodepointToString(cp, shortenIfPossible, "\\u", false);
|
||||
}
|
||||
|
||||
/** Like {@link #unicodeCodepointToString(char, boolean)} if you
|
||||
|
@ -307,7 +307,8 @@ public class UnicodeUtils implements UnicodeConstants {
|
|||
<code>0F55</code>. */
|
||||
public static String unicodeCodepointToString(char cp,
|
||||
boolean shortenIfPossible,
|
||||
String prefix) {
|
||||
String prefix,
|
||||
boolean upperCase) {
|
||||
if (shortenIfPossible) {
|
||||
if ((cp >= 'a' && cp <= 'z')
|
||||
|| (cp >= 'A' && cp <= 'Z')
|
||||
|
@ -348,14 +349,16 @@ public class UnicodeUtils implements UnicodeConstants {
|
|||
return "\\r";
|
||||
}
|
||||
|
||||
String suffix;
|
||||
if (cp < '\u0010')
|
||||
return prefix + "000" + Integer.toHexString((int)cp);
|
||||
suffix = "000" + Integer.toHexString((int)cp);
|
||||
else if (cp < '\u0100')
|
||||
return prefix + "00" + Integer.toHexString((int)cp);
|
||||
suffix = "00" + Integer.toHexString((int)cp);
|
||||
else if (cp < '\u1000')
|
||||
return prefix + "0" + Integer.toHexString((int)cp);
|
||||
suffix = "0" + Integer.toHexString((int)cp);
|
||||
else
|
||||
return prefix + Integer.toHexString((int)cp);
|
||||
suffix = Integer.toHexString((int)cp);
|
||||
return prefix + (upperCase ? suffix.toUpperCase() : suffix);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -546,10 +546,12 @@ public final class ACIPTraits implements TTraits {
|
|||
|
||||
/** Gets the duffcodes for wowel, such that they look good with
|
||||
* the preceding glyph, and appends them to duff. */
|
||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
|
||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
|
||||
throws ConversionException
|
||||
{
|
||||
if (null == wowel) return;
|
||||
if (null == getEwtsForWowel(wowel)) // FIXME: expensive assertion! Use assert.
|
||||
throw new IllegalArgumentException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
|
||||
throw new ConversionException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
|
||||
|
||||
// Order matters here.
|
||||
boolean context_added[] = new boolean[] { false };
|
||||
|
@ -619,8 +621,10 @@ public final class ACIPTraits implements TTraits {
|
|||
try {
|
||||
return TPairListFactory.breakACIPIntoChunks(tt, sh);
|
||||
} catch (StackOverflowError e) {
|
||||
// TODO(dchandler): use ConversionException? Stop catching these?
|
||||
throw new IllegalArgumentException("Input too large[1]: " + tt);
|
||||
} catch (OutOfMemoryError e) {
|
||||
// TODO(dchandler): use ConversionException? Stop catching these?
|
||||
throw new IllegalArgumentException("Input too large[2]: " + tt);
|
||||
}
|
||||
}
|
||||
|
|
30
source/org/thdl/tib/text/ttt/ConversionException.java
Normal file
30
source/org/thdl/tib/text/ttt/ConversionException.java
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text.ttt;
|
||||
|
||||
/**
 * A ConversionException is a general-purpose checked exception
 * used to indicate a problem during conversion.
 *
 * @author David Chandler
 */
public final class ConversionException extends Exception {
    /** Pins the serialized form; Exception is Serializable. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates a conversion exception carrying the given detail message.
     *
     * @param x the detail message, later available via getMessage()
     * @see Exception#Exception(String)
     */
    ConversionException(String x) { super(x); }
}
|
|
@ -19,10 +19,12 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text.ttt;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.tib.text.TibetanDocument;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
|
||||
/** Tests this package's ability to understand EWTS and turn it into
|
||||
|
@ -76,42 +78,145 @@ public class EWTSTest extends TestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/** Causes a JUnit test case failure unless the EWTS document ewts
|
||||
* converts to the unicode expectedUnicode. */
|
||||
static void ewts2uni_test(String ewts, String expectedUnicode) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: In addition to what this
|
||||
// currently does, have this function convert to TMW and
|
||||
// convert that TMW to Unicode and verify that the result is
|
||||
// the same. Almost every call should allow for that.
|
||||
|
||||
/** Returns the Unicode corresponding to the TMW to which ewts
|
||||
* corresponds, or null if we couldn't push through, even with
|
||||
* errors, from EWTS->TMW->Unicode. */
|
||||
private static String ewts2tmw2uni(String ewts) {
|
||||
TTraits traits = EWTSTraits.instance();
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts, errors,
|
||||
null, true,
|
||||
"None", // TODO(DLC)[EWTS->Tibetan]: ???
|
||||
false /* short warnings */);
|
||||
if (null == unicode) {
|
||||
boolean shortMessages = true;
|
||||
String warningLevel = "All"; // slow but exercises more code paths
|
||||
ArrayList scan
|
||||
= traits.scanner().scan(ewts, errors, -1,
|
||||
shortMessages,
|
||||
warningLevel);
|
||||
if (null == scan)
|
||||
return null;
|
||||
if (errors.length() > 0)
|
||||
return null;
|
||||
errors = new StringBuffer();
|
||||
TibetanDocument tdoc = new TibetanDocument();
|
||||
boolean rv;
|
||||
try {
|
||||
rv = TConverter.convertToTMW(traits,
|
||||
scan, tdoc, errors, null, null,
|
||||
false, warningLevel,
|
||||
shortMessages, true,
|
||||
new int[] { tdoc.getLength() });
|
||||
} catch (java.io.IOException e) {
|
||||
// I doubt this can happen.
|
||||
throw new Error(e.toString());
|
||||
}
|
||||
if (!rv)
|
||||
return null;
|
||||
if (tdoc.getLength() < 1 && ewts.length() > 0)
|
||||
return null;
|
||||
errors = new StringBuffer();
|
||||
long numAttemptedReplacements[] = new long[] { 0 };
|
||||
tdoc.convertToUnicode(0, tdoc.getLength(), errors, null,
|
||||
numAttemptedReplacements);
|
||||
if (errors.length() > 0)
|
||||
return null;
|
||||
if (numAttemptedReplacements[0] < 1)
|
||||
return null;
|
||||
|
||||
try {
|
||||
return tdoc.getText(0, tdoc.getLength());
|
||||
} catch (javax.swing.text.BadLocationException e) {
|
||||
throw new Error("I know this won't happen: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
static void ewts2uni_test(String ewts, String expectedUnicode) {
|
||||
ewts2uni_test(ewts, expectedUnicode, true);
|
||||
}
|
||||
|
||||
/** Tests EWTS->Unicode but not EWTS->TMW[->Unicode]. */
|
||||
static void just_ewts2uni_test(String ewts, String expectedUnicode) {
|
||||
ewts2uni_test(ewts, expectedUnicode, false);
|
||||
}
|
||||
|
||||
/** Causes a JUnit test case failure unless the EWTS document ewts
|
||||
* converts to the unicode expectedUnicode. If doEwts2tmw2uni is
|
||||
* true, then this causes a test case failure if an
|
||||
* EWTS->TMW->Unicode trip doesn't give the same
|
||||
* expectedUnicode. */
|
||||
static void ewts2uni_test(String ewts, String expectedUnicode,
|
||||
boolean doEwts2tmw2uni) {
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode
|
||||
= TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts, errors,
|
||||
null, true,
|
||||
"None", // TODO(DLC)[EWTS->Tibetan]: ???
|
||||
false /* short warnings */);
|
||||
help_ewts2uni_test("EWTS->Unicode: ",
|
||||
ewts, expectedUnicode, unicode, errors);
|
||||
if (doEwts2tmw2uni) {
|
||||
help_ewts2uni_test("EWTS->TMW->Unicode: ",
|
||||
ewts, expectedUnicode, ewts2tmw2uni(ewts),
|
||||
new StringBuffer());
|
||||
}
|
||||
}
|
||||
|
||||
/** Doing EWTS->Unicode conversions yields one answer out of many
|
||||
* for some inputs, such as "b+ha". This function checks for
|
||||
* equality between two pieces of Unicode modulo such acceptable
|
||||
* changes. It's only complete enough to handle the test cases
|
||||
* we have. Why do we make two choices? TMW->Unicode is
|
||||
* different source code from EWTS->Unicode; that's why. */
|
||||
private static boolean ewts2uni_unicode_equality(String expectedUnicode,
|
||||
String actualUnicode) {
|
||||
// TODO(dchandler): replaceAll is a 1.4-ism. Will users balk?
|
||||
if (actualUnicode
|
||||
.replaceAll("\u0f0d\u0f0d", "\u0f0e") // TMW has no \u0f0e glyph
|
||||
.replaceAll("\u0f69", "\u0f40\u0fb5") // equivalent and neither are discouraged
|
||||
.replaceAll("\u0f43", "\u0f42\u0fb7") // ditto...
|
||||
.replaceAll("\u0f4d", "\u0f4c\u0fb7")
|
||||
.replaceAll("\u0f52", "\u0f51\u0fb7")
|
||||
.replaceAll("\u0f57", "\u0f56\u0fb7")
|
||||
.replaceAll("\u0f5c", "\u0f5b\u0fb7")
|
||||
.replaceAll("\u0fb9", "\u0f90\u0fb5")
|
||||
.replaceAll("\u0f93", "\u0f92\u0fb7")
|
||||
.replaceAll("\u0f9d", "\u0f9c\u0fb7")
|
||||
.replaceAll("\u0fa2", "\u0fa1\u0fb7")
|
||||
.replaceAll("\u0fa7", "\u0fa6\u0fb7") // ...
|
||||
.replaceAll("\u0fac", "\u0fab\u0fb7") // equivalent and neither are discouraged
|
||||
|
||||
.equals(expectedUnicode)) {
|
||||
return true;
|
||||
}
|
||||
return expectedUnicode.equals(actualUnicode);
|
||||
}
|
||||
|
||||
private static void help_ewts2uni_test(String prefix,
|
||||
String ewts,
|
||||
String expectedUnicode,
|
||||
String actualUnicode,
|
||||
StringBuffer errors) {
|
||||
if (null == actualUnicode) {
|
||||
if (null != expectedUnicode && "none" != expectedUnicode) {
|
||||
System.out.println("No unicode exists for " + ewts
|
||||
System.out.println(prefix + "No unicode exists for " + ewts
|
||||
+ " but you expected "
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
|
||||
assertTrue(false);
|
||||
}
|
||||
System.out.println("Unicode for " + ewts + " can't be had; errors are " + errors);
|
||||
System.out.println(prefix + "Unicode for " + ewts + " can't be had; errors are " + errors);
|
||||
} else {
|
||||
if (null != expectedUnicode && !expectedUnicode.equals(unicode)) {
|
||||
explainInequality(unicode, expectedUnicode, System.out);
|
||||
if (UnicodeUtils.unicodeStringToPrettyString(unicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
|
||||
System.out.println("UGLY strings: The unicode for\n \"" + ewts
|
||||
if (null != expectedUnicode
|
||||
&& !ewts2uni_unicode_equality(expectedUnicode, actualUnicode)) {
|
||||
explainInequality(actualUnicode, expectedUnicode, System.out);
|
||||
if (UnicodeUtils.unicodeStringToPrettyString(actualUnicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
|
||||
System.out.println(prefix + "UGLY strings: The unicode for\n \"" + ewts
|
||||
+ "\"\nis\n \""
|
||||
+ unicode
|
||||
+ actualUnicode
|
||||
+ "\",\nbut you expected\n \""
|
||||
+ expectedUnicode
|
||||
+ "\"");
|
||||
} else {
|
||||
System.out.println("The unicode for\n \"" + ewts
|
||||
System.out.println(prefix + "The unicode for\n \"" + ewts
|
||||
+ "\"\nis\n \""
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(unicode)
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(actualUnicode)
|
||||
+ "\",\nbut you expected\n \""
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)
|
||||
+ "\"");
|
||||
|
@ -122,7 +227,7 @@ public class EWTSTest extends TestCase {
|
|||
TPairList[] la
|
||||
= EWTSTraits.instance().breakTshegBarIntoChunks(sb.toString(), false);
|
||||
assertTrue(la[1] == null);
|
||||
System.out.println("EWTS=" + ewts + " and l'=" + la[0].toString2());
|
||||
System.out.println(prefix + "EWTS=" + ewts + " and l'=" + la[0].toString2());
|
||||
}
|
||||
assertTrue(false);
|
||||
}
|
||||
|
@ -156,24 +261,25 @@ public class EWTSTest extends TestCase {
|
|||
public void test0F39() {
|
||||
ewts2uni_test("v", "\u0F56\u0F39");
|
||||
ewts2uni_test("f", "\u0F55\u0F39");
|
||||
ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
|
||||
just_ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
|
||||
ewts2uni_test("faM", "\u0f55\u0f39\u0f7e");
|
||||
ewts2uni_test("vaM", "\u0f56\u0f39\u0f7e");
|
||||
ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39");
|
||||
ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39");
|
||||
ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e");
|
||||
just_ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39");
|
||||
just_ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39");
|
||||
just_ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e");
|
||||
|
||||
ewts2uni_test("a^", "\u0f68\u0f39");
|
||||
ewts2uni_test("hUM^", "\u0f67\u0f71\u0f74\u0f7e\u0f39");
|
||||
ewts2uni_test("hUM^", "\u0f67\u0f39\u0f71\u0f74\u0f7e");
|
||||
ewts2uni_test("ph^", "\u0f55\u0f39");
|
||||
ewts2uni_test("phe^", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter?
|
||||
ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!?
|
||||
ewts2uni_test("phe^", "\u0f55\u0f39\u0f7a");
|
||||
ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? EWTSTraits.isWowelThatRequiresAChen(..) might be to blame
|
||||
|
||||
|
||||
ewts2uni_test("a\u0f39", "\u0f68\u0f39");
|
||||
ewts2uni_test("hUM\u0f39", "\u0f67\u0f71\u0f74\u0f7e\u0f39");
|
||||
ewts2uni_test("hUM\u0f39", "\u0f67\u0f39\u0f71\u0f74\u0f7e");
|
||||
ewts2uni_test("ph\u0f39", "\u0f55\u0f39");
|
||||
ewts2uni_test("phe\u0f39", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter?
|
||||
ewts2uni_test("ph\u0f39e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!?
|
||||
ewts2uni_test("phe\u0f39", "\u0f55\u0f39\u0f7a");
|
||||
ewts2uni_test("ph\u0f39e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? EWTSTraits.isWowelThatRequiresAChen(..) might be to blame
|
||||
|
||||
if (RUN_FAILING_TESTS) ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
|
||||
}
|
||||
|
@ -181,6 +287,13 @@ public class EWTSTest extends TestCase {
|
|||
/** Tests that the EWTS->unicode converter isn't completely
|
||||
braindead. */
|
||||
public void testEwtsBasics() {
|
||||
just_ewts2uni_test("r+sa", "\u0f62\u0fb6");
|
||||
ewts2uni_test("R+s", "\u0f6a\u0fb6");
|
||||
|
||||
ewts2uni_test("k?e", "\u0f40\u0f84\u0f68\u0f7a");
|
||||
ewts2uni_test("ko+o", "\u0f40\u0f7c\u0f7c");
|
||||
ewts2uni_test("kau+u", "\u0f40\u0f74\u0f7d");
|
||||
|
||||
ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66");
|
||||
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
|
||||
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
|
||||
|
@ -220,39 +333,46 @@ public class EWTSTest extends TestCase {
|
|||
|
||||
ewts2uni_test("b.ra ", "\u0f56\u0f62\u0f0b");
|
||||
ewts2uni_test("bara ", "\u0f56\u0f62\u0f0b");
|
||||
ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
|
||||
just_ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
|
||||
}
|
||||
|
||||
/** Miscellaneous tests of EWTS->Unicode conversion. */
|
||||
public void test__EWTS__miscellany() {
|
||||
just_ewts2uni_test("ga\\u0f02ha", "\u0f42\u0f02\u0f67"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
|
||||
just_ewts2uni_test("g.\\u0f03\u0f0b", "\u0f42\u0f03\u0f0b"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
|
||||
|
||||
ewts2uni_test("", "");
|
||||
just_ewts2uni_test("k+\u0fb2e", "\u0f40\u0fb2\u0f7a");
|
||||
assert_EWTS_error("\u0f42ya");
|
||||
just_ewts2uni_test("\u0f42+ya", "\u0f42\u0fb1");
|
||||
just_ewts2uni_test("\u0f42.ya", "\u0f42\u0f61");
|
||||
|
||||
just_ewts2uni_test("", "");
|
||||
|
||||
ewts2uni_test("0\\u0f19", "\u0f20\u0f19");
|
||||
ewts2uni_test("0\\u0f18", "\u0f20\u0f18");
|
||||
ewts2uni_test("0\\u0f3e", "\u0f20\u0f3e"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw
|
||||
ewts2uni_test("0\\u0f3f", "\u0f20\u0f3f"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw
|
||||
|
||||
ewts2uni_test("R", "\u0f6A");
|
||||
ewts2uni_test("Ra", "\u0f6A");
|
||||
just_ewts2uni_test("R", "\u0f6A");
|
||||
just_ewts2uni_test("Ra", "\u0f6A");
|
||||
|
||||
ewts2uni_test("R+ka", "\u0F6A\u0f90");
|
||||
ewts2uni_test("k+Wa", "\u0f40\u0FBA");
|
||||
ewts2uni_test("k+Ya", "\u0f40\u0FBB");
|
||||
ewts2uni_test("k+Ra", "\u0f40\u0FBC");
|
||||
just_ewts2uni_test("R+ka", "\u0F6A\u0f90");
|
||||
just_ewts2uni_test("k+Wa", "\u0f40\u0FBA");
|
||||
just_ewts2uni_test("k+Ya", "\u0f40\u0FBB");
|
||||
just_ewts2uni_test("k+Ra", "\u0f40\u0FBC");
|
||||
ewts2uni_test("k+wa", "\u0f40\u0Fad");
|
||||
ewts2uni_test("k+la", "\u0f40\u0Fb3");
|
||||
ewts2uni_test("k+ya", "\u0f40\u0Fb1");
|
||||
ewts2uni_test("k+ra", "\u0f40\u0Fb2");
|
||||
|
||||
ewts2uni_test("r-I", "\u0f62\u0f81");
|
||||
ewts2uni_test("l-I", "\u0f63\u0f81");
|
||||
ewts2uni_test("r-I", "\u0f62\u0f71\u0f80");
|
||||
ewts2uni_test("l-I", "\u0f63\u0f71\u0f80");
|
||||
ewts2uni_test("r-i", "\u0f62\u0f80");
|
||||
ewts2uni_test("l-i", "\u0f63\u0f80");
|
||||
ewts2uni_test("gr-i", "\u0f42\u0fb2\u0f80");
|
||||
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f81");
|
||||
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f71\u0f80");
|
||||
ewts2uni_test("gl-i", "\u0f42\u0fb3\u0f80");
|
||||
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f81");
|
||||
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f71\u0f80");
|
||||
}
|
||||
|
||||
|
||||
|
@ -277,9 +397,9 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("u", "\u0f68\u0f74");
|
||||
ewts2uni_test("U", "\u0f68\u0f71\u0f74");
|
||||
ewts2uni_test("a+r-i", "\u0f68\u0fb2\u0f80");
|
||||
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f81");
|
||||
ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
|
||||
ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f81");
|
||||
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f71\u0f80");
|
||||
just_ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
|
||||
just_ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f71\u0f80");
|
||||
ewts2uni_test("e", "\u0f68\u0f7a");
|
||||
ewts2uni_test("ai", "\u0f68\u0f7b");
|
||||
// ewts2uni_test("ao", "\u0f68\u0f68\u0f7c"); // TODO(DLC)[EWTS->Tibetan]:
|
||||
|
@ -289,11 +409,12 @@ public class EWTSTest extends TestCase {
|
|||
// ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("-i", "\u0f68\u0f80");
|
||||
ewts2uni_test("-I", "\u0f68\u0f81");
|
||||
ewts2uni_test("-I", "\u0f68\u0f71\u0f80");
|
||||
// ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("\\u0f68", "\u0f68");
|
||||
just_ewts2uni_test("\\u0f68", "\u0f68");
|
||||
ewts2uni_test("\\u0f86", "\u0f68\u0f86"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("a\\u0f86", "\u0f68\u0f86");
|
||||
ewts2uni_test("a\\U0f86", "\u0f68\u0f86");
|
||||
ewts2uni_test("a\\U0F86", "\u0f68\u0f86");
|
||||
|
@ -305,7 +426,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("a\\u0f87", "\u0f68\u0f87");
|
||||
|
||||
// ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("aHM", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("a", "\u0f68");
|
||||
|
||||
}
|
||||
|
@ -325,7 +446,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("o+e", "\u0f68\u0f7c\u0f7a");
|
||||
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7e\u0f7f");
|
||||
ewts2uni_test("u+A", "\u0f68\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("o+-I", "DLC");
|
||||
|
@ -342,9 +463,9 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("ku", "\u0f40\u0f74");
|
||||
ewts2uni_test("kU", "\u0f40\u0f71\u0f74");
|
||||
ewts2uni_test("k+r-i", "\u0f40\u0fb2\u0f80");
|
||||
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f81");
|
||||
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f71\u0f80");
|
||||
ewts2uni_test("k+l-i", "\u0f40\u0fb3\u0f80");
|
||||
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f81");
|
||||
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f71\u0f80");
|
||||
ewts2uni_test("ke", "\u0f40\u0f7a");
|
||||
ewts2uni_test("e", "\u0f68\u0f7a");
|
||||
ewts2uni_test("a", "\u0f68");
|
||||
|
@ -354,7 +475,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("kaM", "\u0f40\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("kaH", "\u0f40\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k-i", "\u0f40\u0f80");
|
||||
ewts2uni_test("k-I", "\u0f40\u0f81");
|
||||
ewts2uni_test("k-I", "\u0f40\u0f71\u0f80");
|
||||
ewts2uni_test("ka~M`", "\u0f40\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("ka~M", "\u0f40\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("ka?", "\u0f40\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
@ -369,7 +490,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("ka\\u0f87", "\u0f40\u0f87");
|
||||
|
||||
ewts2uni_test("kaMH", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("kaHM", "\u0f40\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("kaHM", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
|
@ -380,10 +501,10 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("ke+e+e", "\u0f40\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ko+e", "\u0f40\u0f7c\u0f7a");
|
||||
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("ku+A", "\u0f40\u0f74\u0f71");
|
||||
ewts2uni_test("ko+e", "\u0f40\u0f7a\u0f7c");
|
||||
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f71\u0f74\u0f72\u0f7a\u0f7c");
|
||||
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
|
||||
ewts2uni_test("ku+A", "\u0f40\u0f71\u0f74");
|
||||
|
||||
ewts2uni_test("k", "\u0f40");
|
||||
ewts2uni_test("ka", "\u0f40");
|
||||
|
@ -414,7 +535,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("'aM", "\u0f60\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("'aH", "\u0f60\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("'-i", "\u0f60\u0f80");
|
||||
ewts2uni_test("'-I", "\u0f60\u0f81");
|
||||
ewts2uni_test("'-I", "\u0f60\u0f71\u0f80");
|
||||
ewts2uni_test("'a~M`", "\u0f60\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("'a~M", "\u0f60\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("'a?", "\u0f60\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
@ -429,7 +550,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("'a\\u0f87", "\u0f60\u0f87");
|
||||
|
||||
ewts2uni_test("'aMH", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("'aHM", "\u0f60\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("'aHM", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
|
@ -440,19 +561,19 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("'e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'o+e", "\u0f60\u0f7c\u0f7a");
|
||||
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("'o+e", "\u0f60\u0f7a\u0f7c");
|
||||
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f71\u0f74\u0f72\u0f7a\u0f7c");
|
||||
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
|
||||
|
||||
ewts2uni_test("'u+A", "\u0f60\u0f74\u0f71");
|
||||
ewts2uni_test("'u+A", "\u0f60\u0f71\u0f74");
|
||||
|
||||
ewts2uni_test("'", "\u0f60");
|
||||
ewts2uni_test("'a", "\u0f60");
|
||||
|
||||
ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
|
||||
ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f81");
|
||||
ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
|
||||
ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f81");
|
||||
just_ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
|
||||
just_ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f71\u0f80");
|
||||
just_ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
|
||||
just_ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f71\u0f80");
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
|
@ -471,7 +592,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("k+ShaM", "\u0f40\u0fb5\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaH", "\u0f40\u0fb5\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sh-i", "\u0f40\u0fb5\u0f80");
|
||||
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f81");
|
||||
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f71\u0f80");
|
||||
ewts2uni_test("k+Sha~M`", "\u0f40\u0fb5\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha~M", "\u0f40\u0fb5\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha?", "\u0f40\u0fb5\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
@ -486,7 +607,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("k+Sha\\u0f87", "\u0f40\u0fb5\u0f87");
|
||||
|
||||
ewts2uni_test("k+ShaMH", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
|
@ -497,18 +618,18 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("k+She+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+She+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+She+e+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f74\u0f71");
|
||||
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7a\u0f7c");
|
||||
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c");
|
||||
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
|
||||
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f71\u0f74");
|
||||
|
||||
ewts2uni_test("k+Sh", "\u0f40\u0fb5");
|
||||
ewts2uni_test("k+Sha", "\u0f40\u0fb5");
|
||||
|
||||
ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
|
||||
ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f81");
|
||||
just_ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
|
||||
just_ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f71\u0f80");
|
||||
ewts2uni_test("k+Sh+l-i", "\u0f40\u0fb5\u0fb3\u0f80");
|
||||
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f81");
|
||||
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f71\u0f80");
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
|
@ -526,12 +647,12 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("phywo", "\u0f55\u0fb1\u0fad\u0f7c");
|
||||
ewts2uni_test("phywau", "\u0f55\u0fb1\u0fad\u0f7d");
|
||||
ewts2uni_test("phyw-i", "\u0f55\u0fb1\u0fad\u0f80");
|
||||
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f81");
|
||||
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f71\u0f80");
|
||||
ewts2uni_test("phyw\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
assertEquals(EWTSTraits.instance().getUnicodeForWowel("\u0f86+\u0f84"), "\u0f86\u0f84");
|
||||
|
||||
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
|
||||
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
|
||||
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
|
||||
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
|
||||
ewts2uni_test("phywa\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
ewts2uni_test("phywa\\u0f86\u0f84", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
|
||||
ewts2uni_test("phywa\\U0f86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
|
@ -552,10 +673,10 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7c\u0f7a");
|
||||
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f74\u0f71");
|
||||
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7c");
|
||||
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f71\u0f74\u0f72\u0f7a\u0f7c");
|
||||
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
|
||||
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f71\u0f74");
|
||||
|
||||
ewts2uni_test("phyw", "\u0f55\u0fb1\u0fad");
|
||||
ewts2uni_test("phywa", "\u0f55\u0fb1\u0fad");
|
||||
|
@ -566,7 +687,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
assert_EWTS_error("phywr-i");
|
||||
assert_EWTS_error("phyw+r-i");
|
||||
|
@ -579,55 +700,55 @@ public class EWTSTest extends TestCase {
|
|||
* (U+0F40,U+0F97,U+0F97,U+0F90,U+0F90,U+0F97) is correct. I
|
||||
* chose this stack as an example of an absurd stack. */
|
||||
public void test__EWTS__wowels_on_kjjkkj() {
|
||||
ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
|
||||
ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
|
||||
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
|
||||
ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
|
||||
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
|
||||
ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
|
||||
ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
|
||||
ewts2uni_test("k+j+j+k+k+jau", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
|
||||
ewts2uni_test("k+j+j+k+k+jaM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+j+j+k+k+jaH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+j+j+k+k+j-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f80");
|
||||
ewts2uni_test("k+j+j+k+k+j-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f81");
|
||||
ewts2uni_test("k+j+j+k+k+ja~M`", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+j+j+k+k+ja~M", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+j+j+k+k+ja?", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\U0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\U0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
ewts2uni_test("k+j+j+k+k+ja\\u0f87", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f87");
|
||||
just_ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
|
||||
just_ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
|
||||
just_ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
|
||||
just_ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
|
||||
just_ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
|
||||
just_ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
|
||||
just_ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
|
||||
just_ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
|
||||
just_ewts2uni_test("k+j+j+k+k+jau", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
|
||||
just_ewts2uni_test("k+j+j+k+k+jaM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+jaH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+j-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f80");
|
||||
just_ewts2uni_test("k+j+j+k+k+j-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f80");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja~M`", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+ja~M", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+ja?", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\U0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\U0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja\\u0f87", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f87");
|
||||
|
||||
ewts2uni_test("k+j+j+k+k+jaMH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+j+j+k+k+jaHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+jaMH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
just_ewts2uni_test("k+j+j+k+k+jaHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
|
||||
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
|
||||
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71");
|
||||
just_ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
|
||||
just_ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
|
||||
just_ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
|
||||
just_ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
just_ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
just_ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
just_ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7c");
|
||||
just_ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74\u0f72\u0f7a\u0f7c");
|
||||
just_ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
|
||||
just_ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
|
||||
|
||||
ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
|
||||
ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
|
||||
ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
|
||||
ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f81");
|
||||
ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
|
||||
ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f81");
|
||||
just_ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
|
||||
just_ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
|
||||
just_ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
|
||||
just_ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f71\u0f80");
|
||||
just_ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
|
||||
just_ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f71\u0f80");
|
||||
}
|
||||
|
||||
/** Tests that the EWTS that the spec says corresponds to each
|
||||
|
@ -644,14 +765,16 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("\\u0000", "\u0000");
|
||||
ewts2uni_test("\\u0eff", "\u0eff");
|
||||
}
|
||||
ewts2uni_test("\\u0f00", "\u0f00");
|
||||
ewts2uni_test("\\u0f40", "\u0f40");
|
||||
just_ewts2uni_test("\\u0f00", "\u0f00"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
|
||||
just_ewts2uni_test("\\u0F02", "\u0F02"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
|
||||
just_ewts2uni_test("\\u0F03", "\u0F03"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
|
||||
just_ewts2uni_test("\\u0f40", "\u0f40");
|
||||
if (RUN_FAILING_TESTS) {
|
||||
assert_EWTS_error("\\u0f70"); // reserved codepoint
|
||||
assert_EWTS_error("\\u0fff"); // reserved codepoint
|
||||
ewts2uni_test("\\uf000", "\uf000");
|
||||
ewts2uni_test("\\uf01f", "\uf01f");
|
||||
ewts2uni_test("\\uefff", "\uefff");
|
||||
just_ewts2uni_test("\\uf000", "\uf000");
|
||||
just_ewts2uni_test("\\uf01f", "\uf01f");
|
||||
just_ewts2uni_test("\\uefff", "\uefff");
|
||||
}
|
||||
|
||||
|
||||
|
@ -661,11 +784,11 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("f", "\u0F55\u0F39");
|
||||
ewts2uni_test("\u0f88+ka", "\u0f88\u0f90");
|
||||
ewts2uni_test("\u0f88+kha", "\u0f88\u0f91");
|
||||
ewts2uni_test("\\u0f88+ka", "\u0f88\u0f90");
|
||||
ewts2uni_test("\\u0f88+kha", "\u0f88\u0f91");
|
||||
ewts2uni_test("oM",
|
||||
false ? "\u0F00" : "\u0f68\u0f7c\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: which is correct? see e-mail (maybe it was cfynn who thought \u0F00 ought not be generated?
|
||||
ewts2uni_test("\\u0F01", "\u0F01");
|
||||
ewts2uni_test("\\u0F02", "\u0F02");
|
||||
ewts2uni_test("\\u0F03", "\u0F03");
|
||||
ewts2uni_test("@", "\u0F04");
|
||||
ewts2uni_test("#", "\u0F05"); // TODO(DLC)[EWTS->Tibetan]: warning/error? [#] alone is nonsense.
|
||||
ewts2uni_test("$", "\u0F06");
|
||||
|
@ -777,9 +900,9 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("u", achen + "\u0F74");
|
||||
ewts2uni_test("U", achen + "\u0F71\u0F74");
|
||||
ewts2uni_test("a+r-i", achen + "\u0fb2\u0f80"); // not 0F76, which is discouraged by the Unicode standard
|
||||
ewts2uni_test("a+r-I", achen + "\u0fb2\u0f81"); // not 0F77, which is discouraged by the Unicode standard
|
||||
ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard
|
||||
ewts2uni_test("a+l-I", achen + "\u0fb3\u0f81"); // not 0F79, which is discouraged by the Unicode standard
|
||||
ewts2uni_test("a+r-I", achen + "\u0fb2\u0f71\u0f80"); // not 0F77, which is discouraged by the Unicode standard
|
||||
just_ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard
|
||||
just_ewts2uni_test("a+l-I", achen + "\u0fb3\u0f71\u0f80"); // not 0F79, which is discouraged by the Unicode standard
|
||||
ewts2uni_test("e", achen + "\u0F7A");
|
||||
ewts2uni_test("ai", achen + "\u0F7B");
|
||||
ewts2uni_test("o", achen + "\u0F7C");
|
||||
|
@ -787,7 +910,7 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("M", achen + "\u0F7E");
|
||||
ewts2uni_test("H", achen + "\u0F7F");
|
||||
ewts2uni_test("-i", achen + "\u0F80");
|
||||
ewts2uni_test("-I", achen + "\u0F81");
|
||||
ewts2uni_test("-I", achen + "\u0F71\u0F80");
|
||||
ewts2uni_test("~M`", achen + "\u0F82");
|
||||
ewts2uni_test("~M", achen + "\u0F83");
|
||||
ewts2uni_test("?", achen + "\u0F84"); // \u0f84 is a combiner
|
||||
|
@ -799,8 +922,8 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("\\u0F8A", "\u0F8A");
|
||||
ewts2uni_test("\\u0F8B", "\u0F8B");
|
||||
|
||||
final String ewts_for_superscript = "tsh+";
|
||||
final String unicode_for_superscript = "\u0f5a";
|
||||
final String ewts_for_superscript = "r+";
|
||||
final String unicode_for_superscript = "\u0f62";
|
||||
ewts2uni_test(ewts_for_superscript + "k",
|
||||
unicode_for_superscript + "\u0F90");
|
||||
ewts2uni_test(ewts_for_superscript + "kh",
|
||||
|
@ -812,10 +935,10 @@ public class EWTSTest extends TestCase {
|
|||
+ (false ? "\u0F93" : "\u0f92\u0fb7"));
|
||||
ewts2uni_test(ewts_for_superscript + "ng",
|
||||
unicode_for_superscript + "\u0F94");
|
||||
ewts2uni_test(ewts_for_superscript + "c",
|
||||
unicode_for_superscript + "\u0F95");
|
||||
ewts2uni_test(ewts_for_superscript + "ch",
|
||||
unicode_for_superscript + "\u0F96");
|
||||
just_ewts2uni_test(ewts_for_superscript + "c",
|
||||
unicode_for_superscript + "\u0F95");
|
||||
just_ewts2uni_test(ewts_for_superscript + "ch",
|
||||
unicode_for_superscript + "\u0F96");
|
||||
ewts2uni_test(ewts_for_superscript + "j",
|
||||
unicode_for_superscript + "\u0F97");
|
||||
ewts2uni_test(ewts_for_superscript + "ny",
|
||||
|
@ -826,9 +949,9 @@ public class EWTSTest extends TestCase {
|
|||
unicode_for_superscript + "\u0F9B");
|
||||
ewts2uni_test(ewts_for_superscript + "D",
|
||||
unicode_for_superscript + "\u0F9C");
|
||||
ewts2uni_test(ewts_for_superscript + "D+h",
|
||||
unicode_for_superscript
|
||||
+ (false ? "\u0F9D" : "\u0f9c\u0fb7"));
|
||||
just_ewts2uni_test(ewts_for_superscript + "D+h",
|
||||
unicode_for_superscript
|
||||
+ (false ? "\u0F9D" : "\u0f9c\u0fb7"));
|
||||
ewts2uni_test(ewts_for_superscript + "N",
|
||||
unicode_for_superscript + "\u0F9E");
|
||||
ewts2uni_test(ewts_for_superscript + "t",
|
||||
|
@ -844,8 +967,8 @@ public class EWTSTest extends TestCase {
|
|||
unicode_for_superscript + "\u0FA3");
|
||||
ewts2uni_test(ewts_for_superscript + "p",
|
||||
unicode_for_superscript + "\u0FA4");
|
||||
ewts2uni_test(ewts_for_superscript + "ph",
|
||||
unicode_for_superscript + "\u0FA5");
|
||||
just_ewts2uni_test(ewts_for_superscript + "ph",
|
||||
unicode_for_superscript + "\u0FA5");
|
||||
ewts2uni_test(ewts_for_superscript + "b",
|
||||
unicode_for_superscript + "\u0FA6");
|
||||
ewts2uni_test(ewts_for_superscript + "b+h",
|
||||
|
@ -859,119 +982,122 @@ public class EWTSTest extends TestCase {
|
|||
unicode_for_superscript + "\u0FAA");
|
||||
ewts2uni_test(ewts_for_superscript + "dz",
|
||||
unicode_for_superscript + "\u0FAB");
|
||||
ewts2uni_test(ewts_for_superscript + "dz+h",
|
||||
unicode_for_superscript
|
||||
+ (false ? "\u0FAC" : "\u0fab\u0fb7"));
|
||||
just_ewts2uni_test(ewts_for_superscript + "dz+h",
|
||||
unicode_for_superscript
|
||||
+ (false ? "\u0FAC" : "\u0fab\u0fb7"));
|
||||
ewts2uni_test(ewts_for_superscript + "w",
|
||||
unicode_for_superscript + "\u0FAD");
|
||||
ewts2uni_test(ewts_for_superscript + "zh",
|
||||
unicode_for_superscript + "\u0FAE");
|
||||
ewts2uni_test(ewts_for_superscript + "z",
|
||||
unicode_for_superscript + "\u0FAF");
|
||||
ewts2uni_test(ewts_for_superscript + "'",
|
||||
unicode_for_superscript + "\u0FB0");
|
||||
ewts2uni_test(ewts_for_superscript + "y",
|
||||
unicode_for_superscript + "\u0FB1");
|
||||
ewts2uni_test(ewts_for_superscript + "r",
|
||||
unicode_for_superscript + "\u0FB2");
|
||||
just_ewts2uni_test(ewts_for_superscript + "zh",
|
||||
unicode_for_superscript + "\u0FAE");
|
||||
just_ewts2uni_test(ewts_for_superscript + "z",
|
||||
unicode_for_superscript + "\u0FAF");
|
||||
just_ewts2uni_test(ewts_for_superscript + "'",
|
||||
unicode_for_superscript + "\u0FB0");
|
||||
just_ewts2uni_test(ewts_for_superscript + "y",
|
||||
unicode_for_superscript + "\u0FB1");
|
||||
just_ewts2uni_test(ewts_for_superscript + "r",
|
||||
unicode_for_superscript + "\u0FB2");
|
||||
ewts2uni_test(ewts_for_superscript + "l",
|
||||
unicode_for_superscript + "\u0FB3");
|
||||
ewts2uni_test(ewts_for_superscript + "sh",
|
||||
unicode_for_superscript + "\u0FB4");
|
||||
ewts2uni_test(ewts_for_superscript + "Sh",
|
||||
unicode_for_superscript + "\u0FB5");
|
||||
ewts2uni_test(ewts_for_superscript + "s",
|
||||
unicode_for_superscript + "\u0FB6");
|
||||
just_ewts2uni_test(ewts_for_superscript + "sh",
|
||||
unicode_for_superscript + "\u0FB4");
|
||||
just_ewts2uni_test(ewts_for_superscript + "Sh",
|
||||
unicode_for_superscript + "\u0FB5");
|
||||
just_ewts2uni_test(ewts_for_superscript + "s",
|
||||
unicode_for_superscript + "\u0FB6");
|
||||
ewts2uni_test(ewts_for_superscript + "h",
|
||||
unicode_for_superscript + "\u0FB7");
|
||||
ewts2uni_test(ewts_for_superscript + "a",
|
||||
unicode_for_superscript + "\u0FB8");
|
||||
just_ewts2uni_test(ewts_for_superscript + "a",
|
||||
unicode_for_superscript + "\u0FB8");
|
||||
ewts2uni_test(ewts_for_superscript + "k+Sh",
|
||||
unicode_for_superscript
|
||||
+ (false ? "\u0FB9" : "\u0f90\u0fb5"));
|
||||
ewts2uni_test(ewts_for_superscript + "W",
|
||||
unicode_for_superscript + "\u0FBA");
|
||||
ewts2uni_test(ewts_for_superscript + "Y",
|
||||
unicode_for_superscript + "\u0FBB");
|
||||
ewts2uni_test(ewts_for_superscript + "R",
|
||||
unicode_for_superscript + "\u0FBC");
|
||||
just_ewts2uni_test(ewts_for_superscript + "W",
|
||||
unicode_for_superscript + "\u0FBA");
|
||||
just_ewts2uni_test(ewts_for_superscript + "Y",
|
||||
unicode_for_superscript + "\u0FBB");
|
||||
just_ewts2uni_test(ewts_for_superscript + "R",
|
||||
unicode_for_superscript + "\u0FBC");
|
||||
|
||||
ewts2uni_test("\\u0FBE", "\u0FBE");
|
||||
ewts2uni_test("\\u0FBF", "\u0FBF");
|
||||
ewts2uni_test("\\u0FC0", "\u0FC0");
|
||||
ewts2uni_test("\\u0FC1", "\u0FC1");
|
||||
ewts2uni_test("\\u0FC2", "\u0FC2");
|
||||
ewts2uni_test("\\u0FC3", "\u0FC3");
|
||||
ewts2uni_test("\\u0FC4", "\u0FC4");
|
||||
ewts2uni_test("\\u0FC5", "\u0FC5");
|
||||
ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner
|
||||
ewts2uni_test("\\u0FC7", "\u0FC7");
|
||||
ewts2uni_test("\\u0FC8", "\u0FC8");
|
||||
ewts2uni_test("\\u0FC9", "\u0FC9");
|
||||
ewts2uni_test("\\u0FCA", "\u0FCA");
|
||||
ewts2uni_test("\\u0FCB", "\u0FCB");
|
||||
ewts2uni_test("\\u0FCC", "\u0FCC");
|
||||
ewts2uni_test("\\u0FCF", "\u0FCF");
|
||||
ewts2uni_test("\\u0FD0", "\u0FD0");
|
||||
ewts2uni_test("\\u0FD1", "\u0FD1");
|
||||
just_ewts2uni_test("\\u0FBE", "\u0FBE");
|
||||
just_ewts2uni_test("\\u0FBF", "\u0FBF");
|
||||
just_ewts2uni_test("\\u0FC0", "\u0FC0");
|
||||
just_ewts2uni_test("\\u0FC1", "\u0FC1");
|
||||
just_ewts2uni_test("\\u0FC2", "\u0FC2");
|
||||
just_ewts2uni_test("\\u0FC3", "\u0FC3");
|
||||
just_ewts2uni_test("\\u0FC4", "\u0FC4");
|
||||
just_ewts2uni_test("\\u0FC5", "\u0FC5");
|
||||
just_ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner
|
||||
just_ewts2uni_test("\\u0FC7", "\u0FC7");
|
||||
just_ewts2uni_test("\\u0FC8", "\u0FC8");
|
||||
just_ewts2uni_test("\\u0FC9", "\u0FC9");
|
||||
just_ewts2uni_test("\\u0FCA", "\u0FCA");
|
||||
just_ewts2uni_test("\\u0FCB", "\u0FCB");
|
||||
just_ewts2uni_test("\\u0FCC", "\u0FCC");
|
||||
just_ewts2uni_test("\\u0FCF", "\u0FCF");
|
||||
just_ewts2uni_test("\\u0FD0", "\u0FD0");
|
||||
just_ewts2uni_test("\\u0FD1", "\u0FD1");
|
||||
ewts2uni_test("_", "\u00a0"); // tibwn.ini says that the Unicode spec wants a non-breaking space.
|
||||
ewts2uni_test("\\u534D", "\u534D");
|
||||
ewts2uni_test("\\u5350", "\u5350");
|
||||
ewts2uni_test("\u534D", "\u534D");
|
||||
ewts2uni_test("\u5350", "\u5350");
|
||||
ewts2uni_test("\\u0F88+k", "\u0F88\u0F90");
|
||||
ewts2uni_test("\\u0F88+kh", "\u0F88\u0F91");
|
||||
/* TODO(DLC)[EWTS->Tibetan]:
|
||||
|
||||
Do we want to ever generate \uf021? (NOT \u0f21, but the
|
||||
private-use area (PUA) of Unicode). EWTS->TMW and this
|
||||
makes sense, but EWTS->Unicode? */
|
||||
ewts2uni_test("\\uF021", "\uF021");
|
||||
ewts2uni_test("\\uF022", "\uF022");
|
||||
ewts2uni_test("\\uF023", "\uF023");
|
||||
ewts2uni_test("\\uF024", "\uF024");
|
||||
ewts2uni_test("\\uF025", "\uF025");
|
||||
ewts2uni_test("\\uF026", "\uF026");
|
||||
ewts2uni_test("\\uF027", "\uF027");
|
||||
ewts2uni_test("\\uF028", "\uF028");
|
||||
ewts2uni_test("\\uF029", "\uF029");
|
||||
ewts2uni_test("\\uF02A", "\uF02A");
|
||||
ewts2uni_test("\\uF02B", "\uF02B");
|
||||
ewts2uni_test("\\uF02C", "\uF02C");
|
||||
ewts2uni_test("\\uF02D", "\uF02D");
|
||||
ewts2uni_test("\\uF02E", "\uF02E");
|
||||
ewts2uni_test("\\uF02F", "\uF02F");
|
||||
ewts2uni_test("\\uF030", "\uF030");
|
||||
ewts2uni_test("\\uF031", "\uF031");
|
||||
ewts2uni_test("\\uF032", "\uF032");
|
||||
ewts2uni_test("\\uF033", "\uF033");
|
||||
ewts2uni_test("\\uF034", "\uF034");
|
||||
ewts2uni_test("\\uF035", "\uF035");
|
||||
ewts2uni_test("\\uF036", "\uF036");
|
||||
ewts2uni_test("\\uF037", "\uF037");
|
||||
ewts2uni_test("\\uF038", "\uF038");
|
||||
ewts2uni_test("\\uF039", "\uF039");
|
||||
ewts2uni_test("\\uF03A", "\uF03A");
|
||||
ewts2uni_test("\\uF03B", "\uF03B");
|
||||
ewts2uni_test("\\uF03C", "\uF03C");
|
||||
ewts2uni_test("\\uF03D", "\uF03D");
|
||||
ewts2uni_test("\\uF03E", "\uF03E");
|
||||
ewts2uni_test("\\uF03F", "\uF03F");
|
||||
ewts2uni_test("\\uF040", "\uF040");
|
||||
ewts2uni_test("\\uF041", "\uF041");
|
||||
ewts2uni_test("\\uF042", "\uF042");
|
||||
makes sense, but EWTS->Unicode? Shouldn't we match the
|
||||
behavior of TMW->Unicode, regardless? */
|
||||
just_ewts2uni_test("\\uF021", "\uF021");
|
||||
just_ewts2uni_test("\\uF022", "\uF022");
|
||||
just_ewts2uni_test("\\uF023", "\uF023");
|
||||
just_ewts2uni_test("\\uF024", "\uF024");
|
||||
just_ewts2uni_test("\\uF025", "\uF025");
|
||||
just_ewts2uni_test("\\uF026", "\uF026");
|
||||
just_ewts2uni_test("\\uF027", "\uF027");
|
||||
just_ewts2uni_test("\\uF028", "\uF028");
|
||||
just_ewts2uni_test("\\uF029", "\uF029");
|
||||
just_ewts2uni_test("\\uF02A", "\uF02A");
|
||||
just_ewts2uni_test("\\uF02B", "\uF02B");
|
||||
just_ewts2uni_test("\\uF02C", "\uF02C");
|
||||
just_ewts2uni_test("\\uF02D", "\uF02D");
|
||||
just_ewts2uni_test("\\uF02E", "\uF02E");
|
||||
just_ewts2uni_test("\\uF02F", "\uF02F");
|
||||
just_ewts2uni_test("\\uF030", "\uF030");
|
||||
just_ewts2uni_test("\\uF031", "\uF031");
|
||||
just_ewts2uni_test("\\uF032", "\uF032");
|
||||
just_ewts2uni_test("\\uF033", "\uF033");
|
||||
just_ewts2uni_test("\\uF034", "\uF034");
|
||||
just_ewts2uni_test("\\uF035", "\uF035");
|
||||
just_ewts2uni_test("\\uF036", "\uF036");
|
||||
just_ewts2uni_test("\\uF037", "\uF037");
|
||||
just_ewts2uni_test("\\uF038", "\uF038");
|
||||
just_ewts2uni_test("\\uF039", "\uF039");
|
||||
just_ewts2uni_test("\\uF03A", "\uF03A");
|
||||
just_ewts2uni_test("\\uF03B", "\uF03B");
|
||||
just_ewts2uni_test("\\uF03C", "\uF03C");
|
||||
just_ewts2uni_test("\\uF03D", "\uF03D");
|
||||
just_ewts2uni_test("\\uF03E", "\uF03E");
|
||||
just_ewts2uni_test("\\uF03F", "\uF03F");
|
||||
just_ewts2uni_test("\\uF040", "\uF040");
|
||||
just_ewts2uni_test("\\uF041", "\uF041");
|
||||
just_ewts2uni_test("\\uF042", "\uF042");
|
||||
}
|
||||
|
||||
public void test__EWTS__long_wowels() {
|
||||
ewts2uni_test("k-I~M`~X", "\u0f40\u0f81\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
|
||||
ewts2uni_test("k-I~M`~X", "\u0f40\u0f71\u0f80\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
|
||||
}
|
||||
|
||||
public void test__EWTS__32bit_unicode_escapes() {
|
||||
assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work
|
||||
ewts2uni_test("\\uF0010000",
|
||||
"[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work:
|
||||
just_ewts2uni_test("\\uF0010000",
|
||||
"[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work:
|
||||
if (RUN_FAILING_TESTS) assert_EWTS_error("\\uF0010000"); // TODO(DLC)[EWTS->Tibetan]: error subsystem is hosed
|
||||
if (RUN_FAILING_TESTS) {
|
||||
ewts2uni_test("\\ucafe0000",
|
||||
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
|
||||
just_ewts2uni_test("\\ucafe0000",
|
||||
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
|
||||
// TODO(dchandler): make it "\ucafe0000");
|
||||
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
|
||||
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
|
||||
|
@ -1003,8 +1129,8 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("\\u00000000", "\u0000");
|
||||
ewts2uni_test("\\u00000eff", "\u0eff");
|
||||
}
|
||||
ewts2uni_test("\\u00000f00", "\u0f00");
|
||||
ewts2uni_test("\\u00000f40", "\u0f40");
|
||||
just_ewts2uni_test("\\u00000f00", "\u0f00"); // TODO(DLC)[EWTS->Tibetan]: EWTS->TMW is broken for this
|
||||
just_ewts2uni_test("\\u00000f40", "\u0f40");
|
||||
if (RUN_FAILING_TESTS) {
|
||||
ewts2uni_test("\\u00000f70", "\u0f70");
|
||||
ewts2uni_test("\\u00000fff", "\u0fff");
|
||||
|
@ -1089,22 +1215,33 @@ public class EWTSTest extends TestCase {
|
|||
|
||||
if (RUN_FAILING_TESTS) {
|
||||
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
|
||||
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81");
|
||||
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f71\u0f80");
|
||||
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
|
||||
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81");
|
||||
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f71\u0f80");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testMoreMiscellany() {
|
||||
ewts2uni_test("k+Sh+R-i", "\u0f40\u0fb5\u0fbc\u0f80");
|
||||
|
||||
ewts2uni_test("k\\u0f35", "\u0f40\u0f35");
|
||||
ewts2uni_test("k\\u0f72", "\u0f40\u0f72");
|
||||
ewts2uni_test("k\\u0f73", "\u0f40\u0f71\u0f72");
|
||||
ewts2uni_test("k\\u0f75", "\u0f40\u0f71\u0f74");
|
||||
ewts2uni_test("k\\u0f3e", "\u0f40\u0f3e");
|
||||
ewts2uni_test("k\\u0f3f", "\u0f40\u0f3f");
|
||||
|
||||
ewts2uni_test("kHai", "\u0f40\u0f7f\u0f68\u0f7b"); // TODO(DLC)[EWTS->Tibetan]: Is this correct?
|
||||
|
||||
ewts2uni_test("r-i", "\u0f62\u0f80");
|
||||
ewts2uni_test("r-I", "\u0f62\u0f81");
|
||||
ewts2uni_test("r-I", "\u0f62\u0f71\u0f80");
|
||||
ewts2uni_test("l-i", "\u0f63\u0f80");
|
||||
ewts2uni_test("l-I", "\u0f63\u0f81");
|
||||
ewts2uni_test("ga\u0f0bga ga\\u0F0bga",
|
||||
"\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42");
|
||||
ewts2uni_test("ga\u0f0cga*ga\\u0f0Cga",
|
||||
"\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42");
|
||||
ewts2uni_test("l-I", "\u0f63\u0f71\u0f80");
|
||||
just_ewts2uni_test("ga\u0f0bga ga\\u0F0bga",
|
||||
"\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42");
|
||||
just_ewts2uni_test("ga\u0f0cga*ga\\u0f0Cga",
|
||||
"\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42");
|
||||
ewts2uni_test("'jam",
|
||||
"\u0f60\u0f47\u0f58");
|
||||
ewts2uni_test("jamX 'jam~X",
|
||||
|
|
|
@ -21,6 +21,7 @@ Contributor(s): ______________________________________.
|
|||
package org.thdl.tib.text.ttt;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
import org.thdl.tib.text.DuffCode;
|
||||
|
@ -102,124 +103,172 @@ public final class EWTSTraits implements TTraits {
|
|||
|
||||
public boolean isWowel(String s) {
|
||||
return (getUnicodeForWowel(s) != null);
|
||||
/* TODO(DLC)[EWTS->Tibetan]: test ko+m+e etc.
|
||||
// TODO(DLC)[EWTS->Tibetan]: all non-consonant combiners? 0f71 0f87 etc.?
|
||||
if (s.length() == 1 && isUnicodeWowel(s.charAt(0))) return true;
|
||||
return ("a".equals(s)
|
||||
|| "e".equals(s)
|
||||
|| "i".equals(s)
|
||||
|| "o".equals(s)
|
||||
|| "u".equals(s)
|
||||
|| "U".equals(s)
|
||||
|| "I".equals(s)
|
||||
|| "A".equals(s)
|
||||
|| "-i".equals(s)
|
||||
|| "-I".equals(s)
|
||||
|| "au".equals(s)
|
||||
|| "ai".equals(s)
|
||||
|| isWowelThatRequiresAChen(s));
|
||||
// TODO(DLC)[EWTS->Tibetan]:???
|
||||
*/
|
||||
}
|
||||
|
||||
public String aVowel() { return "a"; }
|
||||
public String aVowel() { return THDLWylieConstants.WYLIE_aVOWEL; }
|
||||
|
||||
public boolean isPostsuffix(String s) {
|
||||
return ("s".equals(s) || "d".equals(s));
|
||||
}
|
||||
|
||||
public boolean isPrefix(String l) {
|
||||
return ("'".equals(l)
|
||||
|| "m".equals(l)
|
||||
|| "b".equals(l)
|
||||
|| "d".equals(l)
|
||||
|| "g".equals(l));
|
||||
return (THDLWylieConstants.ACHUNG.equals(l)
|
||||
|| THDLWylieConstants.MA.equals(l)
|
||||
|| THDLWylieConstants.BA.equals(l)
|
||||
|| THDLWylieConstants.DA.equals(l)
|
||||
|| THDLWylieConstants.GA.equals(l));
|
||||
}
|
||||
|
||||
public boolean isSuffix(String l) {
|
||||
return ("s".equals(l)
|
||||
|| "g".equals(l)
|
||||
|| "d".equals(l)
|
||||
|| "m".equals(l)
|
||||
|| "'".equals(l)
|
||||
|| "b".equals(l)
|
||||
|| "ng".equals(l)
|
||||
|| "n".equals(l)
|
||||
|| "l".equals(l)
|
||||
|| "r".equals(l));
|
||||
return (isPrefix(l)
|
||||
|| THDLWylieConstants.SA.equals(l)
|
||||
|| THDLWylieConstants.NGA.equals(l)
|
||||
|| THDLWylieConstants.NA.equals(l)
|
||||
|| THDLWylieConstants.LA.equals(l)
|
||||
|| THDLWylieConstants.RA.equals(l));
|
||||
}
|
||||
|
||||
/** Returns l, since this is EWTS's traits class. */
|
||||
public String getEwtsForConsonant(String l) { return l; }
|
||||
/** Returns the best EWTS for l, which is often l but not always
|
||||
* thanks to Unicode escapes. NOTE: For "\u0f42", you don't want
|
||||
* to return "g" lest "\\u0f42ya " become the wrong thing under
|
||||
* EWTS->Unicode. */
|
||||
public String getEwtsForConsonant(String l) {
|
||||
return helpGetEwts(l);
|
||||
}
|
||||
|
||||
/** Returns l, since this is EWTS's traits class. */
|
||||
public String getEwtsForOther(String l) { return l; }
|
||||
/** Returns the best EWTS for l, which is often l but not always
|
||||
* thanks to Unicode escapes. */
|
||||
public String getEwtsForOther(String l) {
|
||||
return helpGetEwts(l);
|
||||
}
|
||||
|
||||
private String helpGetEwts(String l) {
|
||||
if (l.length() == 1
|
||||
&& ((l.charAt(0) >= THDLWylieConstants.PUA_MIN
|
||||
&& l.charAt(0) <= THDLWylieConstants.PUA_MAX)
|
||||
|| 0 <= "\u0F01\u0F09\u0F0A\u0F10\u0F12\u0F13\u0F15\u0F16\u0F17\u0F18\u0F19\u0F1A\u0F1B\u0F1C\u0F1D\u0F1E\u0F1F\u0F2A\u0F2B\u0F2C\u0F2D\u0F2E\u0F2F\u0F30\u0F31\u0F32\u0F33\u0F36\u0F38\u0F86\u0F87\u0F88\u0F89\u0F8A\u0F8B\u0FBE\u0FBF\u0FC0\u0FC1\u0FC2\u0FC3\u0FC4\u0FC5\u0FC6\u0FC7\u0FC8\u0FC9\u0FCA\u0FCB\u0FCC\u0FCF\u5350\u534D".indexOf(l.charAt(0)))) {
|
||||
return UnicodeUtils.unicodeCodepointToString(l.charAt(0), false, "\\u", true);
|
||||
}
|
||||
if (false) { // TODO(dchandler): it's too late in the game to do this. EWTS->TMW is broken for \u0f00, \u0f02, and \u0f03 right now, fix that.
|
||||
if ("\u0f02".equals(l)) return "u~M`H"; // too long for a single hash key, see?
|
||||
if ("\u0f03".equals(l)) return "u~M`:"; // ditto
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
/** Returns l, since this is EWTS's traits class. */
|
||||
public String getEwtsForWowel(String l) { return l; }
|
||||
|
||||
/** Returns the tsheg-bar scanner to use with these traits, namely
 *  whatever EWTSTshegBarScanner.instance() returns. */
public TTshegBarScanner scanner() {
    final TTshegBarScanner ewtsScanner = EWTSTshegBarScanner.instance();
    return ewtsScanner;
}
|
||||
|
||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
|
||||
throws IllegalArgumentException
|
||||
/** If needle is found in haystack, then haystack without the
|
||||
* first instance of needle is returned. Otherwise haystack
|
||||
* itself is returned. */
|
||||
private static String removeFirstMatch(String haystack, String needle) {
|
||||
int ix;
|
||||
if ((ix = haystack.indexOf(needle)) >= 0) {
|
||||
StringBuffer sb = new StringBuffer(haystack);
|
||||
sb.replace(ix, ix + needle.length(), "");
|
||||
return sb.toString();
|
||||
}
|
||||
return haystack;
|
||||
}
|
||||
|
||||
private static HashMap bestEwtsMap = null;
|
||||
private static String getBestEwtsForSingleWowel(String wowel) {
|
||||
// NOTE: Not MT-safe
|
||||
if (null == bestEwtsMap) {
|
||||
bestEwtsMap = new HashMap(20);
|
||||
// Unicode-escape sequences are handled early. To be
|
||||
// correct, we must "unescape" here any Unicode escape to
|
||||
// whatever tibwn.ini has. (TODO(dchandler): tibwn.ini
|
||||
// has this info, use that instead of duplicating it in
|
||||
// this code.)
|
||||
bestEwtsMap.put("\u0f18", THDLWylieConstants.U0F18);
|
||||
bestEwtsMap.put("\u0f19", THDLWylieConstants.U0F19);
|
||||
bestEwtsMap.put("\u0f35", THDLWylieConstants.U0F35);
|
||||
bestEwtsMap.put("\u0f37", THDLWylieConstants.U0F37);
|
||||
bestEwtsMap.put("\u0f39", THDLWylieConstants.WYLIE_TSA_PHRU);
|
||||
bestEwtsMap.put("\u0f3e", THDLWylieConstants.U0F3E);
|
||||
bestEwtsMap.put("\u0f3f", THDLWylieConstants.U0F3F);
|
||||
bestEwtsMap.put("\u0f84", THDLWylieConstants.U0F84);
|
||||
bestEwtsMap.put("\u0f86", THDLWylieConstants.U0F86);
|
||||
bestEwtsMap.put("\u0f87", THDLWylieConstants.U0F87);
|
||||
bestEwtsMap.put("\u0fc6", THDLWylieConstants.U0FC6);
|
||||
|
||||
bestEwtsMap.put("\u0f71", THDLWylieConstants.A_VOWEL);
|
||||
bestEwtsMap.put("\u0f72", THDLWylieConstants.i_VOWEL);
|
||||
bestEwtsMap.put("\u0f74", THDLWylieConstants.u_VOWEL);
|
||||
bestEwtsMap.put("\u0f7a", THDLWylieConstants.e_VOWEL);
|
||||
bestEwtsMap.put("\u0f7b", THDLWylieConstants.ai_VOWEL);
|
||||
bestEwtsMap.put("\u0f7c", THDLWylieConstants.o_VOWEL);
|
||||
bestEwtsMap.put("\u0f7d", THDLWylieConstants.au_VOWEL);
|
||||
bestEwtsMap.put("\u0f7e", THDLWylieConstants.BINDU);
|
||||
bestEwtsMap.put("\u0f80", THDLWylieConstants.reverse_i_VOWEL);
|
||||
bestEwtsMap.put("\u0f81", THDLWylieConstants.reverse_I_VOWEL);
|
||||
|
||||
bestEwtsMap.put("\u0f73", THDLWylieConstants.I_VOWEL); // not in tibwn.ini
|
||||
bestEwtsMap.put("\u0f75", THDLWylieConstants.U_VOWEL); // not in tibwn.ini
|
||||
}
|
||||
String mapping = (String)bestEwtsMap.get(wowel);
|
||||
if (null != mapping)
|
||||
return mapping;
|
||||
else
|
||||
return wowel;
|
||||
}
|
||||
|
||||
public void getDuffForWowel(ArrayList duff, DuffCode preceding,
|
||||
String wowel)
|
||||
throws ConversionException
|
||||
{
|
||||
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
|
||||
boolean preceding_added[] = new boolean[] { false };
|
||||
String[] wowels = wowel.split("\\+");
|
||||
for (int i = 0; i < wowels.length; i++) {
|
||||
getDuffForSingleWowel(duff, preceding,
|
||||
getBestEwtsForSingleWowel(wowels[i]),
|
||||
preceding_added);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
|
||||
|
||||
// Order matters here.
|
||||
boolean context_added[] = new boolean[] { false };
|
||||
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
|
||||
/** Wowels can stack. This works on a single wowel. */
|
||||
private void getDuffForSingleWowel(ArrayList duff, DuffCode preceding,
|
||||
String wowel, boolean preceding_added[])
|
||||
throws ConversionException
|
||||
{
|
||||
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) { // TODO(dchandler): ka+o deserves at least a warning. kaM, though, does not. Do we handle it?
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, preceding_added);
|
||||
wowel = "";
|
||||
} else {
|
||||
// TODO(DLC)[EWTS->Tibetan]: test vowel stacking
|
||||
if (wowel.indexOf(THDLWylieConstants.U_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.reverse_I_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_I_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.I_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.A_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.e_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
|
||||
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
|
||||
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
|
||||
}
|
||||
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||
duff.add(TibetanMachineWeb.getGlyph("~X"));
|
||||
} else if (wowel.indexOf("X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
|
||||
duff.add(TibetanMachineWeb.getGlyph("X"));
|
||||
// We call these combining because the TMW font treats
|
||||
// such a vowel specially depending on the preceding glyph
|
||||
// with which it combines.
|
||||
String combining_wowels[] = new String[] {
|
||||
// order does not matter
|
||||
THDLWylieConstants.U_VOWEL,
|
||||
THDLWylieConstants.reverse_I_VOWEL,
|
||||
THDLWylieConstants.I_VOWEL,
|
||||
THDLWylieConstants.A_VOWEL,
|
||||
THDLWylieConstants.ai_VOWEL,
|
||||
THDLWylieConstants.reverse_i_VOWEL,
|
||||
THDLWylieConstants.i_VOWEL,
|
||||
THDLWylieConstants.e_VOWEL,
|
||||
THDLWylieConstants.o_VOWEL,
|
||||
THDLWylieConstants.au_VOWEL,
|
||||
THDLWylieConstants.u_VOWEL
|
||||
};
|
||||
for (int i = 0; i < combining_wowels.length; i++) {
|
||||
if (wowel.equals(combining_wowels[i])) {
|
||||
TibTextUtils.getVowel(duff, preceding, combining_wowels[i],
|
||||
preceding_added);
|
||||
wowel = removeFirstMatch(wowel, combining_wowels[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
|
||||
|
||||
if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0
|
||||
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
|
||||
// rely on the fact that we know every Wylie wowel that
|
||||
// contains 'M'. Let's, instead, parse the wowel.
|
||||
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
|
||||
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
|
||||
if (wowel.equals(THDLWylieConstants.BINDU)) {
|
||||
DuffCode last = null;
|
||||
if (!context_added[0]) {
|
||||
if (!preceding_added[0]) {
|
||||
last = preceding;
|
||||
} else if (duff.size() > 0) {
|
||||
last = (DuffCode)duff.get(duff.size() - 1);
|
||||
|
@ -227,52 +276,77 @@ public final class EWTSTraits implements TTraits {
|
|||
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
|
||||
}
|
||||
TibTextUtils.getBindu(duff, last);
|
||||
context_added[0] = true;
|
||||
preceding_added[0] = true;
|
||||
wowel = removeFirstMatch(wowel, THDLWylieConstants.BINDU);
|
||||
}
|
||||
if (!context_added[0]) {
|
||||
|
||||
if (!preceding_added[0]) {
|
||||
duff.add(preceding);
|
||||
preceding_added[0] = true;
|
||||
}
|
||||
if (wowel.indexOf('H') >= 0)
|
||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||
int ix;
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
|
||||
|
||||
String standalone_wowels[] = new String[] {
|
||||
// order does not matter
|
||||
|
||||
// This likely won't look good! TMW has glyphs for [va]
|
||||
// and [fa], so use that transliteration if you care, not
|
||||
// [ph^] or [b^].
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), "");
|
||||
wowel = sb.toString();
|
||||
THDLWylieConstants.WYLIE_TSA_PHRU,
|
||||
THDLWylieConstants.U0F35,
|
||||
THDLWylieConstants.U0F37,
|
||||
THDLWylieConstants.U0F7F,
|
||||
THDLWylieConstants.U0F82,
|
||||
THDLWylieConstants.U0F83,
|
||||
THDLWylieConstants.U0F86,
|
||||
THDLWylieConstants.U0F87,
|
||||
THDLWylieConstants.U0F19,
|
||||
THDLWylieConstants.U0F18,
|
||||
THDLWylieConstants.U0FC6,
|
||||
THDLWylieConstants.U0F3E,
|
||||
THDLWylieConstants.U0F3F,
|
||||
THDLWylieConstants.U0F84,
|
||||
};
|
||||
for (int i = 0; i < standalone_wowels.length; i++) {
|
||||
if (wowel.equals(standalone_wowels[i])) {
|
||||
ThdlDebug.verify(preceding_added[0]);
|
||||
duff.add(TibetanMachineWeb.getGlyph(standalone_wowels[i]));
|
||||
wowel = removeFirstMatch(wowel, standalone_wowels[i]);
|
||||
}
|
||||
}
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
|
||||
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
|
||||
StringBuffer sb = new StringBuffer(wowel);
|
||||
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
|
||||
wowel = sb.toString();
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]:: are bindus screwed up in the unicode output? i see (with tmuni font) lone bindus without glyphs to stack on
|
||||
// We verify that no part of wowel is discarded.
|
||||
if (wowel.length() > 0) {
|
||||
throw new ConversionException(
|
||||
"Full wowel was not handled, there remains: " + wowel);
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]:: are bindus screwed up in the
|
||||
// unicode output? i see (with tmuni font) lone bindus
|
||||
// without glyphs to stack on
|
||||
}
|
||||
|
||||
public String getUnicodeForWowel(String wowel) {
|
||||
if ("a".equals(wowel))
|
||||
if (THDLWylieConstants.WYLIE_aVOWEL.equals(wowel))
|
||||
return "";
|
||||
return helpGetUnicodeForWowel(wowel);
|
||||
}
|
||||
|
||||
private String helpGetUnicodeForWowel(String wowel) {
|
||||
if ("a".equals(wowel))
|
||||
if (THDLWylieConstants.WYLIE_aVOWEL.equals(wowel))
|
||||
return null; // ko+a+e is invalid, e.g.
|
||||
if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0)))
|
||||
if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0))) {
|
||||
if ("\u0f75".equals(wowel))
|
||||
return "\u0f71\u0f74"; // \u0f75 is discouraged
|
||||
if ("\u0f81".equals(wowel))
|
||||
return "\u0f71\u0f80"; // \u0f81 is discouraged
|
||||
if ("\u0f73".equals(wowel))
|
||||
return "\u0f71\u0f72"; // \u0f73 is discouraged
|
||||
if ("\u0f79".equals(wowel))
|
||||
return "\u0fb3\u0f81"; // \u0f79 is discouraged
|
||||
if ("\u0f78".equals(wowel))
|
||||
return "\u0fb3\u0f80"; // \u0f78 is discouraged
|
||||
return wowel;
|
||||
}
|
||||
// handle o+u, etc.
|
||||
int i;
|
||||
if ((i = wowel.indexOf("+")) >= 0) {
|
||||
|
@ -290,27 +364,27 @@ public final class EWTSTraits implements TTraits {
|
|||
} else {
|
||||
// Handle vowels. (TODO(dchandler): tibwn.ini has this
|
||||
// info, use that instead of duplicating it in this code.)
|
||||
if ("i".equals(wowel)) return "\u0f72";
|
||||
if ("u".equals(wowel)) return "\u0f74";
|
||||
if ("A".equals(wowel)) return "\u0f71";
|
||||
if ("U".equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged
|
||||
if ("e".equals(wowel)) return "\u0f7a";
|
||||
if ("o".equals(wowel)) return "\u0f7c";
|
||||
if ("-i".equals(wowel)) return "\u0f80";
|
||||
if ("ai".equals(wowel)) return "\u0f7b";
|
||||
if ("au".equals(wowel)) return "\u0f7d";
|
||||
if ("-I".equals(wowel)) return "\u0f81";
|
||||
if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
|
||||
if (THDLWylieConstants.i_VOWEL.equals(wowel)) return "\u0f72";
|
||||
if (THDLWylieConstants.u_VOWEL.equals(wowel)) return "\u0f74";
|
||||
if (THDLWylieConstants.A_VOWEL.equals(wowel)) return "\u0f71";
|
||||
if (THDLWylieConstants.U_VOWEL.equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged
|
||||
if (THDLWylieConstants.e_VOWEL.equals(wowel)) return "\u0f7a";
|
||||
if (THDLWylieConstants.o_VOWEL.equals(wowel)) return "\u0f7c";
|
||||
if (THDLWylieConstants.reverse_i_VOWEL.equals(wowel)) return "\u0f80";
|
||||
if (THDLWylieConstants.ai_VOWEL.equals(wowel)) return "\u0f7b";
|
||||
if (THDLWylieConstants.au_VOWEL.equals(wowel)) return "\u0f7d";
|
||||
if (THDLWylieConstants.reverse_I_VOWEL.equals(wowel)) return "\u0f71\u0f80"; // \u0f81 is discouraged
|
||||
if (THDLWylieConstants.I_VOWEL.equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: test, test, test.
|
||||
if ("M".equals(wowel)) return "\u0f7e";
|
||||
if ("H".equals(wowel)) return "\u0f7f";
|
||||
if ("?".equals(wowel)) return "\u0f84";
|
||||
if ("~M".equals(wowel)) return "\u0f83";
|
||||
if ("~M`".equals(wowel)) return "\u0f82";
|
||||
if ("X".equals(wowel)) return "\u0f37";
|
||||
if ("~X".equals(wowel)) return "\u0f35";
|
||||
if ("^".equals(wowel)) return "\u0f39";
|
||||
// TODO(DLC)[EWTS->Tibetan]: what about \u0f3e and \u0f3f!!!!
|
||||
if (THDLWylieConstants.BINDU.equals(wowel)) return "\u0f7e";
|
||||
if (THDLWylieConstants.U0F7F.equals(wowel)) return "\u0f7f";
|
||||
if (THDLWylieConstants.U0F84.equals(wowel)) return "\u0f84";
|
||||
if (THDLWylieConstants.U0F83.equals(wowel)) return "\u0f83";
|
||||
if (THDLWylieConstants.U0F82.equals(wowel)) return "\u0f82";
|
||||
if (THDLWylieConstants.U0F37.equals(wowel)) return "\u0f37";
|
||||
if (THDLWylieConstants.U0F35.equals(wowel)) return "\u0f35";
|
||||
if (THDLWylieConstants.WYLIE_TSA_PHRU.equals(wowel)) return "\u0f39";
|
||||
|
||||
return null;
|
||||
}
|
||||
|
@ -324,9 +398,9 @@ public final class EWTSTraits implements TTraits {
|
|||
for (int i = 0; i < l.length(); i++) {
|
||||
char ch = l.charAt(i);
|
||||
if ((ch < '\u0f00' || ch > '\u0fff')
|
||||
&& SAUVASTIKA != ch
|
||||
&& SWASTIKA != ch
|
||||
&& (ch < PUA_MIN || ch > PUA_MAX) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all.
|
||||
&& THDLWylieConstants.SAUVASTIKA != ch
|
||||
&& THDLWylieConstants.SWASTIKA != ch
|
||||
&& (ch < THDLWylieConstants.PUA_MIN || ch > THDLWylieConstants.PUA_MAX) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all.
|
||||
&& '\n' != ch
|
||||
&& '\r' != ch) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: Is this the place
|
||||
|
@ -346,6 +420,8 @@ public final class EWTSTraits implements TTraits {
|
|||
if ("Y".equals(l)) return "\u0fbb";
|
||||
if ("W".equals(l)) return "\u0fba";
|
||||
|
||||
// TODO(dchandler): use tibwn.ini -- it has this same info.
|
||||
|
||||
// g+h etc. should not be inputs to this function, but for
|
||||
// completeness they're here.
|
||||
if ("k".equals(l)) return "\u0F90";
|
||||
|
@ -455,18 +531,24 @@ public final class EWTSTraits implements TTraits {
|
|||
public boolean vowelsMayStack() { return true; }
|
||||
|
||||
public boolean isWowelThatRequiresAChen(String s) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: fix me!
|
||||
return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0))
|
||||
|| "?MHX^".indexOf(s.charAt(0)) >= 0))
|
||||
|| "~X".equals(s)
|
||||
|| "~M".equals(s)
|
||||
|| "~M`".equals(s)
|
||||
);
|
||||
// TODO(DLC)[EWTS->Tibetan]: not sure why we pick this subset.
|
||||
// Why don't we use a negative set of regular vowels like "i",
|
||||
// "o", etc.?
|
||||
return ((s.length() == 1
|
||||
&& (isUnicodeWowelThatRequiresAChen(s.charAt(0))))
|
||||
|| THDLWylieConstants.BINDU.equals(s)
|
||||
|| THDLWylieConstants.U0F35.equals(s)
|
||||
|| THDLWylieConstants.U0F37.equals(s)
|
||||
|| THDLWylieConstants.U0F7F.equals(s)
|
||||
|| THDLWylieConstants.U0F82.equals(s)
|
||||
|| THDLWylieConstants.U0F83.equals(s)
|
||||
|| THDLWylieConstants.U0F84.equals(s)
|
||||
|| THDLWylieConstants.WYLIE_TSA_PHRU.equals(s));
|
||||
}
|
||||
|
||||
public boolean isUnicodeWowelThatRequiresAChen(char ch) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: ask if 18 19 3e 3f combine only with digits
|
||||
return "\u0f39\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0;
|
||||
return ("\u0f39\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0);
|
||||
}
|
||||
|
||||
public boolean couldBeValidStack(TPairList pl) {
|
||||
|
@ -485,33 +567,9 @@ public final class EWTSTraits implements TTraits {
|
|||
|
||||
public boolean stackingMustBeExplicit() { return true; }
|
||||
|
||||
public String U0F7F() { return "H"; }
|
||||
public String U0F7F() { return THDLWylieConstants.U0F7F; }
|
||||
|
||||
public String U0F35() { return "~X"; }
|
||||
public String U0F35() { return THDLWylieConstants.U0F35; }
|
||||
|
||||
public String U0F37() { return "X"; }
|
||||
|
||||
/** The EWTS standard mentions this character specifically. See
|
||||
http://www.symbols.com/encyclopedia/15/155.html to learn about
|
||||
its meaning as it relates to Buddhism.
|
||||
*/
|
||||
static final char SAUVASTIKA = '\u534d';
|
||||
|
||||
/** The EWTS standard mentions this character specifically. See
|
||||
http://www.symbols.com/encyclopedia/15/151.html to learn about
|
||||
its meaning as it relates to Buddhism.
|
||||
*/
|
||||
static final char SWASTIKA = '\u5350';
|
||||
|
||||
/** EWTS has some glyphs not specified by Unicode in the
|
||||
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
|
||||
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
|
||||
* 2, 2005.) */
|
||||
static final char PUA_MIN = '\uf021';
|
||||
|
||||
/** EWTS has some glyphs not specified by Unicode in the
|
||||
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
|
||||
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
|
||||
* 2, 2005.) */
|
||||
static final char PUA_MAX = '\uf0ff';
|
||||
public String U0F37() { return THDLWylieConstants.U0F37; }
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ package org.thdl.tib.text.ttt;
|
|||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.thdl.tib.text.THDLWylieConstants;
|
||||
|
||||
/**
|
||||
* This singleton class is able to break up Strings of EWTS text (for
|
||||
* example, an entire sutra file) into tsheg bars, comments, etc.
|
||||
|
@ -76,8 +78,11 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
|||
StringBuffer sb = new StringBuffer(s);
|
||||
ExpandEscapeSequences(sb);
|
||||
int sl = sb.length();
|
||||
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working
|
||||
// TODO(DLC)[EWTS->Tibetan]:: 'jamX 'jam~X one is not working in ->tmw mode
|
||||
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working (probably because)
|
||||
// TODO(DLC)[EWTS->Tibetan]:: '#', in ewts->tmw, is not working
|
||||
//
|
||||
// TODO(DLC)[EWTS->Tibetan]:: 'jamX one is not working in ewts->tmw mode in the sense that X appears under the last glyph of the three instead of the middle glyph
|
||||
//
|
||||
// TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working
|
||||
for (int i = 0; i < sl; i++) { // i is modified in the loop, also
|
||||
if (isValidInsideTshegBar(sb.charAt(i))) {
|
||||
|
@ -102,14 +107,14 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
|||
al.add(new TString("EWTS", "//",
|
||||
TString.TIBETAN_PUNCTUATION));
|
||||
++i;
|
||||
} else if ((sb.charAt(i) >= EWTSTraits.PUA_MIN
|
||||
&& sb.charAt(i) <= EWTSTraits.PUA_MAX)
|
||||
} else if ((sb.charAt(i) >= THDLWylieConstants.PUA_MIN
|
||||
&& sb.charAt(i) <= THDLWylieConstants.PUA_MAX)
|
||||
|| (sb.charAt(i) >= '\u0f00' && sb.charAt(i) <= '\u0f17')
|
||||
|| (sb.charAt(i) >= '\u0f1a' && sb.charAt(i) <= '\u0f1f')
|
||||
|| (sb.charAt(i) >= '\u0fbe' && sb.charAt(i) <= '\u0fcc')
|
||||
|| (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1')
|
||||
|| (EWTSTraits.SAUVASTIKA == sb.charAt(i))
|
||||
|| (EWTSTraits.SWASTIKA == sb.charAt(i))
|
||||
|| (THDLWylieConstants.SAUVASTIKA == sb.charAt(i))
|
||||
|| (THDLWylieConstants.SWASTIKA == sb.charAt(i))
|
||||
|| (" /;|!:=_@#$%<>()*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i))
|
||||
>= 0)) {
|
||||
al.add(new TString("EWTS", sb.substring(i, i+1),
|
||||
|
@ -186,7 +191,31 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
|
|||
// leave x == -1
|
||||
}
|
||||
if (x >= 0) {
|
||||
sb.replace(i, i + "\\uXXXX".length(), new String(new char[] { (char)x }));
|
||||
String replacement = String.valueOf((char)x);
|
||||
|
||||
if (false) {
|
||||
// This would ruin EWTS->Unicode to
|
||||
// help EWTS->TMW, so we don't do it.
|
||||
// TODO(dchandler): Fix EWTS->TMW for
|
||||
// \u0f02 and \u0f03.
|
||||
|
||||
// A nasty little HACK for you:
|
||||
//
|
||||
// TODO(dchandler): we may create "ga..u~M`H..ha" which may cause errors
|
||||
String hack = null;
|
||||
if ('\u0f02' == x) {
|
||||
hack = "u~M`H"; // hard-coded EWTS
|
||||
} else if ('\u0f03' == x) {
|
||||
hack = "u~M`:"; // hard-coded EWTS
|
||||
} else if ('\u0f00' == x) {
|
||||
hack = "oM"; // hard-coded EWTS
|
||||
}
|
||||
if (null != hack) {
|
||||
replacement = "." + hack + "."; // hard-coded EWTS disambiguators
|
||||
i += replacement.length() - 1;
|
||||
}
|
||||
}
|
||||
sb.replace(i, i + "\\uXXXX".length(), replacement);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.ArrayList;
|
|||
import org.thdl.tib.text.DuffCode;
|
||||
import org.thdl.tib.text.TibetanDocument;
|
||||
import org.thdl.tib.text.TibetanMachineWeb;
|
||||
import org.thdl.tib.text.THDLWylieConstants;
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
||||
|
@ -699,7 +700,13 @@ public class TConverter {
|
|||
} else {
|
||||
String wy = ttraits.getEwtsForOther(s.getText());
|
||||
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
|
||||
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
|
||||
duff = new Object[] { TibetanMachineWeb.maybeGetGlyph(wy) };
|
||||
if (null == duff[0]) {
|
||||
duff[0] =
|
||||
ErrorsAndWarnings.getMessage(
|
||||
137, shortMessages,
|
||||
s.getText(), ttraits);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -730,8 +737,8 @@ public class TConverter {
|
|||
ThdlDebug.verify(1 == s.getText().length());
|
||||
if (null != writer) {
|
||||
char ch = s.getText().charAt(0);
|
||||
if (ch >= EWTSTraits.PUA_MIN
|
||||
&& ch <= EWTSTraits.PUA_MAX) {
|
||||
if (ch >= THDLWylieConstants.PUA_MIN
|
||||
&& ch <= THDLWylieConstants.PUA_MAX) {
|
||||
hasErrors = true;
|
||||
String errorMessage =
|
||||
"[#ERROR "
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
|
|||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
import org.thdl.tib.text.TGCPair;
|
||||
import org.thdl.tib.text.TibetanMachineWeb;
|
||||
import org.thdl.util.ThdlDebug;
|
||||
|
@ -710,47 +711,49 @@ class TPairList {
|
|||
wylieForConsonant.append(lastPair.getWylie(true, false));
|
||||
String hashKey = wylieForConsonant.toString();
|
||||
|
||||
// Because EWTS has special handling for full-formed
|
||||
// subjoined consonants, we have special handling here.
|
||||
if ("r+y".equals(hashKey))
|
||||
hashKey = "r+Y";
|
||||
else if ("y+y".equals(hashKey))
|
||||
hashKey = "y+Y";
|
||||
else if ("N+D+y".equals(hashKey))
|
||||
hashKey = "N+D+Y";
|
||||
else if ("N+D+r+y".equals(hashKey))
|
||||
hashKey = "N+D+R+y";
|
||||
else if ("k+Sh+r".equals(hashKey))
|
||||
hashKey = "k+Sh+R";
|
||||
if (traits.isACIP()) {
|
||||
// Because EWTS has special handling for full-formed
|
||||
// subjoined consonants, we have special handling here.
|
||||
if ("r+y".equals(hashKey))
|
||||
hashKey = "r+Y";
|
||||
else if ("y+y".equals(hashKey))
|
||||
hashKey = "y+Y";
|
||||
else if ("N+D+y".equals(hashKey))
|
||||
hashKey = "N+D+Y";
|
||||
else if ("N+D+r+y".equals(hashKey))
|
||||
hashKey = "N+D+R+y";
|
||||
else if ("k+Sh+r".equals(hashKey))
|
||||
hashKey = "k+Sh+R";
|
||||
|
||||
// TPair.getWylie(..) returns "W" sometimes when "w" is what
|
||||
// really should be returned. ("V" always causes "w" to be
|
||||
// returned, which is fine.) We'll change "W" to "w" here if
|
||||
// we need to. We do it only for a few known stacks (the ones
|
||||
// in TMW).
|
||||
if ("W".equals(hashKey))
|
||||
hashKey = "w";
|
||||
else if ("W+y".equals(hashKey))
|
||||
hashKey = "w+y";
|
||||
else if ("W+r".equals(hashKey))
|
||||
hashKey = "w+r";
|
||||
else if ("W+n".equals(hashKey))
|
||||
hashKey = "w+n";
|
||||
else if ("W+W".equals(hashKey))
|
||||
hashKey = "w+W";
|
||||
// TPair.getWylie(..) returns "W" sometimes when "w" is what
|
||||
// really should be returned. ("V" always causes "w" to be
|
||||
// returned, which is fine.) We'll change "W" to "w" here if
|
||||
// we need to. We do it only for a few known stacks (the ones
|
||||
// in TMW).
|
||||
if ("W".equals(hashKey))
|
||||
hashKey = "w";
|
||||
else if ("W+y".equals(hashKey))
|
||||
hashKey = "w+y";
|
||||
else if ("W+r".equals(hashKey))
|
||||
hashKey = "w+r";
|
||||
else if ("W+n".equals(hashKey))
|
||||
hashKey = "w+n";
|
||||
else if ("W+W".equals(hashKey))
|
||||
hashKey = "w+W";
|
||||
|
||||
if ("r+Y".equals(hashKey)
|
||||
|| "r+W".equals(hashKey)
|
||||
|| "r+sh".equals(hashKey)
|
||||
|| "r+sh+y".equals(hashKey)
|
||||
|| "r+Sh".equals(hashKey)
|
||||
|| "r+Sh+N".equals(hashKey)
|
||||
|| "r+Sh+N+y".equals(hashKey)
|
||||
|| "r+Sh+m".equals(hashKey)
|
||||
|| "r+Sh+y".equals(hashKey)
|
||||
|| "r+s".equals(hashKey)
|
||||
) {
|
||||
hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
|
||||
if ("r+Y".equals(hashKey)
|
||||
|| "r+W".equals(hashKey)
|
||||
|| "r+sh".equals(hashKey)
|
||||
|| "r+sh+y".equals(hashKey)
|
||||
|| "r+Sh".equals(hashKey)
|
||||
|| "r+Sh+N".equals(hashKey)
|
||||
|| "r+Sh+N+y".equals(hashKey)
|
||||
|| "r+Sh+m".equals(hashKey)
|
||||
|| "r+Sh+y".equals(hashKey)
|
||||
|| "r+s".equals(hashKey)
|
||||
) {
|
||||
hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
|
||||
}
|
||||
}
|
||||
|
||||
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
|
||||
|
@ -774,7 +777,7 @@ class TPairList {
|
|||
traits.getDuffForWowel(duffsAndErrors,
|
||||
TibetanMachineWeb.getGlyph(hashKey),
|
||||
lastPair.getRight());
|
||||
} catch (IllegalArgumentException e) {
|
||||
} catch (ConversionException e) {
|
||||
// TODO(dchandler): Error 137 isn't the perfect
|
||||
// message. Try EWTS [RAM], e.g. to see why. It acts
|
||||
// like we're trying to find a single glyph for (R
|
||||
|
|
|
@ -20,7 +20,12 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text.ttt;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.thdl.tib.text.TibetanMachineWeb;
|
||||
import org.thdl.tib.text.THDLWylieConstants;
|
||||
|
||||
/** A factory for creating {@link TPairList TPairLists} from
|
||||
* Strings of ACIP.
|
||||
|
@ -374,6 +379,85 @@ class TPairListFactory {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** Returns a TPair just like tp (sometimes the very same,
|
||||
* unchanged instance) except that the wowel, if present, is in
|
||||
* the order that Section 9.11 of the Unicode Standard, version
|
||||
* 4.0.1, would have us use. */
|
||||
private static TPair ewtsSortWowels(TPair tp) {
|
||||
if (tp.getRight() != null
|
||||
&& tp.getRight().length() > 0
|
||||
&& !"+".equals(tp.getRight())) {
|
||||
class WowelComparator implements Comparator {
|
||||
/** @see
|
||||
* org.thdl.tib.text.tshegbar.UnicodeUtils#fixSomeOrderingErrorsInTibetanUnicode(StringBuffer) */
|
||||
private List order = Arrays.asList(new String[] {
|
||||
// equivalence class:
|
||||
"\u0f39", THDLWylieConstants.WYLIE_TSA_PHRU,
|
||||
|
||||
// equivalence class:
|
||||
THDLWylieConstants.WYLIE_aVOWEL,
|
||||
|
||||
// equivalence class:
|
||||
"\u0f71", THDLWylieConstants.A_VOWEL,
|
||||
"\u0f73", THDLWylieConstants.I_VOWEL, // TODO(dchandler): in a perfect world, we'd decompose and sort the components.
|
||||
"\u0f75", THDLWylieConstants.U_VOWEL, // TODO(dchandler): in a perfect world, we'd decompose and sort the components.
|
||||
"\u0f81", THDLWylieConstants.reverse_I_VOWEL, // TODO(dchandler): in a perfect world, we'd decompose and sort the components.
|
||||
|
||||
"\u0f74", THDLWylieConstants.u_VOWEL,
|
||||
|
||||
// equivalence class:
|
||||
"\u0f72", THDLWylieConstants.i_VOWEL,
|
||||
"\u0f7a", THDLWylieConstants.e_VOWEL,
|
||||
"\u0f7b", THDLWylieConstants.ai_VOWEL,
|
||||
"\u0f7c", THDLWylieConstants.o_VOWEL,
|
||||
"\u0f7d", THDLWylieConstants.au_VOWEL,
|
||||
"\u0f80", THDLWylieConstants.reverse_i_VOWEL,
|
||||
|
||||
// equivalence class:
|
||||
"\u0f7e", THDLWylieConstants.BINDU,
|
||||
"\u0f82", THDLWylieConstants.U0F82,
|
||||
"\u0f83", THDLWylieConstants.U0F83,
|
||||
"\u0f86", THDLWylieConstants.U0F86,
|
||||
"\u0f87", THDLWylieConstants.U0F87,
|
||||
|
||||
// NOTE: we always say "e" comes before "o" but
|
||||
// either order would work.
|
||||
|
||||
/* TODO(dchandler): should these go with other
|
||||
* under-line wowels like \u0f74? They're for the
|
||||
* whole tsheg-bar, so they're oddballs...
|
||||
*
|
||||
* bestEwtsMap.put("\u0f35", THDLWylieConstants.U0F35);
|
||||
*
|
||||
* bestEwtsMap.put("\u0f37", THDLWylieConstants.U0F37);
|
||||
*
|
||||
* bestEwtsMap.put("\u0f84", THDLWylieConstants.U0F84);
|
||||
*
|
||||
* bestEwtsMap.put("\u0fc6", THDLWylieConstants.U0FC6);
|
||||
*/
|
||||
});
|
||||
public int compare(Object o1, Object o2) {
|
||||
int i1 = order.indexOf(o1);
|
||||
int i2 = order.indexOf(o2);
|
||||
if (i1 < 0) i1 = order.size();
|
||||
if (i2 < 0) i2 = order.size();
|
||||
return i1 - i2;
|
||||
}
|
||||
}
|
||||
String wowels[] = tp.getRight().split("\\+");
|
||||
java.util.Arrays.sort(wowels, new WowelComparator());
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < wowels.length; i++) {
|
||||
sb.append(wowels[i]);
|
||||
if (i + 1 < wowels.length)
|
||||
sb.append('+');
|
||||
}
|
||||
return new TPair(tp.getTraits(), tp.getLeft(), sb.toString());
|
||||
} else {
|
||||
return tp;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: doc
|
||||
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
|
||||
|
||||
|
@ -383,7 +467,9 @@ class TPairListFactory {
|
|||
|
||||
StringBuffer ewtsBuf = new StringBuffer(ewts);
|
||||
int howMuchBuf[] = new int[1];
|
||||
TPair head = getFirstConsonantAndVowel(ewtsBuf, howMuchBuf, ttraits);
|
||||
TPair head = ewtsSortWowels(getFirstConsonantAndVowel(ewtsBuf,
|
||||
howMuchBuf,
|
||||
ttraits));
|
||||
int howMuch = howMuchBuf[0];
|
||||
|
||||
TPairList tail;
|
||||
|
@ -448,7 +534,7 @@ class TPairListFactory {
|
|||
* {N+YE} or an error or whatever you like. howMuch[0] will be
|
||||
* set to the number of characters of tx that this call has
|
||||
* consumed. */
|
||||
private static TPair getFirstConsonantAndVowel(StringBuffer tx, // TODO(DLC)[EWTS->Tibetan]: function name needs ACIP in it?
|
||||
private static TPair getFirstConsonantAndVowel(StringBuffer tx,
|
||||
int howMuch[],
|
||||
TTraits ttraits) {
|
||||
// To handle EWTS "phywa\\u0f84\u0f86" [yes that's two slashes
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
|
|||
import java.util.HashSet;
|
||||
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
import org.thdl.tib.text.THDLWylieConstants;
|
||||
import org.thdl.util.ThdlDebug;
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
||||
|
@ -66,8 +67,8 @@ public class TString {
|
|||
&& type != END_SLASH
|
||||
&& (type != UNICODE_CHARACTER
|
||||
|| !(UnicodeUtils.isInTibetanRange(ch = getText().charAt(0))
|
||||
|| (ch >= EWTSTraits.PUA_MIN
|
||||
&& ch <= EWTSTraits.PUA_MAX))));
|
||||
|| (ch >= THDLWylieConstants.PUA_MIN
|
||||
&& ch <= THDLWylieConstants.PUA_MAX))));
|
||||
}
|
||||
|
||||
/** For ACIP [#COMMENTS] and EWTS (DLC FIXME: what are EWTS comments?) */
|
||||
|
|
|
@ -136,7 +136,8 @@ public interface TTraits {
|
|||
|
||||
/** Gets the duffcodes for wowel, such that they look good with
|
||||
* the preceding glyph, and appends them to duff. */
|
||||
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel);
|
||||
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
|
||||
throws ConversionException;
|
||||
|
||||
/** Human-readable name of this transliteration for short error
|
||||
strings. */
|
||||
|
|
|
@ -43,7 +43,7 @@ public class VerboseUnicodeDump {
|
|||
java.nio.charset.Charset.forName(args[1]));
|
||||
int x;
|
||||
while (-1 != (x = fr.read())) {
|
||||
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, ""));
|
||||
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, "", false));
|
||||
}
|
||||
System.exit(0);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue