Numerous EWTS->Unicode and especially EWTS->TMW improvements.

Fixed ordering of Unicode wowels; e.g., [ku+A] now gives the correct
Unicode.

EWTS->TMW looks better for some wacky wowels like, I'm guessing here, [ku+A].

EWTS->TMW should now give errors any time the full input isn't used.
Previously, wacky wowels like [kai+-i] would lead to some droppage.

EWTS->TMW->Unicode testing is now in effect.  This found a ton of
EWTS->TMW bugs, most or all of which are fixed now.

TMW->Unicode is improved/fixed for {
\u5350,\u534D,\u0F88+k,\u0F88+kh,U }.  (Why U?  "\u0f75" is
discouraged in favor of "\u0f71\u0f74".)

NOTE: TMW_RTF_TO_THDL_WYLIETest is still disabled for the nightly
builds' sake, but I ran it in my sandbox and it passed.
This commit is contained in:
dchandler 2005-07-11 02:51:06 +00:00
parent 36122778b4
commit 6d419fe641
19 changed files with 1014 additions and 547 deletions

View file

@ -1377,7 +1377,7 @@ public void paste(int offset)
if (TibetanMachineWeb.isPunc(val)) { //punctuation
val = TibetanMachineWeb.getWylieForPunc(val);
if (val.charAt(0) == TibetanMachineWeb.BINDU)
if (val.startsWith(THDLWylieConstants.BINDU))
putBindu();
else {

View file

@ -242,8 +242,8 @@
\f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 K+S+MA\fs28\i0\b0\ul0\cf0 font 2; ord 54\par
\f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 K+S+YA\fs28\i0\b0\ul0\cf0 font 2; ord 55\par
\f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 K+S+VA\fs28\i0\b0\ul0\cf0 font 2; ord 56\par
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=59 character=;/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=60 character=</> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F88+k to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F88+kh to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
\f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 KH+KHA\fs28\i0\b0\ul0\cf0 font 2; ord 59\par
\f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 KH+NA\fs28\i0\b0\ul0\cf0 font 2; ord 60\par
\f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 KH+LA\fs28\i0\b0\ul0\cf0 font 2; ord 61\par
@ -812,8 +812,8 @@
\f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F13 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 94\par
\f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie < to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 95\par
\f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie > to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 96\par
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=97 character=a/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=98 character=b/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u5350 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u534D to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
\f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF038 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 99\par
\f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF037 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 100\par
\f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 o\fs28\i0\b0\ul0\cf0 font 5; ord 101\par

View file

@ -242,8 +242,8 @@
\f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 k+s+ma\fs28\i0\b0\ul0\cf0 font 2; ord 54\par
\f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 k+s+ya\fs28\i0\b0\ul0\cf0 font 2; ord 55\par
\f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 k+s+wa\fs28\i0\b0\ul0\cf0 font 2; ord 56\par
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=59 character=;/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=60 character=</> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0\\u0F88+k\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0\\u0F88+kh\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
\f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 kh+kha\fs28\i0\b0\ul0\cf0 font 2; ord 59\par
\f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 kh+na\fs28\i0\b0\ul0\cf0 font 2; ord 60\par
\f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 kh+la\fs28\i0\b0\ul0\cf0 font 2; ord 61\par
@ -812,8 +812,8 @@
\f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0\\u0F13\fs28\i0\b0\ul0\cf0 font 5; ord 94\par
\f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 <\fs28\i0\b0\ul0\cf0 font 5; ord 95\par
\f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 >\fs28\i0\b0\ul0\cf0 font 5; ord 96\par
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=97 character=a/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=98 character=b/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0\\u5350\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0\\u534D\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
\f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0\\uF038\fs28\i0\b0\ul0\cf0 font 5; ord 99\par
\f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0\\uF037\fs28\i0\b0\ul0\cf0 font 5; ord 100\par
\f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 X\fs28\i0\b0\ul0\cf0 font 5; ord 101\par

View file

@ -18,9 +18,80 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text;
/** This is where basic, static knowledge of THDL's Extended Wylie is housed.
/** This is where basic, static knowledge of THDL's Extended Wylie is
* housed. <p>TODO(dchandler): tibwn.ini has all this, yes? So
* extend TibetanMachineWeb if necessary and use a bunch of HashMaps
* there! This is needless duplication.
* @see TibetanMachineWeb */
public interface THDLWylieConstants {
// TODO(DLC)[EWTS->Tibetan]: what about U+2638, mentioned in Section
// 9.11 "Tibetan" of the Unicode 4.0.1 standard? Why doesn't EWTS
// mention it? (Because TMW has no glyph for it, I bet.) Do we
// handle it well?
/** The EWTS standard mentions this character specifically. See
* http://www.symbols.com/encyclopedia/15/155.html to learn about
* its meaning as relates to Buddhism.
*/
public static final char SAUVASTIKA = '\u534d';
/** The EWTS standard mentions this character specifically. See
* http://www.symbols.com/encyclopedia/15/151.html to learn about
* its meaning as relates to Buddhism.
*/
public static final char SWASTIKA = '\u5350';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
public static final char PUA_MIN = '\uf021';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
public static final char PUA_MAX = '\uf0ff';
/**
* the Wylie for U+0F3E
*/
public static final String U0F3E = "}";
/**
* the Wylie for U+0F3F
*/
public static final String U0F3F = "{";
/**
* the Wylie for U+0F86
*/
public static final String U0F86 = "\\u0F86";
/**
* the Wylie for U+0F87
*/
public static final String U0F87 = "\\u0F87";
/**
* the Wylie for U+0FC6
*/
public static final String U0FC6 = "\\u0FC6";
/**
* the Wylie for U+0F18
*/
public static final String U0F18 = "\\u0F18";
/**
* the Wylie for U+0F19
*/
public static final String U0F19 = "\\u0F19";
/**
* the Wylie for U+0F84
*/
public static final String U0F84 = "?";
/**
* the Wylie for U+0F7F
*/
public static final String U0F7F = "H";
/**
* the Wylie for U+0F35
*/
public static final String U0F35 = "~X";
/**
* the Wylie for U+0F37
*/
public static final String U0F37 = "X";
/**
* the Wylie for U+0F82
*/
@ -32,7 +103,7 @@ public interface THDLWylieConstants {
/**
* the Wylie for bindu/anusvara (U+0F7E)
*/
public static final char BINDU = 'M';
public static final String BINDU = "M";
/**
* the Wylie for tsheg
*/
@ -64,31 +135,51 @@ public interface THDLWylieConstants {
*/
public static final String WYLIE_TSA_PHRU = "^";
/**
* the Wylie for achung
* the Wylie for achung, \u0f60
*/
public static final char ACHUNG_character = '\'';
/**
* the Wylie for achung
* the Wylie for achung, \u0f60
*/
public static final String ACHUNG
= new String(new char[] { ACHUNG_character });
/**
* the Wylie for the 28th of the 30 consonants, sa:
* the Wylie for the 28th of the 30 consonants, sa, \u0f66:
*/
public static final String SA = "s";
/**
* the Wylie for the consonant ra:
* the Wylie for the consonant ra, \u0f62:
*/
public static final String RA = "r";
/**
* the Wylie for the 16th of the 30 consonants, ma:
* the Wylie for the 16th of the 30 consonants, ma, \u0f58:
*/
public static final String MA = "m";
/**
* the Wylie for the 4th of the 30 consonants, nga:
* the Wylie for \u0f56:
*/
public static final String BA = "b";
/**
* the Wylie for \u0f51:
*/
public static final String DA = "d";
/**
* the Wylie for \u0f42:
*/
public static final String GA = "g";
/**
* the Wylie for \u0f63:
*/
public static final String LA = "l";
/**
* the Wylie for the 4th of the 30 consonants, nga, \u0f44:
*/
public static final String NGA = "ng";
/**
* the Wylie for \u0f53:
*/
public static final String NA = "n";
/**
* the Wylie for achen
*/
public static final String ACHEN = "a";

View file

@ -418,7 +418,7 @@ public class TibTextUtils implements THDLWylieConstants {
chars.clear();
if (next.equals(String.valueOf(BINDU))) {
if (next.equals(BINDU)) {
if (glyphs.isEmpty())
dc = null;
else
@ -560,11 +560,11 @@ public class TibTextUtils implements THDLWylieConstants {
* or null */
public static void getBindu(List list, DuffCode dc) {
if (null == dc) {
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
list.add(TibetanMachineWeb.getGlyph(BINDU));
} else {
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
list.add(dc);
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU)));
list.add(TibetanMachineWeb.getGlyph(BINDU));
} else {
list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
}

View file

@ -1347,12 +1347,26 @@ public static boolean isKnownHashKey(String hashKey) {
* @see DuffCode
*/
public static DuffCode getGlyph(String hashKey) {
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
DuffCode dc = maybeGetGlyph(hashKey);
if (null == dc)
throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
return dc;
}
/**
 * Looks up the glyph registered under the given hash key without
 * treating a missing key as an error.
 * @param hashKey the key to look up in the glyph table
 * @return the TMW entry of the glyph array stored for hashKey, or
 * null if nothing is stored under hashKey
 * @see #getGlyph(String)
 */
public static DuffCode maybeGetGlyph(String hashKey) {
    DuffCode[] glyphs = (DuffCode[])tibHash.get(hashKey);
    return (null == glyphs) ? null : glyphs[TMW];
}
/**
* Gets the half height character for this hash key.
* @param hashKey the key you want a half height glyph for; see {@link
@ -1783,6 +1797,8 @@ private static final String Unicode_tab = "\t";
= new DuffCode[] { new DuffCode(1, (char)58) };
private static final DuffCode[] tmwFor0F73
= new DuffCode[] { new DuffCode(4, (char)106), new DuffCode(1, (char)109) };
private static final DuffCode[] tmwFor0F75
= new DuffCode[] { new DuffCode(10, (char)126) };
private static final DuffCode[] tmwFor0F76
= new DuffCode[] { new DuffCode(8, (char)71), new DuffCode(8, (char)87) };
private static final DuffCode[] tmwFor0F77
@ -1840,6 +1856,8 @@ private static final String Unicode_tab = "\t";
return tmwFor0F6A;
} else if ('\u0F73' == ch) {
return tmwFor0F73;
} else if ('\u0F75' == ch) {
return tmwFor0F75;
} else if ('\u0F76' == ch) {
return tmwFor0F76;
} else if ('\u0F77' == ch) {

View file

@ -927,6 +927,15 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1
// nyi.zla editor's mark. This is NOT \u0F82, although it looks very similar.
\uF03A~91,5~~9,89~~~~~~~none
// yungs.drung (reversed):
\u5350~97,5~~9,97~~~~~~~5350
// yungs.drung (standard):
\u534D~98,5~~9,98~~~~~~~534D
// utsama ka:
\u0F88+k~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\u0F88+kh~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
<?ToWylie?>
M~238,1~~8,90~~~~~~~0F7E
@ -1069,13 +1078,13 @@ A~204,2~~4,109~~~~~~~0F71
A~205,2~~4,110~~~~~~~0F71
A~206,2~~4,111~~~~~~~0F71
A~207,2~~4,112~~~~~~~0F71
U~211,2~~4,113~~~~~~~0F75
U~212,2~~4,114~~~~~~~0F75
U~213,2~~4,115~~~~~~~0F75
U~214,2~~4,116~~~~~~~0F75
U~215,2~~4,117~~~~~~~0F75
U~216,2~~4,118~~~~~~~0F75
U~217,2~~4,119~~~~~~~0F75
U~211,2~~4,113~~~~~~~0F71,0F74
U~212,2~~4,114~~~~~~~0F71,0F74
U~213,2~~4,115~~~~~~~0F71,0F74
U~214,2~~4,116~~~~~~~0F71,0F74
U~215,2~~4,117~~~~~~~0F71,0F74
U~216,2~~4,118~~~~~~~0F71,0F74
U~217,2~~4,119~~~~~~~0F71,0F74
u~224,2~~4,120~~~~~~~0F74
u~225,2~~4,121~~~~~~~0F74
u~226,2~~4,122~~~~~~~0F74
@ -1090,13 +1099,13 @@ A~204,3~~6,109~~~~~~~0F71
A~205,3~~6,110~~~~~~~0F71
A~206,3~~6,111~~~~~~~0F71
A~207,3~~6,112~~~~~~~0F71
U~211,3~~6,113~~~~~~~0F75
U~212,3~~6,114~~~~~~~0F75
U~213,3~~6,115~~~~~~~0F75
U~214,3~~6,116~~~~~~~0F75
U~215,3~~6,117~~~~~~~0F75
U~216,3~~6,118~~~~~~~0F75
U~217,3~~6,119~~~~~~~0F75
U~211,3~~6,113~~~~~~~0F71,0F74
U~212,3~~6,114~~~~~~~0F71,0F74
U~213,3~~6,115~~~~~~~0F71,0F74
U~214,3~~6,116~~~~~~~0F71,0F74
U~215,3~~6,117~~~~~~~0F71,0F74
U~216,3~~6,118~~~~~~~0F71,0F74
U~217,3~~6,119~~~~~~~0F71,0F74
u~224,3~~6,120~~~~~~~0F74
u~225,3~~6,121~~~~~~~0F74
u~226,3~~6,122~~~~~~~0F74
@ -1111,13 +1120,13 @@ A~204,4~~8,109~~~~~~~0F71
A~205,4~~8,110~~~~~~~0F71
A~206,4~~8,111~~~~~~~0F71
A~207,4~~8,112~~~~~~~0F71
U~211,4~~8,113~~~~~~~0F75
U~212,4~~8,114~~~~~~~0F75
U~213,4~~8,115~~~~~~~0F75
U~214,4~~8,116~~~~~~~0F75
U~215,4~~8,117~~~~~~~0F75
U~216,4~~8,118~~~~~~~0F75
U~217,4~~8,119~~~~~~~0F75
U~211,4~~8,113~~~~~~~0F71,0F74
U~212,4~~8,114~~~~~~~0F71,0F74
U~213,4~~8,115~~~~~~~0F71,0F74
U~214,4~~8,116~~~~~~~0F71,0F74
U~215,4~~8,117~~~~~~~0F71,0F74
U~216,4~~8,118~~~~~~~0F71,0F74
U~217,4~~8,119~~~~~~~0F71,0F74
u~224,4~~8,120~~~~~~~0F74
u~225,4~~8,121~~~~~~~0F74
u~226,4~~8,122~~~~~~~0F74
@ -1131,13 +1140,13 @@ A~163,1~~10,116~~~~~~~0F71
A~164,1~~10,117~~~~~~~0F71
A~211,1~~10,118~~~~~~~0F71
A~212,1~~10,119~~~~~~~0F71
U~213,1~~10,120~~~~~~~0F75
U~214,1~~10,121~~~~~~~0F75
U~215,1~~10,122~~~~~~~0F75
U~216,1~~10,123~~~~~~~0F75
U~217,1~~10,124~~~~~~~0F75
U~218,1~~10,125~~~~~~~0F75
U~219,1~~10,126~~~~~~~0F75
U~213,1~~10,120~~~~~~~0F71,0F74
U~214,1~~10,121~~~~~~~0F71,0F74
U~215,1~~10,122~~~~~~~0F71,0F74
U~216,1~~10,123~~~~~~~0F71,0F74
U~217,1~~10,124~~~~~~~0F71,0F74
U~218,1~~10,125~~~~~~~0F71,0F74
U~219,1~~10,126~~~~~~~0F71,0F74
// ra.mgo:
r~173,4~~8,66~~~~~~~0F62
@ -1191,13 +1200,3 @@ r~176,4~~8,71~~~~~~~0FB2
\tmw8070~67,5~~9,70~~~~~~~none
\tmw8071~68,5~~9,71~~~~~~~none
\tmw8072~69,5~~9,72~~~~~~~none
// yungs.drung (reversed):
\tmw8097~97,5~~9,97~~~~~~~5350
// yungs.drung (standard):
\tmw8098~98,5~~9,98~~~~~~~534D
// utsama ka:
\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91

View file

@ -298,7 +298,7 @@ public class UnicodeUtils implements UnicodeConstants {
characters will appear as themselves. */
public static String unicodeCodepointToString(char cp,
boolean shortenIfPossible) {
return unicodeCodepointToString(cp, shortenIfPossible, "\\u");
return unicodeCodepointToString(cp, shortenIfPossible, "\\u", false);
}
/** Like {@link #unicodeCodepointToString(char, boolean)} if you
@ -307,7 +307,8 @@ public class UnicodeUtils implements UnicodeConstants {
<code>0F55</code>. */
public static String unicodeCodepointToString(char cp,
boolean shortenIfPossible,
String prefix) {
String prefix,
boolean upperCase) {
if (shortenIfPossible) {
if ((cp >= 'a' && cp <= 'z')
|| (cp >= 'A' && cp <= 'Z')
@ -348,14 +349,16 @@ public class UnicodeUtils implements UnicodeConstants {
return "\\r";
}
String suffix;
if (cp < '\u0010')
return prefix + "000" + Integer.toHexString((int)cp);
suffix = "000" + Integer.toHexString((int)cp);
else if (cp < '\u0100')
return prefix + "00" + Integer.toHexString((int)cp);
suffix = "00" + Integer.toHexString((int)cp);
else if (cp < '\u1000')
return prefix + "0" + Integer.toHexString((int)cp);
suffix = "0" + Integer.toHexString((int)cp);
else
return prefix + Integer.toHexString((int)cp);
suffix = Integer.toHexString((int)cp);
return prefix + (upperCase ? suffix.toUpperCase() : suffix);
}
/**

View file

@ -546,10 +546,12 @@ public final class ACIPTraits implements TTraits {
/** Gets the duffcodes for wowel, such that they look good with
* the preceding glyph, and appends them to duff. */
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
throws ConversionException
{
if (null == wowel) return;
if (null == getEwtsForWowel(wowel)) // FIXME: expensive assertion! Use assert.
throw new IllegalArgumentException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
throw new ConversionException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
// Order matters here.
boolean context_added[] = new boolean[] { false };
@ -619,8 +621,10 @@ public final class ACIPTraits implements TTraits {
try {
return TPairListFactory.breakACIPIntoChunks(tt, sh);
} catch (StackOverflowError e) {
// TODO(dchandler): use ConversionException? Stop catching these?
throw new IllegalArgumentException("Input too large[1]: " + tt);
} catch (OutOfMemoryError e) {
// TODO(dchandler): use ConversionException? Stop catching these?
throw new IllegalArgumentException("Input too large[2]: " + tt);
}
}

View file

@ -0,0 +1,30 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text.ttt;
/**
 * A ConversionException is a general-purpose checked exception
 * used to indicate a problem during conversion.
 *
 * @author David Chandler
 */
public final class ConversionException extends Exception {
/** Constructs a ConversionException whose detail message is x.
 *  @param x the detail message handed to the superclass constructor
 *  @see Exception#Exception(String) */
ConversionException(String x) { super(x); }
}

View file

@ -19,10 +19,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.io.PrintStream;
import java.util.ArrayList;
import junit.framework.TestCase;
import org.thdl.util.ThdlOptions;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
/** Tests this package's ability to understand EWTS and turn it into
@ -76,42 +78,145 @@ public class EWTSTest extends TestCase {
}
}
/** Causes a JUnit test case failure unless the EWTS document ewts
* converts to the unicode expectedUnicode. */
static void ewts2uni_test(String ewts, String expectedUnicode) {
// TODO(DLC)[EWTS->Tibetan]: In addition to what this
// currently does, have this function convert to TMW and
// convert that TMW to Unicode and verify that the result is
// the same. Almost every call should allow for that.
/** Returns the Unicode corresponding to the TMW to which ewts
* corresponds, or null if we couldn't push through, even with
* errors, from EWTS->TMW->Unicode. */
private static String ewts2tmw2uni(String ewts) {
TTraits traits = EWTSTraits.instance();
StringBuffer errors = new StringBuffer();
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
ewts, errors,
null, true,
"None", // TODO(DLC)[EWTS->Tibetan]: ???
false /* short warnings */);
if (null == unicode) {
boolean shortMessages = true;
String warningLevel = "All"; // slow but exercises more code paths
ArrayList scan
= traits.scanner().scan(ewts, errors, -1,
shortMessages,
warningLevel);
if (null == scan)
return null;
if (errors.length() > 0)
return null;
errors = new StringBuffer();
TibetanDocument tdoc = new TibetanDocument();
boolean rv;
try {
rv = TConverter.convertToTMW(traits,
scan, tdoc, errors, null, null,
false, warningLevel,
shortMessages, true,
new int[] { tdoc.getLength() });
} catch (java.io.IOException e) {
// I doubt this can happen.
throw new Error(e.toString());
}
if (!rv)
return null;
if (tdoc.getLength() < 1 && ewts.length() > 0)
return null;
errors = new StringBuffer();
long numAttemptedReplacements[] = new long[] { 0 };
tdoc.convertToUnicode(0, tdoc.getLength(), errors, null,
numAttemptedReplacements);
if (errors.length() > 0)
return null;
if (numAttemptedReplacements[0] < 1)
return null;
try {
return tdoc.getText(0, tdoc.getLength());
} catch (javax.swing.text.BadLocationException e) {
throw new Error("I know this won't happen: " + e);
}
}
/** Causes a JUnit test case failure unless the EWTS document ewts
 *  converts to the unicode expectedUnicode.  Delegates to the
 *  three-argument ewts2uni_test with doEwts2tmw2uni set to true, so
 *  the EWTS->TMW->Unicode round trip is checked as well. */
static void ewts2uni_test(String ewts, String expectedUnicode) {
ewts2uni_test(ewts, expectedUnicode, true);
}
/** Tests EWTS->Unicode but not EWTS->TMW[->Unicode].  Delegates to
 *  the three-argument ewts2uni_test with doEwts2tmw2uni set to
 *  false, so only the direct conversion is compared against
 *  expectedUnicode. */
static void just_ewts2uni_test(String ewts, String expectedUnicode) {
ewts2uni_test(ewts, expectedUnicode, false);
}
/** Fails the current JUnit test case unless converting the EWTS
 *  document ewts directly to Unicode yields expectedUnicode.  When
 *  doEwts2tmw2uni is true, the same expectation is then applied to
 *  the result of the EWTS->TMW->Unicode trip. */
static void ewts2uni_test(String ewts, String expectedUnicode,
                          boolean doEwts2tmw2uni) {
    // First leg: the direct EWTS->Unicode conversion.
    StringBuffer directErrors = new StringBuffer();
    String directUnicode
        = TConverter.convertToUnicodeText(EWTSTraits.instance(),
                                          ewts, directErrors,
                                          null, true,
                                          "None", // TODO(DLC)[EWTS->Tibetan]: ???
                                          false /* short warnings */);
    help_ewts2uni_test("EWTS->Unicode: ",
                       ewts, expectedUnicode, directUnicode, directErrors);

    if (!doEwts2tmw2uni)
        return;

    // Second leg: go through TMW and compare against the same expectation.
    String unicodeViaTmw = ewts2tmw2uni(ewts);
    help_ewts2uni_test("EWTS->TMW->Unicode: ",
                       ewts, expectedUnicode, unicodeViaTmw,
                       new StringBuffer());
}
/** Decides whether actualUnicode is an acceptable rendering of
 *  expectedUnicode.  Some inputs, such as "b+ha", have more than
 *  one valid Unicode spelling, and TMW->Unicode is different source
 *  code from EWTS->Unicode, so the two paths may pick different
 *  spellings.  The rewrite table below is applied to actualUnicode,
 *  in order, before comparing; if that does not match, the raw
 *  strings are compared.  The table is only complete enough to
 *  handle the test cases we have. */
private static boolean ewts2uni_unicode_equality(String expectedUnicode,
                                                 String actualUnicode) {
    // Each row is { what actualUnicode may contain, its accepted equivalent }.
    final String[][] acceptableRewrites = {
        { "\u0f0d\u0f0d", "\u0f0e" }, // TMW has no \u0f0e glyph
        { "\u0f69", "\u0f40\u0fb5" }, // equivalent and neither are discouraged
        { "\u0f43", "\u0f42\u0fb7" }, // ditto...
        { "\u0f4d", "\u0f4c\u0fb7" },
        { "\u0f52", "\u0f51\u0fb7" },
        { "\u0f57", "\u0f56\u0fb7" },
        { "\u0f5c", "\u0f5b\u0fb7" },
        { "\u0fb9", "\u0f90\u0fb5" },
        { "\u0f93", "\u0f92\u0fb7" },
        { "\u0f9d", "\u0f9c\u0fb7" },
        { "\u0fa2", "\u0fa1\u0fb7" },
        { "\u0fa7", "\u0fa6\u0fb7" }, // ...
        { "\u0fac", "\u0fab\u0fb7" }, // equivalent and neither are discouraged
    };

    // TODO(dchandler): replaceAll is a 1.4-ism.  Will users balk?
    String normalized = actualUnicode;
    for (int i = 0; i < acceptableRewrites.length; i++) {
        normalized = normalized.replaceAll(acceptableRewrites[i][0],
                                           acceptableRewrites[i][1]);
    }

    if (normalized.equals(expectedUnicode)) {
        return true;
    }
    // Fall back to an exact comparison of the untouched strings.
    return expectedUnicode.equals(actualUnicode);
}
private static void help_ewts2uni_test(String prefix,
String ewts,
String expectedUnicode,
String actualUnicode,
StringBuffer errors) {
if (null == actualUnicode) {
if (null != expectedUnicode && "none" != expectedUnicode) {
System.out.println("No unicode exists for " + ewts
System.out.println(prefix + "No unicode exists for " + ewts
+ " but you expected "
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
assertTrue(false);
}
System.out.println("Unicode for " + ewts + " can't be had; errors are " + errors);
System.out.println(prefix + "Unicode for " + ewts + " can't be had; errors are " + errors);
} else {
if (null != expectedUnicode && !expectedUnicode.equals(unicode)) {
explainInequality(unicode, expectedUnicode, System.out);
if (UnicodeUtils.unicodeStringToPrettyString(unicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
System.out.println("UGLY strings: The unicode for\n \"" + ewts
if (null != expectedUnicode
&& !ewts2uni_unicode_equality(expectedUnicode, actualUnicode)) {
explainInequality(actualUnicode, expectedUnicode, System.out);
if (UnicodeUtils.unicodeStringToPrettyString(actualUnicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
System.out.println(prefix + "UGLY strings: The unicode for\n \"" + ewts
+ "\"\nis\n \""
+ unicode
+ actualUnicode
+ "\",\nbut you expected\n \""
+ expectedUnicode
+ "\"");
} else {
System.out.println("The unicode for\n \"" + ewts
System.out.println(prefix + "The unicode for\n \"" + ewts
+ "\"\nis\n \""
+ UnicodeUtils.unicodeStringToPrettyString(unicode)
+ UnicodeUtils.unicodeStringToPrettyString(actualUnicode)
+ "\",\nbut you expected\n \""
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)
+ "\"");
@ -122,7 +227,7 @@ public class EWTSTest extends TestCase {
TPairList[] la
= EWTSTraits.instance().breakTshegBarIntoChunks(sb.toString(), false);
assertTrue(la[1] == null);
System.out.println("EWTS=" + ewts + " and l'=" + la[0].toString2());
System.out.println(prefix + "EWTS=" + ewts + " and l'=" + la[0].toString2());
}
assertTrue(false);
}
@ -156,24 +261,25 @@ public class EWTSTest extends TestCase {
public void test0F39() {
ewts2uni_test("v", "\u0F56\u0F39");
ewts2uni_test("f", "\u0F55\u0F39");
ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
just_ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
ewts2uni_test("faM", "\u0f55\u0f39\u0f7e");
ewts2uni_test("vaM", "\u0f56\u0f39\u0f7e");
ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39");
ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39");
ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e");
just_ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39");
just_ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39");
just_ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e");
ewts2uni_test("a^", "\u0f68\u0f39");
ewts2uni_test("hUM^", "\u0f67\u0f71\u0f74\u0f7e\u0f39");
ewts2uni_test("hUM^", "\u0f67\u0f39\u0f71\u0f74\u0f7e");
ewts2uni_test("ph^", "\u0f55\u0f39");
ewts2uni_test("phe^", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter?
ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!?
ewts2uni_test("phe^", "\u0f55\u0f39\u0f7a");
ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? EWTSTraits.isWowelThatRequiresAChen(..) might be to blame
ewts2uni_test("a\u0f39", "\u0f68\u0f39");
ewts2uni_test("hUM\u0f39", "\u0f67\u0f71\u0f74\u0f7e\u0f39");
ewts2uni_test("hUM\u0f39", "\u0f67\u0f39\u0f71\u0f74\u0f7e");
ewts2uni_test("ph\u0f39", "\u0f55\u0f39");
ewts2uni_test("phe\u0f39", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter?
ewts2uni_test("ph\u0f39e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!?
ewts2uni_test("phe\u0f39", "\u0f55\u0f39\u0f7a");
ewts2uni_test("ph\u0f39e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? EWTSTraits.isWowelThatRequiresAChen(..) might be to blame
if (RUN_FAILING_TESTS) ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
}
@ -181,6 +287,13 @@ public class EWTSTest extends TestCase {
/** Tests that the EWTS->unicode converter isn't completely
braindead. */
public void testEwtsBasics() {
just_ewts2uni_test("r+sa", "\u0f62\u0fb6");
ewts2uni_test("R+s", "\u0f6a\u0fb6");
ewts2uni_test("k?e", "\u0f40\u0f84\u0f68\u0f7a");
ewts2uni_test("ko+o", "\u0f40\u0f7c\u0f7c");
ewts2uni_test("kau+u", "\u0f40\u0f74\u0f7d");
ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66");
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
@ -220,39 +333,46 @@ public class EWTSTest extends TestCase {
ewts2uni_test("b.ra ", "\u0f56\u0f62\u0f0b");
ewts2uni_test("bara ", "\u0f56\u0f62\u0f0b");
ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
just_ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
}
/**
 * Miscellaneous tests of EWTS->Unicode conversion.
 *
 * Each helper call pairs an EWTS input (first argument) with the
 * Unicode text the conversion is expected to yield (second argument);
 * assert_EWTS_error is handed an input that is expected to be
 * rejected.
 *
 * NOTE(review): presumably just_ewts2uni_test checks only the
 * EWTS->Unicode direction while ewts2uni_test additionally exercises
 * the EWTS->TMW path -- the inline "ewts->tmw is broken" TODOs point
 * that way -- confirm against the helper definitions.
 *
 * NOTE(review): this body carries pairs of calls with the same input
 * but different helpers or different expected output (see the "-I"
 * cases near the end).  It looks like superseded and replacement
 * assertions side by side; confirm which ones are current.
 */
public void test__EWTS__miscellany() {
// A doubled backslash keeps the Java compiler from resolving the
// escape, so the converter receives the literal text backslash-u-XXXX.
// A single backslash-u is resolved by the compiler, so the converter
// receives the Tibetan character itself.
just_ewts2uni_test("ga\\u0f02ha", "\u0f42\u0f02\u0f67"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("g.\\u0f03\u0f0b", "\u0f42\u0f03\u0f0b"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
// Empty input is expected to give empty output.
ewts2uni_test("", "");
// Raw Tibetan characters mixed into EWTS input: accepted after '+'
// or before '+' / '.', but U+0F42 directly followed by "ya" is
// expected to be an error.
just_ewts2uni_test("k+\u0fb2e", "\u0f40\u0fb2\u0f7a");
assert_EWTS_error("\u0f42ya");
just_ewts2uni_test("\u0f42+ya", "\u0f42\u0fb1");
just_ewts2uni_test("\u0f42.ya", "\u0f42\u0f61");
just_ewts2uni_test("", "");
// Digit zero followed by an escaped combining mark.
ewts2uni_test("0\\u0f19", "\u0f20\u0f19");
ewts2uni_test("0\\u0f18", "\u0f20\u0f18");
ewts2uni_test("0\\u0f3e", "\u0f20\u0f3e"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw
ewts2uni_test("0\\u0f3f", "\u0f20\u0f3f"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw
// Capital R is expected as U+0F6A, with or without the inherent "a".
// The same inputs appear under both helpers.
ewts2uni_test("R", "\u0f6A");
ewts2uni_test("Ra", "\u0f6A");
just_ewts2uni_test("R", "\u0f6A");
just_ewts2uni_test("Ra", "\u0f6A");
// Capital W, Y, R after '+' are expected as U+0FBA, U+0FBB, U+0FBC.
// Again the same inputs appear under both helpers.
ewts2uni_test("R+ka", "\u0F6A\u0f90");
ewts2uni_test("k+Wa", "\u0f40\u0FBA");
ewts2uni_test("k+Ya", "\u0f40\u0FBB");
ewts2uni_test("k+Ra", "\u0f40\u0FBC");
just_ewts2uni_test("R+ka", "\u0F6A\u0f90");
just_ewts2uni_test("k+Wa", "\u0f40\u0FBA");
just_ewts2uni_test("k+Ya", "\u0f40\u0FBB");
just_ewts2uni_test("k+Ra", "\u0f40\u0FBC");
// Lower-case w, l, y, r after '+' are expected as U+0FAD, U+0FB3,
// U+0FB1, U+0FB2.
ewts2uni_test("k+wa", "\u0f40\u0Fad");
ewts2uni_test("k+la", "\u0f40\u0Fb3");
ewts2uni_test("k+ya", "\u0f40\u0Fb1");
ewts2uni_test("k+ra", "\u0f40\u0Fb2");
// NOTE(review): "r-I" and "l-I" are each asserted twice with
// different expectations, first the single codepoint U+0F81 and then
// the sequence U+0F71 U+0F80.  Both cannot hold for one converter;
// confirm which pair should remain.
ewts2uni_test("r-I", "\u0f62\u0f81");
ewts2uni_test("l-I", "\u0f63\u0f81");
ewts2uni_test("r-I", "\u0f62\u0f71\u0f80");
ewts2uni_test("l-I", "\u0f63\u0f71\u0f80");
ewts2uni_test("r-i", "\u0f62\u0f80");
ewts2uni_test("l-i", "\u0f63\u0f80");
ewts2uni_test("gr-i", "\u0f42\u0fb2\u0f80");
// NOTE(review): same conflict for "gr-I" and "gl-I" below
// (U+0F81 versus U+0F71 U+0F80).
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f81");
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f71\u0f80");
ewts2uni_test("gl-i", "\u0f42\u0fb3\u0f80");
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f81");
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f71\u0f80");
}
@ -277,9 +397,9 @@ public class EWTSTest extends TestCase {
ewts2uni_test("u", "\u0f68\u0f74");
ewts2uni_test("U", "\u0f68\u0f71\u0f74");
ewts2uni_test("a+r-i", "\u0f68\u0fb2\u0f80");
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f81");
ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f81");
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f71\u0f80");
just_ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
just_ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f71\u0f80");
ewts2uni_test("e", "\u0f68\u0f7a");
ewts2uni_test("ai", "\u0f68\u0f7b");
// ewts2uni_test("ao", "\u0f68\u0f68\u0f7c"); // TODO(DLC)[EWTS->Tibetan]:
@ -289,11 +409,12 @@ public class EWTSTest extends TestCase {
// ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("-i", "\u0f68\u0f80");
ewts2uni_test("-I", "\u0f68\u0f81");
ewts2uni_test("-I", "\u0f68\u0f71\u0f80");
// ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("\\u0f68", "\u0f68");
just_ewts2uni_test("\\u0f68", "\u0f68");
ewts2uni_test("\\u0f86", "\u0f68\u0f86"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a\\u0f86", "\u0f68\u0f86");
ewts2uni_test("a\\U0f86", "\u0f68\u0f86");
ewts2uni_test("a\\U0F86", "\u0f68\u0f86");
@ -305,7 +426,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("a\\u0f87", "\u0f68\u0f87");
// ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aHM", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a", "\u0f68");
}
@ -325,7 +446,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("o+e", "\u0f68\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7e\u0f7f");
ewts2uni_test("u+A", "\u0f68\u0f74\u0f71");
ewts2uni_test("o+-I", "DLC");
@ -342,9 +463,9 @@ public class EWTSTest extends TestCase {
ewts2uni_test("ku", "\u0f40\u0f74");
ewts2uni_test("kU", "\u0f40\u0f71\u0f74");
ewts2uni_test("k+r-i", "\u0f40\u0fb2\u0f80");
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f81");
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f71\u0f80");
ewts2uni_test("k+l-i", "\u0f40\u0fb3\u0f80");
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f81");
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f71\u0f80");
ewts2uni_test("ke", "\u0f40\u0f7a");
ewts2uni_test("e", "\u0f68\u0f7a");
ewts2uni_test("a", "\u0f68");
@ -354,7 +475,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("kaM", "\u0f40\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("kaH", "\u0f40\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k-i", "\u0f40\u0f80");
ewts2uni_test("k-I", "\u0f40\u0f81");
ewts2uni_test("k-I", "\u0f40\u0f71\u0f80");
ewts2uni_test("ka~M`", "\u0f40\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("ka~M", "\u0f40\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("ka?", "\u0f40\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
@ -369,7 +490,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("ka\\u0f87", "\u0f40\u0f87");
ewts2uni_test("kaMH", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("kaHM", "\u0f40\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("kaHM", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
@ -380,10 +501,10 @@ public class EWTSTest extends TestCase {
ewts2uni_test("ke+e+e", "\u0f40\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ko+e", "\u0f40\u0f7c\u0f7a");
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("ku+A", "\u0f40\u0f74\u0f71");
ewts2uni_test("ko+e", "\u0f40\u0f7a\u0f7c");
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("ku+A", "\u0f40\u0f71\u0f74");
ewts2uni_test("k", "\u0f40");
ewts2uni_test("ka", "\u0f40");
@ -414,7 +535,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("'aM", "\u0f60\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'aH", "\u0f60\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'-i", "\u0f60\u0f80");
ewts2uni_test("'-I", "\u0f60\u0f81");
ewts2uni_test("'-I", "\u0f60\u0f71\u0f80");
ewts2uni_test("'a~M`", "\u0f60\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'a~M", "\u0f60\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'a?", "\u0f60\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
@ -429,7 +550,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("'a\\u0f87", "\u0f60\u0f87");
ewts2uni_test("'aMH", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'aHM", "\u0f60\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'aHM", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
@ -440,19 +561,19 @@ public class EWTSTest extends TestCase {
ewts2uni_test("'e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'o+e", "\u0f60\u0f7c\u0f7a");
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("'o+e", "\u0f60\u0f7a\u0f7c");
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("'u+A", "\u0f60\u0f74\u0f71");
ewts2uni_test("'u+A", "\u0f60\u0f71\u0f74");
ewts2uni_test("'", "\u0f60");
ewts2uni_test("'a", "\u0f60");
ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f81");
ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f81");
just_ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
just_ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f71\u0f80");
just_ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
just_ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f71\u0f80");
}
/** Tests that our implementation of EWTS's wowels are correct,
@ -471,7 +592,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+ShaM", "\u0f40\u0fb5\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaH", "\u0f40\u0fb5\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sh-i", "\u0f40\u0fb5\u0f80");
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f81");
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f71\u0f80");
ewts2uni_test("k+Sha~M`", "\u0f40\u0fb5\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha~M", "\u0f40\u0fb5\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha?", "\u0f40\u0fb5\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
@ -486,7 +607,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+Sha\\u0f87", "\u0f40\u0fb5\u0f87");
ewts2uni_test("k+ShaMH", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
@ -497,18 +618,18 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+She+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7c\u0f7a");
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f74\u0f71");
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7a\u0f7c");
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f71\u0f74");
ewts2uni_test("k+Sh", "\u0f40\u0fb5");
ewts2uni_test("k+Sha", "\u0f40\u0fb5");
ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f81");
just_ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
just_ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f71\u0f80");
ewts2uni_test("k+Sh+l-i", "\u0f40\u0fb5\u0fb3\u0f80");
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f81");
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f71\u0f80");
}
/** Tests that our implementation of EWTS's wowels are correct,
@ -526,12 +647,12 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywo", "\u0f55\u0fb1\u0fad\u0f7c");
ewts2uni_test("phywau", "\u0f55\u0fb1\u0fad\u0f7d");
ewts2uni_test("phyw-i", "\u0f55\u0fb1\u0fad\u0f80");
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f81");
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f71\u0f80");
ewts2uni_test("phyw\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
assertEquals(EWTSTraits.instance().getUnicodeForWowel("\u0f86+\u0f84"), "\u0f86\u0f84");
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phywa\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
ewts2uni_test("phywa\\u0f86\u0f84", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phywa\\U0f86", "\u0f55\u0fb1\u0fad\u0f86");
@ -552,10 +673,10 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7c\u0f7a");
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f74\u0f71");
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7c");
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f71\u0f74");
ewts2uni_test("phyw", "\u0f55\u0fb1\u0fad");
ewts2uni_test("phywa", "\u0f55\u0fb1\u0fad");
@ -566,7 +687,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
assert_EWTS_error("phywr-i");
assert_EWTS_error("phyw+r-i");
@ -579,55 +700,55 @@ public class EWTSTest extends TestCase {
* (U+0F40,U+0F97,U+0F97,U+0F90,U+0F90,U+0F97) is correct. I
* chose this stack as an example of an absurd stack. */
public void test__EWTS__wowels_on_kjjkkj() {
// Every input below starts with the stack k+j+j+k+k+j, expected as
// U+0F40 U+0F97 U+0F97 U+0F90 U+0F90 U+0F97, and appends one wowel
// or wowel sequence whose expected codepoints follow the stack.
//
// NOTE(review): each group of inputs appears twice, once through
// ewts2uni_test and once through just_ewts2uni_test, and some of the
// paired expectations disagree (flagged below).  It looks like
// superseded and replacement assertions side by side; confirm which
// set is current.  Presumably just_ewts2uni_test skips the
// EWTS->TMW check that ewts2uni_test performs -- verify against the
// helper definitions.

// Single wowels and escaped codepoints, via ewts2uni_test.
ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
ewts2uni_test("k+j+j+k+k+jau", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
ewts2uni_test("k+j+j+k+k+jaM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+jaH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+j-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f80");
// NOTE(review): "-I" is expected as U+0F81 here but as U+0F71 U+0F80
// in the just_ewts2uni_test twin further down.
ewts2uni_test("k+j+j+k+k+j-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f81");
ewts2uni_test("k+j+j+k+k+ja~M`", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+ja~M", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+ja?", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Escape spellings: lower/upper-case escape letter, lower/upper-case
// hex digits, and an eight-digit form all target U+0F86.
ewts2uni_test("k+j+j+k+k+ja\\u0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\U0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\U0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
// NOTE(review): the next two pairs are literal duplicates; possibly
// the second of each pair was meant to use the capital escape letter
// -- confirm.
ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u0f87", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f87");
// The same single-wowel and escape cases, via just_ewts2uni_test.
just_ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
just_ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
just_ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
just_ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
just_ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
just_ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
just_ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
just_ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
just_ewts2uni_test("k+j+j+k+k+jau", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
just_ewts2uni_test("k+j+j+k+k+jaM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+jaH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+j-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f80");
just_ewts2uni_test("k+j+j+k+k+j-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f80");
just_ewts2uni_test("k+j+j+k+k+ja~M`", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+ja~M", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+ja?", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+ja\\u0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\U0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\U0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\u0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
just_ewts2uni_test("k+j+j+k+k+ja\\u0f87", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f87");
// M and H together.
// NOTE(review): "aHM" is expected as U+0F7F U+0F7E under
// ewts2uni_test but as U+0F7E U+0F7F under just_ewts2uni_test.
ewts2uni_test("k+j+j+k+k+jaMH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+jaHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+jaMH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
just_ewts2uni_test("k+j+j+k+k+jaHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au.
// Stacked wowels joined with '+'.
// NOTE(review): for "o+e" and the "u+A..." inputs the ewts2uni_test
// lines expect the wowels in the order typed, while the
// just_ewts2uni_test twins expect a reordered sequence
// (U+0F71 before U+0F74, U+0F7A before U+0F7C, U+0F7E before U+0F7F).
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7a");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71");
just_ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
just_ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
just_ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
just_ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
just_ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
just_ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
just_ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7c");
just_ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74\u0f72\u0f7a\u0f7c");
just_ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
just_ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
// Bare stack, stack with inherent "a", and stack extended with +r / +l
// before -i / -I.
// NOTE(review): "+r-I" and "+l-I" end in U+0F81 under ewts2uni_test
// but in U+0F71 U+0F80 under just_ewts2uni_test.
ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f81");
ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f81");
just_ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
just_ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
just_ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
just_ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f71\u0f80");
just_ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
just_ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f71\u0f80");
}
/** Tests that the EWTS that the spec says corresponds to each
@ -644,14 +765,16 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0000", "\u0000");
ewts2uni_test("\\u0eff", "\u0eff");
}
ewts2uni_test("\\u0f00", "\u0f00");
ewts2uni_test("\\u0f40", "\u0f40");
just_ewts2uni_test("\\u0f00", "\u0f00"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("\\u0F02", "\u0F02"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("\\u0F03", "\u0F03"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("\\u0f40", "\u0f40");
if (RUN_FAILING_TESTS) {
assert_EWTS_error("\\u0f70"); // reserved codepoint
assert_EWTS_error("\\u0fff"); // reserved codepoint
ewts2uni_test("\\uf000", "\uf000");
ewts2uni_test("\\uf01f", "\uf01f");
ewts2uni_test("\\uefff", "\uefff");
just_ewts2uni_test("\\uf000", "\uf000");
just_ewts2uni_test("\\uf01f", "\uf01f");
just_ewts2uni_test("\\uefff", "\uefff");
}
@ -661,11 +784,11 @@ public class EWTSTest extends TestCase {
ewts2uni_test("f", "\u0F55\u0F39");
ewts2uni_test("\u0f88+ka", "\u0f88\u0f90");
ewts2uni_test("\u0f88+kha", "\u0f88\u0f91");
ewts2uni_test("\\u0f88+ka", "\u0f88\u0f90");
ewts2uni_test("\\u0f88+kha", "\u0f88\u0f91");
ewts2uni_test("oM",
false ? "\u0F00" : "\u0f68\u0f7c\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: which is correct? see e-mail (maybe it was cfynn who thought \u0F00 ought not be generated?
ewts2uni_test("\\u0F01", "\u0F01");
ewts2uni_test("\\u0F02", "\u0F02");
ewts2uni_test("\\u0F03", "\u0F03");
ewts2uni_test("@", "\u0F04");
ewts2uni_test("#", "\u0F05"); // TODO(DLC)[EWTS->Tibetan]: warning/error? [#] alone is nonsense.
ewts2uni_test("$", "\u0F06");
@ -777,9 +900,9 @@ public class EWTSTest extends TestCase {
ewts2uni_test("u", achen + "\u0F74");
ewts2uni_test("U", achen + "\u0F71\u0F74");
ewts2uni_test("a+r-i", achen + "\u0fb2\u0f80"); // not 0F76, which is discouraged by the Unicode standard
ewts2uni_test("a+r-I", achen + "\u0fb2\u0f81"); // not 0F77, which is discouraged by the Unicode standard
ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard
ewts2uni_test("a+l-I", achen + "\u0fb3\u0f81"); // not 0F79, which is discouraged by the Unicode standard
ewts2uni_test("a+r-I", achen + "\u0fb2\u0f71\u0f80"); // not 0F77, which is discouraged by the Unicode standard
just_ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard
just_ewts2uni_test("a+l-I", achen + "\u0fb3\u0f71\u0f80"); // not 0F79, which is discouraged by the Unicode standard
ewts2uni_test("e", achen + "\u0F7A");
ewts2uni_test("ai", achen + "\u0F7B");
ewts2uni_test("o", achen + "\u0F7C");
@ -787,7 +910,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("M", achen + "\u0F7E");
ewts2uni_test("H", achen + "\u0F7F");
ewts2uni_test("-i", achen + "\u0F80");
ewts2uni_test("-I", achen + "\u0F81");
ewts2uni_test("-I", achen + "\u0F71\u0F80");
ewts2uni_test("~M`", achen + "\u0F82");
ewts2uni_test("~M", achen + "\u0F83");
ewts2uni_test("?", achen + "\u0F84"); // \u0f84 is a combiner
@ -799,8 +922,8 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0F8A", "\u0F8A");
ewts2uni_test("\\u0F8B", "\u0F8B");
final String ewts_for_superscript = "tsh+";
final String unicode_for_superscript = "\u0f5a";
final String ewts_for_superscript = "r+";
final String unicode_for_superscript = "\u0f62";
ewts2uni_test(ewts_for_superscript + "k",
unicode_for_superscript + "\u0F90");
ewts2uni_test(ewts_for_superscript + "kh",
@ -812,10 +935,10 @@ public class EWTSTest extends TestCase {
+ (false ? "\u0F93" : "\u0f92\u0fb7"));
ewts2uni_test(ewts_for_superscript + "ng",
unicode_for_superscript + "\u0F94");
ewts2uni_test(ewts_for_superscript + "c",
unicode_for_superscript + "\u0F95");
ewts2uni_test(ewts_for_superscript + "ch",
unicode_for_superscript + "\u0F96");
just_ewts2uni_test(ewts_for_superscript + "c",
unicode_for_superscript + "\u0F95");
just_ewts2uni_test(ewts_for_superscript + "ch",
unicode_for_superscript + "\u0F96");
ewts2uni_test(ewts_for_superscript + "j",
unicode_for_superscript + "\u0F97");
ewts2uni_test(ewts_for_superscript + "ny",
@ -826,9 +949,9 @@ public class EWTSTest extends TestCase {
unicode_for_superscript + "\u0F9B");
ewts2uni_test(ewts_for_superscript + "D",
unicode_for_superscript + "\u0F9C");
ewts2uni_test(ewts_for_superscript + "D+h",
unicode_for_superscript
+ (false ? "\u0F9D" : "\u0f9c\u0fb7"));
just_ewts2uni_test(ewts_for_superscript + "D+h",
unicode_for_superscript
+ (false ? "\u0F9D" : "\u0f9c\u0fb7"));
ewts2uni_test(ewts_for_superscript + "N",
unicode_for_superscript + "\u0F9E");
ewts2uni_test(ewts_for_superscript + "t",
@ -844,8 +967,8 @@ public class EWTSTest extends TestCase {
unicode_for_superscript + "\u0FA3");
ewts2uni_test(ewts_for_superscript + "p",
unicode_for_superscript + "\u0FA4");
ewts2uni_test(ewts_for_superscript + "ph",
unicode_for_superscript + "\u0FA5");
just_ewts2uni_test(ewts_for_superscript + "ph",
unicode_for_superscript + "\u0FA5");
ewts2uni_test(ewts_for_superscript + "b",
unicode_for_superscript + "\u0FA6");
ewts2uni_test(ewts_for_superscript + "b+h",
@ -859,119 +982,122 @@ public class EWTSTest extends TestCase {
unicode_for_superscript + "\u0FAA");
ewts2uni_test(ewts_for_superscript + "dz",
unicode_for_superscript + "\u0FAB");
ewts2uni_test(ewts_for_superscript + "dz+h",
unicode_for_superscript
+ (false ? "\u0FAC" : "\u0fab\u0fb7"));
just_ewts2uni_test(ewts_for_superscript + "dz+h",
unicode_for_superscript
+ (false ? "\u0FAC" : "\u0fab\u0fb7"));
ewts2uni_test(ewts_for_superscript + "w",
unicode_for_superscript + "\u0FAD");
ewts2uni_test(ewts_for_superscript + "zh",
unicode_for_superscript + "\u0FAE");
ewts2uni_test(ewts_for_superscript + "z",
unicode_for_superscript + "\u0FAF");
ewts2uni_test(ewts_for_superscript + "'",
unicode_for_superscript + "\u0FB0");
ewts2uni_test(ewts_for_superscript + "y",
unicode_for_superscript + "\u0FB1");
ewts2uni_test(ewts_for_superscript + "r",
unicode_for_superscript + "\u0FB2");
just_ewts2uni_test(ewts_for_superscript + "zh",
unicode_for_superscript + "\u0FAE");
just_ewts2uni_test(ewts_for_superscript + "z",
unicode_for_superscript + "\u0FAF");
just_ewts2uni_test(ewts_for_superscript + "'",
unicode_for_superscript + "\u0FB0");
just_ewts2uni_test(ewts_for_superscript + "y",
unicode_for_superscript + "\u0FB1");
just_ewts2uni_test(ewts_for_superscript + "r",
unicode_for_superscript + "\u0FB2");
ewts2uni_test(ewts_for_superscript + "l",
unicode_for_superscript + "\u0FB3");
ewts2uni_test(ewts_for_superscript + "sh",
unicode_for_superscript + "\u0FB4");
ewts2uni_test(ewts_for_superscript + "Sh",
unicode_for_superscript + "\u0FB5");
ewts2uni_test(ewts_for_superscript + "s",
unicode_for_superscript + "\u0FB6");
just_ewts2uni_test(ewts_for_superscript + "sh",
unicode_for_superscript + "\u0FB4");
just_ewts2uni_test(ewts_for_superscript + "Sh",
unicode_for_superscript + "\u0FB5");
just_ewts2uni_test(ewts_for_superscript + "s",
unicode_for_superscript + "\u0FB6");
ewts2uni_test(ewts_for_superscript + "h",
unicode_for_superscript + "\u0FB7");
ewts2uni_test(ewts_for_superscript + "a",
unicode_for_superscript + "\u0FB8");
just_ewts2uni_test(ewts_for_superscript + "a",
unicode_for_superscript + "\u0FB8");
ewts2uni_test(ewts_for_superscript + "k+Sh",
unicode_for_superscript
+ (false ? "\u0FB9" : "\u0f90\u0fb5"));
ewts2uni_test(ewts_for_superscript + "W",
unicode_for_superscript + "\u0FBA");
ewts2uni_test(ewts_for_superscript + "Y",
unicode_for_superscript + "\u0FBB");
ewts2uni_test(ewts_for_superscript + "R",
unicode_for_superscript + "\u0FBC");
just_ewts2uni_test(ewts_for_superscript + "W",
unicode_for_superscript + "\u0FBA");
just_ewts2uni_test(ewts_for_superscript + "Y",
unicode_for_superscript + "\u0FBB");
just_ewts2uni_test(ewts_for_superscript + "R",
unicode_for_superscript + "\u0FBC");
ewts2uni_test("\\u0FBE", "\u0FBE");
ewts2uni_test("\\u0FBF", "\u0FBF");
ewts2uni_test("\\u0FC0", "\u0FC0");
ewts2uni_test("\\u0FC1", "\u0FC1");
ewts2uni_test("\\u0FC2", "\u0FC2");
ewts2uni_test("\\u0FC3", "\u0FC3");
ewts2uni_test("\\u0FC4", "\u0FC4");
ewts2uni_test("\\u0FC5", "\u0FC5");
ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner
ewts2uni_test("\\u0FC7", "\u0FC7");
ewts2uni_test("\\u0FC8", "\u0FC8");
ewts2uni_test("\\u0FC9", "\u0FC9");
ewts2uni_test("\\u0FCA", "\u0FCA");
ewts2uni_test("\\u0FCB", "\u0FCB");
ewts2uni_test("\\u0FCC", "\u0FCC");
ewts2uni_test("\\u0FCF", "\u0FCF");
ewts2uni_test("\\u0FD0", "\u0FD0");
ewts2uni_test("\\u0FD1", "\u0FD1");
just_ewts2uni_test("\\u0FBE", "\u0FBE");
just_ewts2uni_test("\\u0FBF", "\u0FBF");
just_ewts2uni_test("\\u0FC0", "\u0FC0");
just_ewts2uni_test("\\u0FC1", "\u0FC1");
just_ewts2uni_test("\\u0FC2", "\u0FC2");
just_ewts2uni_test("\\u0FC3", "\u0FC3");
just_ewts2uni_test("\\u0FC4", "\u0FC4");
just_ewts2uni_test("\\u0FC5", "\u0FC5");
just_ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner
just_ewts2uni_test("\\u0FC7", "\u0FC7");
just_ewts2uni_test("\\u0FC8", "\u0FC8");
just_ewts2uni_test("\\u0FC9", "\u0FC9");
just_ewts2uni_test("\\u0FCA", "\u0FCA");
just_ewts2uni_test("\\u0FCB", "\u0FCB");
just_ewts2uni_test("\\u0FCC", "\u0FCC");
just_ewts2uni_test("\\u0FCF", "\u0FCF");
just_ewts2uni_test("\\u0FD0", "\u0FD0");
just_ewts2uni_test("\\u0FD1", "\u0FD1");
ewts2uni_test("_", "\u00a0"); // tibwn.ini says that the Unicode spec wants a non-breaking space.
ewts2uni_test("\\u534D", "\u534D");
ewts2uni_test("\\u5350", "\u5350");
ewts2uni_test("\u534D", "\u534D");
ewts2uni_test("\u5350", "\u5350");
ewts2uni_test("\\u0F88+k", "\u0F88\u0F90");
ewts2uni_test("\\u0F88+kh", "\u0F88\u0F91");
/* TODO(DLC)[EWTS->Tibetan]:
Do we want to ever generate \uf021? (NOT \u0f21, but the
private-use area (PUA) of Unicode). EWTS->TMW and this
makes sense, but EWTS->Unicode? */
ewts2uni_test("\\uF021", "\uF021");
ewts2uni_test("\\uF022", "\uF022");
ewts2uni_test("\\uF023", "\uF023");
ewts2uni_test("\\uF024", "\uF024");
ewts2uni_test("\\uF025", "\uF025");
ewts2uni_test("\\uF026", "\uF026");
ewts2uni_test("\\uF027", "\uF027");
ewts2uni_test("\\uF028", "\uF028");
ewts2uni_test("\\uF029", "\uF029");
ewts2uni_test("\\uF02A", "\uF02A");
ewts2uni_test("\\uF02B", "\uF02B");
ewts2uni_test("\\uF02C", "\uF02C");
ewts2uni_test("\\uF02D", "\uF02D");
ewts2uni_test("\\uF02E", "\uF02E");
ewts2uni_test("\\uF02F", "\uF02F");
ewts2uni_test("\\uF030", "\uF030");
ewts2uni_test("\\uF031", "\uF031");
ewts2uni_test("\\uF032", "\uF032");
ewts2uni_test("\\uF033", "\uF033");
ewts2uni_test("\\uF034", "\uF034");
ewts2uni_test("\\uF035", "\uF035");
ewts2uni_test("\\uF036", "\uF036");
ewts2uni_test("\\uF037", "\uF037");
ewts2uni_test("\\uF038", "\uF038");
ewts2uni_test("\\uF039", "\uF039");
ewts2uni_test("\\uF03A", "\uF03A");
ewts2uni_test("\\uF03B", "\uF03B");
ewts2uni_test("\\uF03C", "\uF03C");
ewts2uni_test("\\uF03D", "\uF03D");
ewts2uni_test("\\uF03E", "\uF03E");
ewts2uni_test("\\uF03F", "\uF03F");
ewts2uni_test("\\uF040", "\uF040");
ewts2uni_test("\\uF041", "\uF041");
ewts2uni_test("\\uF042", "\uF042");
makes sense, but EWTS->Unicode? Shouldn't we match the
behavior of TMW->Unicode, regardless? */
just_ewts2uni_test("\\uF021", "\uF021");
just_ewts2uni_test("\\uF022", "\uF022");
just_ewts2uni_test("\\uF023", "\uF023");
just_ewts2uni_test("\\uF024", "\uF024");
just_ewts2uni_test("\\uF025", "\uF025");
just_ewts2uni_test("\\uF026", "\uF026");
just_ewts2uni_test("\\uF027", "\uF027");
just_ewts2uni_test("\\uF028", "\uF028");
just_ewts2uni_test("\\uF029", "\uF029");
just_ewts2uni_test("\\uF02A", "\uF02A");
just_ewts2uni_test("\\uF02B", "\uF02B");
just_ewts2uni_test("\\uF02C", "\uF02C");
just_ewts2uni_test("\\uF02D", "\uF02D");
just_ewts2uni_test("\\uF02E", "\uF02E");
just_ewts2uni_test("\\uF02F", "\uF02F");
just_ewts2uni_test("\\uF030", "\uF030");
just_ewts2uni_test("\\uF031", "\uF031");
just_ewts2uni_test("\\uF032", "\uF032");
just_ewts2uni_test("\\uF033", "\uF033");
just_ewts2uni_test("\\uF034", "\uF034");
just_ewts2uni_test("\\uF035", "\uF035");
just_ewts2uni_test("\\uF036", "\uF036");
just_ewts2uni_test("\\uF037", "\uF037");
just_ewts2uni_test("\\uF038", "\uF038");
just_ewts2uni_test("\\uF039", "\uF039");
just_ewts2uni_test("\\uF03A", "\uF03A");
just_ewts2uni_test("\\uF03B", "\uF03B");
just_ewts2uni_test("\\uF03C", "\uF03C");
just_ewts2uni_test("\\uF03D", "\uF03D");
just_ewts2uni_test("\\uF03E", "\uF03E");
just_ewts2uni_test("\\uF03F", "\uF03F");
just_ewts2uni_test("\\uF040", "\uF040");
just_ewts2uni_test("\\uF041", "\uF041");
just_ewts2uni_test("\\uF042", "\uF042");
}
public void test__EWTS__long_wowels() {
ewts2uni_test("k-I~M`~X", "\u0f40\u0f81\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
ewts2uni_test("k-I~M`~X", "\u0f40\u0f71\u0f80\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
}
public void test__EWTS__32bit_unicode_escapes() {
assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work
ewts2uni_test("\\uF0010000",
"[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work:
just_ewts2uni_test("\\uF0010000",
"[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work:
if (RUN_FAILING_TESTS) assert_EWTS_error("\\uF0010000"); // TODO(DLC)[EWTS->Tibetan]: error subsystem is hosed
if (RUN_FAILING_TESTS) {
ewts2uni_test("\\ucafe0000",
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
just_ewts2uni_test("\\ucafe0000",
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
// TODO(dchandler): make it "\ucafe0000");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
@ -1003,8 +1129,8 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u00000000", "\u0000");
ewts2uni_test("\\u00000eff", "\u0eff");
}
ewts2uni_test("\\u00000f00", "\u0f00");
ewts2uni_test("\\u00000f40", "\u0f40");
just_ewts2uni_test("\\u00000f00", "\u0f00"); // TODO(DLC)[EWTS->Tibetan]: EWTS->TMW is broken for this
just_ewts2uni_test("\\u00000f40", "\u0f40");
if (RUN_FAILING_TESTS) {
ewts2uni_test("\\u00000f70", "\u0f70");
ewts2uni_test("\\u00000fff", "\u0fff");
@ -1089,22 +1215,33 @@ public class EWTSTest extends TestCase {
if (RUN_FAILING_TESTS) {
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81");
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f71\u0f80");
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81");
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f71\u0f80");
}
}
public void testMoreMiscellany() {
ewts2uni_test("k+Sh+R-i", "\u0f40\u0fb5\u0fbc\u0f80");
ewts2uni_test("k\\u0f35", "\u0f40\u0f35");
ewts2uni_test("k\\u0f72", "\u0f40\u0f72");
ewts2uni_test("k\\u0f73", "\u0f40\u0f71\u0f72");
ewts2uni_test("k\\u0f75", "\u0f40\u0f71\u0f74");
ewts2uni_test("k\\u0f3e", "\u0f40\u0f3e");
ewts2uni_test("k\\u0f3f", "\u0f40\u0f3f");
ewts2uni_test("kHai", "\u0f40\u0f7f\u0f68\u0f7b"); // TODO(DLC)[EWTS->Tibetan]: Is this correct?
ewts2uni_test("r-i", "\u0f62\u0f80");
ewts2uni_test("r-I", "\u0f62\u0f81");
ewts2uni_test("r-I", "\u0f62\u0f71\u0f80");
ewts2uni_test("l-i", "\u0f63\u0f80");
ewts2uni_test("l-I", "\u0f63\u0f81");
ewts2uni_test("ga\u0f0bga ga\\u0F0bga",
"\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42");
ewts2uni_test("ga\u0f0cga*ga\\u0f0Cga",
"\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42");
ewts2uni_test("l-I", "\u0f63\u0f71\u0f80");
just_ewts2uni_test("ga\u0f0bga ga\\u0F0bga",
"\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42");
just_ewts2uni_test("ga\u0f0cga*ga\\u0f0Cga",
"\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42");
ewts2uni_test("'jam",
"\u0f60\u0f47\u0f58");
ewts2uni_test("jamX 'jam~X",

View file

@ -21,6 +21,7 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import java.util.HashMap;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.tib.text.DuffCode;
@ -102,124 +103,172 @@ public final class EWTSTraits implements TTraits {
/** Returns true if and only if getUnicodeForWowel(s) produces a
 *  non-null result for s. */
public boolean isWowel(String s) {
    final String unicode = getUnicodeForWowel(s);
    return (null != unicode);
    /* TODO(DLC)[EWTS->Tibetan]: test ko+m+e etc.
    // TODO(DLC)[EWTS->Tibetan]: all non-consonant combiners? 0f71 0f87 etc.?
    if (s.length() == 1 && isUnicodeWowel(s.charAt(0))) return true;
    return ("a".equals(s)
    || "e".equals(s)
    || "i".equals(s)
    || "o".equals(s)
    || "u".equals(s)
    || "U".equals(s)
    || "I".equals(s)
    || "A".equals(s)
    || "-i".equals(s)
    || "-I".equals(s)
    || "au".equals(s)
    || "ai".equals(s)
    || isWowelThatRequiresAChen(s));
    // TODO(DLC)[EWTS->Tibetan]:???
    */
}
public String aVowel() { return "a"; }
public String aVowel() { return THDLWylieConstants.WYLIE_aVOWEL; }
/** Returns true if and only if s is "s" or "d", the only two
 *  strings this class accepts as postsuffixes.  A null s yields
 *  false. */
public boolean isPostsuffix(String s) {
    if ("s".equals(s)) {
        return true;
    }
    return "d".equals(s);
}
public boolean isPrefix(String l) {
return ("'".equals(l)
|| "m".equals(l)
|| "b".equals(l)
|| "d".equals(l)
|| "g".equals(l));
return (THDLWylieConstants.ACHUNG.equals(l)
|| THDLWylieConstants.MA.equals(l)
|| THDLWylieConstants.BA.equals(l)
|| THDLWylieConstants.DA.equals(l)
|| THDLWylieConstants.GA.equals(l));
}
public boolean isSuffix(String l) {
return ("s".equals(l)
|| "g".equals(l)
|| "d".equals(l)
|| "m".equals(l)
|| "'".equals(l)
|| "b".equals(l)
|| "ng".equals(l)
|| "n".equals(l)
|| "l".equals(l)
|| "r".equals(l));
return (isPrefix(l)
|| THDLWylieConstants.SA.equals(l)
|| THDLWylieConstants.NGA.equals(l)
|| THDLWylieConstants.NA.equals(l)
|| THDLWylieConstants.LA.equals(l)
|| THDLWylieConstants.RA.equals(l));
}
/** Returns l, since this is EWTS's traits class. */
public String getEwtsForConsonant(String l) { return l; }
/** Returns the best EWTS for l, which is often l but not always
* thanks to Unicode escapes. NOTE: For "\u0f42", you don't want
* to return "g" lest "\\u0f42ya " become the wrong thing under
* EWTS->Unicode. */
public String getEwtsForConsonant(String l) {
return helpGetEwts(l);
}
/** Returns l, since this is EWTS's traits class. */
public String getEwtsForOther(String l) { return l; }
/** Returns the best EWTS for l, which is often l but not always
* thanks to Unicode escapes. */
public String getEwtsForOther(String l) {
return helpGetEwts(l);
}
/** Shared implementation behind getEwtsForConsonant and
 *  getEwtsForOther.
 *
 *  <p>If l is exactly one character long and that character lies in
 *  [THDLWylieConstants.PUA_MIN, THDLWylieConstants.PUA_MAX] or is one
 *  of the characters listed below, the result comes from
 *  UnicodeUtils.unicodeCodepointToString (presumably a Unicode escape
 *  sequence in EWTS form -- confirm against UnicodeUtils).  Every
 *  other l is returned unchanged. */
private String helpGetEwts(String l) {
    if (l.length() == 1) {
        final char ch = l.charAt(0);
        final boolean inPrivateUseArea
            = (ch >= THDLWylieConstants.PUA_MIN
               && ch <= THDLWylieConstants.PUA_MAX);
        // The second test is only evaluated when the first fails.
        final boolean mustEscape
            = inPrivateUseArea
            || 0 <= "\u0F01\u0F09\u0F0A\u0F10\u0F12\u0F13\u0F15\u0F16\u0F17\u0F18\u0F19\u0F1A\u0F1B\u0F1C\u0F1D\u0F1E\u0F1F\u0F2A\u0F2B\u0F2C\u0F2D\u0F2E\u0F2F\u0F30\u0F31\u0F32\u0F33\u0F36\u0F38\u0F86\u0F87\u0F88\u0F89\u0F8A\u0F8B\u0FBE\u0FBF\u0FC0\u0FC1\u0FC2\u0FC3\u0FC4\u0FC5\u0FC6\u0FC7\u0FC8\u0FC9\u0FCA\u0FCB\u0FCC\u0FCF\u5350\u534D".indexOf(ch);
        if (mustEscape) {
            return UnicodeUtils.unicodeCodepointToString(ch, false, "\\u", true);
        }
    }
    if (false) { // TODO(dchandler): it's too late in the game to do this.  EWTS->TMW is broken for \u0f00, \u0f02, and \u0f03 right now, fix that.
        // Deliberately disabled; kept as a record of the intended mapping.
        if ("\u0f02".equals(l)) return "u~M`H"; // too long for a single hash key, see?
        if ("\u0f03".equals(l)) return "u~M`:"; // ditto
    }
    return l;
}
/** Returns the EWTS for the wowel l.  This is EWTS's own traits
 *  class, so l is handed back unchanged. */
public String getEwtsForWowel(String l) { return l; }
/** Returns the tsheg-bar scanner used for EWTS input, i.e. whatever
 *  EWTSTshegBarScanner.instance() yields. */
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
throws IllegalArgumentException
/** Deletes the first occurrence of needle from haystack and returns
 *  the result.  When needle does not occur in haystack, haystack
 *  itself is returned. */
private static String removeFirstMatch(String haystack, String needle) {
    final int start = haystack.indexOf(needle);
    if (start < 0) {
        return haystack;
    }
    // Splice together the text before the match and the text after it.
    return haystack.substring(0, start)
        + haystack.substring(start + needle.length());
}
/** Lazily-built table from a single Unicode wowel character (as a
 *  one-character String) to the EWTS string that replaces it; filled
 *  in on the first call to getBestEwtsForSingleWowel. */
private static HashMap bestEwtsMap = null;

/** Returns the EWTS registered in bestEwtsMap for wowel, or wowel
 *  itself when the table has no entry for it. */
private static String getBestEwtsForSingleWowel(String wowel) {
    // NOTE: Not MT-safe
    if (null == bestEwtsMap) {
        // Unicode-escape sequences are handled early.  To be
        // correct, we must "unescape" here any Unicode escape to
        // whatever tibwn.ini has.  (TODO(dchandler): tibwn.ini
        // has this info, use that instead of duplicating it in
        // this code.)
        final String[][] unicodeToEwts = {
            { "\u0f18", THDLWylieConstants.U0F18 },
            { "\u0f19", THDLWylieConstants.U0F19 },
            { "\u0f35", THDLWylieConstants.U0F35 },
            { "\u0f37", THDLWylieConstants.U0F37 },
            { "\u0f39", THDLWylieConstants.WYLIE_TSA_PHRU },
            { "\u0f3e", THDLWylieConstants.U0F3E },
            { "\u0f3f", THDLWylieConstants.U0F3F },
            { "\u0f84", THDLWylieConstants.U0F84 },
            { "\u0f86", THDLWylieConstants.U0F86 },
            { "\u0f87", THDLWylieConstants.U0F87 },
            { "\u0fc6", THDLWylieConstants.U0FC6 },

            { "\u0f71", THDLWylieConstants.A_VOWEL },
            { "\u0f72", THDLWylieConstants.i_VOWEL },
            { "\u0f74", THDLWylieConstants.u_VOWEL },
            { "\u0f7a", THDLWylieConstants.e_VOWEL },
            { "\u0f7b", THDLWylieConstants.ai_VOWEL },
            { "\u0f7c", THDLWylieConstants.o_VOWEL },
            { "\u0f7d", THDLWylieConstants.au_VOWEL },
            { "\u0f7e", THDLWylieConstants.BINDU },
            { "\u0f80", THDLWylieConstants.reverse_i_VOWEL },
            { "\u0f81", THDLWylieConstants.reverse_I_VOWEL },

            { "\u0f73", THDLWylieConstants.I_VOWEL }, // not in tibwn.ini
            { "\u0f75", THDLWylieConstants.U_VOWEL }, // not in tibwn.ini
        };
        bestEwtsMap = new HashMap(20);
        for (int row = 0; row < unicodeToEwts.length; row++) {
            bestEwtsMap.put(unicodeToEwts[row][0], unicodeToEwts[row][1]);
        }
    }
    final String best = (String)bestEwtsMap.get(wowel);
    return (null == best) ? wowel : best;
}
/** Splits wowel at each '+' and hands the pieces, in order, to
 *  getDuffForSingleWowel after passing each one through
 *  getBestEwtsForSingleWowel.
 *  @param duff the list handed on to getDuffForSingleWowel
 *  @param preceding the glyph handed on to getDuffForSingleWowel
 *  @param wowel one wowel, or several joined by '+'
 *  @throws ConversionException if getDuffForSingleWowel throws it */
public void getDuffForWowel(ArrayList duff, DuffCode preceding,
                            String wowel)
    throws ConversionException
{
    // TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.

    // A one-element array so that every per-piece call shares, and
    // may update, the same flag.
    final boolean[] precedingAdded = { false };
    final String[] pieces = wowel.split("\\+");
    int k = 0;
    while (k < pieces.length) {
        final String bestEwts = getBestEwtsForSingleWowel(pieces[k]);
        getDuffForSingleWowel(duff, preceding, bestEwts, precedingAdded);
        k++;
    }
}
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
// Order matters here.
boolean context_added[] = new boolean[] { false };
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
/** Wowels can stack. This works on a single wowel. */
private void getDuffForSingleWowel(ArrayList duff, DuffCode preceding,
String wowel, boolean preceding_added[])
throws ConversionException
{
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) { // TODO(dchandler): ka+o deserves at least a warning. kaM, though, does not. Do we handle it?
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, preceding_added);
wowel = "";
} else {
// TODO(DLC)[EWTS->Tibetan]: test vowel stacking
if (wowel.indexOf(THDLWylieConstants.U_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.reverse_I_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_I_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.I_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.A_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.e_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
}
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
duff.add(TibetanMachineWeb.getGlyph("~X"));
} else if (wowel.indexOf("X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
duff.add(TibetanMachineWeb.getGlyph("X"));
// We call these combining because the TMW font treats
// such a vowel specially depending on the preceding glyph
// with which it combines.
String combining_wowels[] = new String[] {
// order does not matter
THDLWylieConstants.U_VOWEL,
THDLWylieConstants.reverse_I_VOWEL,
THDLWylieConstants.I_VOWEL,
THDLWylieConstants.A_VOWEL,
THDLWylieConstants.ai_VOWEL,
THDLWylieConstants.reverse_i_VOWEL,
THDLWylieConstants.i_VOWEL,
THDLWylieConstants.e_VOWEL,
THDLWylieConstants.o_VOWEL,
THDLWylieConstants.au_VOWEL,
THDLWylieConstants.u_VOWEL
};
for (int i = 0; i < combining_wowels.length; i++) {
if (wowel.equals(combining_wowels[i])) {
TibTextUtils.getVowel(duff, preceding, combining_wowels[i],
preceding_added);
wowel = removeFirstMatch(wowel, combining_wowels[i]);
}
}
}
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
// rely on the fact that we know every Wylie wowel that
// contains 'M'. Let's, instead, parse the wowel.
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
if (wowel.equals(THDLWylieConstants.BINDU)) {
DuffCode last = null;
if (!context_added[0]) {
if (!preceding_added[0]) {
last = preceding;
} else if (duff.size() > 0) {
last = (DuffCode)duff.get(duff.size() - 1);
@ -227,52 +276,77 @@ public final class EWTSTraits implements TTraits {
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
}
TibTextUtils.getBindu(duff, last);
context_added[0] = true;
preceding_added[0] = true;
wowel = removeFirstMatch(wowel, THDLWylieConstants.BINDU);
}
if (!context_added[0]) {
if (!preceding_added[0]) {
duff.add(preceding);
preceding_added[0] = true;
}
if (wowel.indexOf('H') >= 0)
duff.add(TibetanMachineWeb.getGlyph("H"));
int ix;
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
String standalone_wowels[] = new String[] {
// order does not matter
// This likely won't look good! TMW has glyphs for [va]
// and [fa], so use that transliteration if you care, not
// [ph^] or [b^].
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), "");
wowel = sb.toString();
THDLWylieConstants.WYLIE_TSA_PHRU,
THDLWylieConstants.U0F35,
THDLWylieConstants.U0F37,
THDLWylieConstants.U0F7F,
THDLWylieConstants.U0F82,
THDLWylieConstants.U0F83,
THDLWylieConstants.U0F86,
THDLWylieConstants.U0F87,
THDLWylieConstants.U0F19,
THDLWylieConstants.U0F18,
THDLWylieConstants.U0FC6,
THDLWylieConstants.U0F3E,
THDLWylieConstants.U0F3F,
THDLWylieConstants.U0F84,
};
for (int i = 0; i < standalone_wowels.length; i++) {
if (wowel.equals(standalone_wowels[i])) {
ThdlDebug.verify(preceding_added[0]);
duff.add(TibetanMachineWeb.getGlyph(standalone_wowels[i]));
wowel = removeFirstMatch(wowel, standalone_wowels[i]);
}
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
wowel = sb.toString();
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
wowel = sb.toString();
}
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.
// TODO(DLC)[EWTS->Tibetan]:: are bindus are screwed up in the unicode output? i see (with tmuni font) lone bindus without glyphs to stack on
// We verify that no part of wowel is discarded.
if (wowel.length() > 0) {
throw new ConversionException(
"Full wowel was not handled, there remains: " + wowel);
}
// TODO(DLC)[EWTS->Tibetan]:: are bindus are screwed up in the
// unicode output? i see (with tmuni font) lone bindus
// without glyphs to stack on
}
public String getUnicodeForWowel(String wowel) {
if ("a".equals(wowel))
if (THDLWylieConstants.WYLIE_aVOWEL.equals(wowel))
return "";
return helpGetUnicodeForWowel(wowel);
}
private String helpGetUnicodeForWowel(String wowel) {
if ("a".equals(wowel))
if (THDLWylieConstants.WYLIE_aVOWEL.equals(wowel))
return null; // ko+a+e is invalid, e.g.
if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0)))
if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0))) {
if ("\u0f75".equals(wowel))
return "\u0f71\u0f74"; // \u0f75 is discouraged
if ("\u0f81".equals(wowel))
return "\u0f71\u0f80"; // \u0f81 is discouraged
if ("\u0f73".equals(wowel))
return "\u0f71\u0f72"; // \u0f73 is discouraged
if ("\u0f79".equals(wowel))
return "\u0fb3\u0f81"; // \u0f79 is discouraged
if ("\u0f78".equals(wowel))
return "\u0fb3\u0f80"; // \u0f78 is discouraged
return wowel;
}
// handle o+u, etc.
int i;
if ((i = wowel.indexOf("+")) >= 0) {
@ -290,27 +364,27 @@ public final class EWTSTraits implements TTraits {
} else {
// Handle vowels. (TODO(dchandler): tibwn.ini has this
// info, use that instead of duplicating it in this code.)
if ("i".equals(wowel)) return "\u0f72";
if ("u".equals(wowel)) return "\u0f74";
if ("A".equals(wowel)) return "\u0f71";
if ("U".equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged
if ("e".equals(wowel)) return "\u0f7a";
if ("o".equals(wowel)) return "\u0f7c";
if ("-i".equals(wowel)) return "\u0f80";
if ("ai".equals(wowel)) return "\u0f7b";
if ("au".equals(wowel)) return "\u0f7d";
if ("-I".equals(wowel)) return "\u0f81";
if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
if (THDLWylieConstants.i_VOWEL.equals(wowel)) return "\u0f72";
if (THDLWylieConstants.u_VOWEL.equals(wowel)) return "\u0f74";
if (THDLWylieConstants.A_VOWEL.equals(wowel)) return "\u0f71";
if (THDLWylieConstants.U_VOWEL.equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged
if (THDLWylieConstants.e_VOWEL.equals(wowel)) return "\u0f7a";
if (THDLWylieConstants.o_VOWEL.equals(wowel)) return "\u0f7c";
if (THDLWylieConstants.reverse_i_VOWEL.equals(wowel)) return "\u0f80";
if (THDLWylieConstants.ai_VOWEL.equals(wowel)) return "\u0f7b";
if (THDLWylieConstants.au_VOWEL.equals(wowel)) return "\u0f7d";
if (THDLWylieConstants.reverse_I_VOWEL.equals(wowel)) return "\u0f71\u0f80"; // \u0f81 is discouraged
if (THDLWylieConstants.I_VOWEL.equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
// TODO(DLC)[EWTS->Tibetan]: test, test, test.
if ("M".equals(wowel)) return "\u0f7e";
if ("H".equals(wowel)) return "\u0f7f";
if ("?".equals(wowel)) return "\u0f84";
if ("~M".equals(wowel)) return "\u0f83";
if ("~M`".equals(wowel)) return "\u0f82";
if ("X".equals(wowel)) return "\u0f37";
if ("~X".equals(wowel)) return "\u0f35";
if ("^".equals(wowel)) return "\u0f39";
// TODO(DLC)[EWTS->Tibetan]: what about \u0f3e and \u0f3f!!!!
if (THDLWylieConstants.BINDU.equals(wowel)) return "\u0f7e";
if (THDLWylieConstants.U0F7F.equals(wowel)) return "\u0f7f";
if (THDLWylieConstants.U0F84.equals(wowel)) return "\u0f84";
if (THDLWylieConstants.U0F83.equals(wowel)) return "\u0f83";
if (THDLWylieConstants.U0F82.equals(wowel)) return "\u0f82";
if (THDLWylieConstants.U0F37.equals(wowel)) return "\u0f37";
if (THDLWylieConstants.U0F35.equals(wowel)) return "\u0f35";
if (THDLWylieConstants.WYLIE_TSA_PHRU.equals(wowel)) return "\u0f39";
return null;
}
@ -324,9 +398,9 @@ public final class EWTSTraits implements TTraits {
for (int i = 0; i < l.length(); i++) {
char ch = l.charAt(i);
if ((ch < '\u0f00' || ch > '\u0fff')
&& SAUVASTIKA != ch
&& SWASTIKA != ch
&& (ch < PUA_MIN || ch > PUA_MAX) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all.
&& THDLWylieConstants.SAUVASTIKA != ch
&& THDLWylieConstants.SWASTIKA != ch
&& (ch < THDLWylieConstants.PUA_MIN || ch > THDLWylieConstants.PUA_MAX) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all.
&& '\n' != ch
&& '\r' != ch) {
// TODO(DLC)[EWTS->Tibetan]: Is this the place
@ -346,6 +420,8 @@ public final class EWTSTraits implements TTraits {
if ("Y".equals(l)) return "\u0fbb";
if ("W".equals(l)) return "\u0fba";
// TODO(dchandler): use tibwn.ini -- it has this same info.
// g+h etc. should not be inputs to this function, but for
// completeness they're here.
if ("k".equals(l)) return "\u0F90";
@ -455,18 +531,24 @@ public final class EWTSTraits implements TTraits {
/** Always returns true in this traits class. */
public boolean vowelsMayStack() { return true; }
public boolean isWowelThatRequiresAChen(String s) {
// TODO(DLC)[EWTS->Tibetan]: fix me!
return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0))
|| "?MHX^".indexOf(s.charAt(0)) >= 0))
|| "~X".equals(s)
|| "~M".equals(s)
|| "~M`".equals(s)
);
// TODO(DLC)[EWTS->Tibetan]: not sure why we pick this subset.
// Why don't we use a negative set of regular vowels like "i",
// "o", etc.?
return ((s.length() == 1
&& (isUnicodeWowelThatRequiresAChen(s.charAt(0))))
|| THDLWylieConstants.BINDU.equals(s)
|| THDLWylieConstants.U0F35.equals(s)
|| THDLWylieConstants.U0F37.equals(s)
|| THDLWylieConstants.U0F7F.equals(s)
|| THDLWylieConstants.U0F82.equals(s)
|| THDLWylieConstants.U0F83.equals(s)
|| THDLWylieConstants.U0F84.equals(s)
|| THDLWylieConstants.WYLIE_TSA_PHRU.equals(s));
}
public boolean isUnicodeWowelThatRequiresAChen(char ch) {
// TODO(DLC)[EWTS->Tibetan]: ask if 18 19 3e 3f combine only with digits
return "\u0f39\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0;
return ("\u0f39\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0);
}
public boolean couldBeValidStack(TPairList pl) {
@ -485,33 +567,9 @@ public final class EWTSTraits implements TTraits {
/** Always returns true in this traits class. */
public boolean stackingMustBeExplicit() { return true; }
public String U0F7F() { return "H"; }
public String U0F7F() { return THDLWylieConstants.U0F7F; }
public String U0F35() { return "~X"; }
public String U0F35() { return THDLWylieConstants.U0F35; }
public String U0F37() { return "X"; }
/** The EWTS standard mentions this character specifically. See
http://www.symbols.com/encyclopedia/15/155.html to learn about
its meaning as relates to Buddhism.
*/
static final char SAUVASTIKA = '\u534d';
/** The EWTS standard mentions this character specifically. See
http://www.symbols.com/encyclopedia/15/151.html to learn about
its meaning as relates to Buddhism.
*/
static final char SWASTIKA = '\u5350';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
static final char PUA_MIN = '\uf021';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
static final char PUA_MAX = '\uf0ff';
public String U0F37() { return THDLWylieConstants.U0F37; }
}

View file

@ -21,6 +21,8 @@ package org.thdl.tib.text.ttt;
import java.math.BigInteger;
import java.util.ArrayList;
import org.thdl.tib.text.THDLWylieConstants;
/**
* This singleton class is able to break up Strings of EWTS text (for
* example, an entire sutra file) into tsheg bars, comments, etc.
@ -76,8 +78,11 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
StringBuffer sb = new StringBuffer(s);
ExpandEscapeSequences(sb);
int sl = sb.length();
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working
// TODO(DLC)[EWTS->Tibetan]:: 'jamX 'jam~X one is not working in ->tmw mode
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working (probably because)
// TODO(DLC)[EWTS->Tibetan]:: '#', in ewts->tmw, is not working
//
// TODO(DLC)[EWTS->Tibetan]:: 'jamX one is not working in ewts->tmw mode in the sense that X appears under the last glyph of the three instead of the middle glyph
//
// TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working
for (int i = 0; i < sl; i++) { // i is modified in the loop, also
if (isValidInsideTshegBar(sb.charAt(i))) {
@ -102,14 +107,14 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
al.add(new TString("EWTS", "//",
TString.TIBETAN_PUNCTUATION));
++i;
} else if ((sb.charAt(i) >= EWTSTraits.PUA_MIN
&& sb.charAt(i) <= EWTSTraits.PUA_MAX)
} else if ((sb.charAt(i) >= THDLWylieConstants.PUA_MIN
&& sb.charAt(i) <= THDLWylieConstants.PUA_MAX)
|| (sb.charAt(i) >= '\u0f00' && sb.charAt(i) <= '\u0f17')
|| (sb.charAt(i) >= '\u0f1a' && sb.charAt(i) <= '\u0f1f')
|| (sb.charAt(i) >= '\u0fbe' && sb.charAt(i) <= '\u0fcc')
|| (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1')
|| (EWTSTraits.SAUVASTIKA == sb.charAt(i))
|| (EWTSTraits.SWASTIKA == sb.charAt(i))
|| (THDLWylieConstants.SAUVASTIKA == sb.charAt(i))
|| (THDLWylieConstants.SWASTIKA == sb.charAt(i))
|| (" /;|!:=_@#$%<>()*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i))
>= 0)) {
al.add(new TString("EWTS", sb.substring(i, i+1),
@ -186,7 +191,31 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
// leave x == -1
}
if (x >= 0) {
sb.replace(i, i + "\\uXXXX".length(), new String(new char[] { (char)x }));
String replacement = String.valueOf((char)x);
if (false) {
// This would ruin EWTS->Unicode to
// help EWTS->TMW, so we don't do it.
// TODO(dchandler): Fix EWTS->TMW for
// \u0f02 and \u0f03.
// A nasty little HACK for you:
//
// TODO(dchandler): we may create "ga..u~M`H..ha" which may cause errors
String hack = null;
if ('\u0f02' == x) {
hack = "u~M`H"; // hard-coded EWTS
} else if ('\u0f03' == x) {
hack = "u~M`:"; // hard-coded EWTS
} else if ('\u0f00' == x) {
hack = "oM"; // hard-coded EWTS
}
if (null != hack) {
replacement = "." + hack + "."; // hard-coded EWTS disambiguators
i += replacement.length() - 1;
}
}
sb.replace(i, i + "\\uXXXX".length(), replacement);
continue;
}
}

View file

@ -29,6 +29,7 @@ import java.util.ArrayList;
import org.thdl.tib.text.DuffCode;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.THDLWylieConstants;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
@ -699,7 +700,13 @@ public class TConverter {
} else {
String wy = ttraits.getEwtsForOther(s.getText());
if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
duff = new Object[] { TibetanMachineWeb.maybeGetGlyph(wy) };
if (null == duff[0]) {
duff[0] =
ErrorsAndWarnings.getMessage(
137, shortMessages,
s.getText(), ttraits);
}
}
}
}
@ -730,8 +737,8 @@ public class TConverter {
ThdlDebug.verify(1 == s.getText().length());
if (null != writer) {
char ch = s.getText().charAt(0);
if (ch >= EWTSTraits.PUA_MIN
&& ch <= EWTSTraits.PUA_MAX) {
if (ch >= THDLWylieConstants.PUA_MIN
&& ch <= THDLWylieConstants.PUA_MAX) {
hasErrors = true;
String errorMessage =
"[#ERROR "

View file

@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
import java.util.ArrayList;
import java.util.HashMap;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.tib.text.TGCPair;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.util.ThdlDebug;
@ -710,47 +711,49 @@ class TPairList {
wylieForConsonant.append(lastPair.getWylie(true, false));
String hashKey = wylieForConsonant.toString();
// Because EWTS has special handling for full-formed
// subjoined consonants, we have special handling here.
if ("r+y".equals(hashKey))
hashKey = "r+Y";
else if ("y+y".equals(hashKey))
hashKey = "y+Y";
else if ("N+D+y".equals(hashKey))
hashKey = "N+D+Y";
else if ("N+D+r+y".equals(hashKey))
hashKey = "N+D+R+y";
else if ("k+Sh+r".equals(hashKey))
hashKey = "k+Sh+R";
if (traits.isACIP()) {
// Because EWTS has special handling for full-formed
// subjoined consonants, we have special handling here.
if ("r+y".equals(hashKey))
hashKey = "r+Y";
else if ("y+y".equals(hashKey))
hashKey = "y+Y";
else if ("N+D+y".equals(hashKey))
hashKey = "N+D+Y";
else if ("N+D+r+y".equals(hashKey))
hashKey = "N+D+R+y";
else if ("k+Sh+r".equals(hashKey))
hashKey = "k+Sh+R";
// TPair.getWylie(..) returns "W" sometimes when "w" is what
// really should be returned. ("V" always causes "w" to be
// returned, which is fine.) We'll change "W" to "w" here if
// we need to. We do it only for a few known stacks (the ones
// in TMW).
if ("W".equals(hashKey))
hashKey = "w";
else if ("W+y".equals(hashKey))
hashKey = "w+y";
else if ("W+r".equals(hashKey))
hashKey = "w+r";
else if ("W+n".equals(hashKey))
hashKey = "w+n";
else if ("W+W".equals(hashKey))
hashKey = "w+W";
// TPair.getWylie(..) returns "W" sometimes when "w" is what
// really should be returned. ("V" always causes "w" to be
// returned, which is fine.) We'll change "W" to "w" here if
// we need to. We do it only for a few known stacks (the ones
// in TMW).
if ("W".equals(hashKey))
hashKey = "w";
else if ("W+y".equals(hashKey))
hashKey = "w+y";
else if ("W+r".equals(hashKey))
hashKey = "w+r";
else if ("W+n".equals(hashKey))
hashKey = "w+n";
else if ("W+W".equals(hashKey))
hashKey = "w+W";
if ("r+Y".equals(hashKey)
|| "r+W".equals(hashKey)
|| "r+sh".equals(hashKey)
|| "r+sh+y".equals(hashKey)
|| "r+Sh".equals(hashKey)
|| "r+Sh+N".equals(hashKey)
|| "r+Sh+N+y".equals(hashKey)
|| "r+Sh+m".equals(hashKey)
|| "r+Sh+y".equals(hashKey)
|| "r+s".equals(hashKey)
) {
hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
if ("r+Y".equals(hashKey)
|| "r+W".equals(hashKey)
|| "r+sh".equals(hashKey)
|| "r+sh+y".equals(hashKey)
|| "r+Sh".equals(hashKey)
|| "r+Sh+N".equals(hashKey)
|| "r+Sh+N+y".equals(hashKey)
|| "r+Sh+m".equals(hashKey)
|| "r+Sh+y".equals(hashKey)
|| "r+s".equals(hashKey)
) {
hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
}
}
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
@ -774,7 +777,7 @@ class TPairList {
traits.getDuffForWowel(duffsAndErrors,
TibetanMachineWeb.getGlyph(hashKey),
lastPair.getRight());
} catch (IllegalArgumentException e) {
} catch (ConversionException e) {
// TODO(dchandler): Error 137 isn't the perfect
// message. Try EWTS [RAM], e.g. to see why. It acts
// like we're trying to find a single glyph for (R

View file

@ -20,7 +20,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt;
import java.util.Arrays;
import java.util.List;
import java.util.Comparator;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.THDLWylieConstants;
/** A factory for creating {@link TPairList TPairLists} from
* Strings of ACIP.
@ -374,6 +379,85 @@ class TPairListFactory {
return 0;
}
/** Puts the wowel of an EWTS pair into the order that Section 9.11
 *  of the Unicode Standard, version 4.0.1, would have us use.
 *
 *  @param tp the pair whose right-hand side (the wowel) is examined
 *  @return tp itself, untouched, when its right-hand side is null,
 *  empty, or just "+"; otherwise a new TPair with the same traits
 *  and left-hand side whose right-hand side holds the same
 *  '+'-separated pieces rearranged into ranked order */
private static TPair ewtsSortWowels(TPair tp) {
    String right = tp.getRight();
    if (right == null || right.length() == 0 || "+".equals(right)) {
        // No wowel to speak of; hand back the very same instance.
        return tp;
    }

    // Ranking table: a piece that appears earlier here sorts earlier
    // in the result.  Both the raw Unicode character and its EWTS
    // spelling are listed.
    //
    // See org.thdl.tib.text.tshegbar.UnicodeUtils#fixSomeOrderingErrorsInTibetanUnicode(StringBuffer)
    final List ranking = Arrays.asList(new String[] {
        // equivalence class:
        "\u0f39", THDLWylieConstants.WYLIE_TSA_PHRU,

        // equivalence class:
        THDLWylieConstants.WYLIE_aVOWEL,

        // equivalence class:
        "\u0f71", THDLWylieConstants.A_VOWEL,
        // TODO(dchandler): in a perfect world, we'd decompose the
        // next three (\u0f73, \u0f75, \u0f81) and sort the
        // components.
        "\u0f73", THDLWylieConstants.I_VOWEL,
        "\u0f75", THDLWylieConstants.U_VOWEL,
        "\u0f81", THDLWylieConstants.reverse_I_VOWEL,
        "\u0f74", THDLWylieConstants.u_VOWEL,

        // equivalence class:
        "\u0f72", THDLWylieConstants.i_VOWEL,
        "\u0f7a", THDLWylieConstants.e_VOWEL,
        "\u0f7b", THDLWylieConstants.ai_VOWEL,
        "\u0f7c", THDLWylieConstants.o_VOWEL,
        "\u0f7d", THDLWylieConstants.au_VOWEL,
        "\u0f80", THDLWylieConstants.reverse_i_VOWEL,

        // equivalence class:
        "\u0f7e", THDLWylieConstants.BINDU,
        "\u0f82", THDLWylieConstants.U0F82,
        "\u0f83", THDLWylieConstants.U0F83,
        "\u0f86", THDLWylieConstants.U0F86,
        "\u0f87", THDLWylieConstants.U0F87,

        // NOTE: we always say "e" comes before "o" but
        // either order would work.

        /* TODO(dchandler): should these go with other
         * under-line wowels like \u0f74?  They're for the
         * whole tsheg-bar, so they're oddballs...
         *
         * bestEwtsMap.put("\u0f35", THDLWylieConstants.U0F35);
         *
         * bestEwtsMap.put("\u0f37", THDLWylieConstants.U0F37);
         *
         * bestEwtsMap.put("\u0f84", THDLWylieConstants.U0F84);
         *
         * bestEwtsMap.put("\u0fc6", THDLWylieConstants.U0FC6);
         */
    });

    Comparator byRank = new Comparator() {
        public int compare(Object o1, Object o2) {
            return rankOf(o1) - rankOf(o2);
        }

        /** Position of wowel in the ranking table; anything not
         *  listed there ranks after every listed entry. */
        private int rankOf(Object wowel) {
            int position = ranking.indexOf(wowel);
            return (position < 0) ? ranking.size() : position;
        }
    };

    String pieces[] = right.split("\\+");
    Arrays.sort(pieces, byRank);

    // Glue the pieces back together with '+' between neighbors.
    StringBuffer rejoined = new StringBuffer();
    for (int k = 0; k < pieces.length; k++) {
        if (k > 0)
            rejoined.append('+');
        rejoined.append(pieces[k]);
    }
    return new TPair(tp.getTraits(), tp.getLeft(), rejoined.toString());
}
// TODO(DLC)[EWTS->Tibetan]: doc
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
@ -383,7 +467,9 @@ class TPairListFactory {
StringBuffer ewtsBuf = new StringBuffer(ewts);
int howMuchBuf[] = new int[1];
TPair head = getFirstConsonantAndVowel(ewtsBuf, howMuchBuf, ttraits);
TPair head = ewtsSortWowels(getFirstConsonantAndVowel(ewtsBuf,
howMuchBuf,
ttraits));
int howMuch = howMuchBuf[0];
TPairList tail;
@ -448,7 +534,7 @@ class TPairListFactory {
* {N+YE} or an error or whatever you like. howMuch[0] will be
* set to the number of characters of tx that this call has
* consumed. */
private static TPair getFirstConsonantAndVowel(StringBuffer tx, // TODO(DLC)[EWTS->Tibetan]: function name needs ACIP in it?
private static TPair getFirstConsonantAndVowel(StringBuffer tx,
int howMuch[],
TTraits ttraits) {
// To handle EWTS "phywa\\u0f84\u0f86" [yes that's two slashes

View file

@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
import java.util.HashSet;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.tib.text.THDLWylieConstants;
import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions;
@ -66,8 +67,8 @@ public class TString {
&& type != END_SLASH
&& (type != UNICODE_CHARACTER
|| !(UnicodeUtils.isInTibetanRange(ch = getText().charAt(0))
|| (ch >= EWTSTraits.PUA_MIN
&& ch <= EWTSTraits.PUA_MAX))));
|| (ch >= THDLWylieConstants.PUA_MIN
&& ch <= THDLWylieConstants.PUA_MAX))));
}
/** For ACIP [#COMMENTS] and EWTS (DLC FIXME: what are EWTS comments?) */

View file

@ -136,7 +136,8 @@ public interface TTraits {
/** Gets the duffcodes for wowel, such that they look good with
* the preceding glyph, and appends them to duff. */
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel);
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
throws ConversionException;
/** Human-readable name of this transliteration for short error
strings. */

View file

@ -43,7 +43,7 @@ public class VerboseUnicodeDump {
java.nio.charset.Charset.forName(args[1]));
int x;
while (-1 != (x = fr.read())) {
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, ""));
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, "", false));
}
System.exit(0);
}