Numerous EWTS->Unicode and especially EWTS->TMW improvements.

Fixed ordering of Unicode wowels; e.g., [ku+A] now gives the correct
Unicode.

EWTS->TMW looks better for some wacky wowels like, I'm guessing here, [ku+A].

EWTS->TMW should now give errors any time the full input isn't used.
Previously, wacky wowels like [kai+-i] could cause part of the input to
be dropped without any error.

EWTS->TMW->Unicode testing is now in effect.  This found a ton of
EWTS->TMW bugs, most or all of which are fixed now.

TMW->Unicode is improved/fixed for {
\u5350,\u534D,\u0F88+k,\u0F88+kh,U }.  (Why U?  "\u0f75" is
discouraged in favor of "\u0f71\u0f74".)

NOTE: TMW_RTF_TO_THDL_WYLIETest is still disabled for the nightly
builds' sake, but I ran it in my sandbox and it passed.
This commit is contained in:
dchandler 2005-07-11 02:51:06 +00:00
parent 36122778b4
commit 6d419fe641
19 changed files with 1014 additions and 547 deletions

View file

@ -1377,7 +1377,7 @@ public void paste(int offset)
if (TibetanMachineWeb.isPunc(val)) { //punctuation if (TibetanMachineWeb.isPunc(val)) { //punctuation
val = TibetanMachineWeb.getWylieForPunc(val); val = TibetanMachineWeb.getWylieForPunc(val);
if (val.charAt(0) == TibetanMachineWeb.BINDU) if (val.startsWith(THDLWylieConstants.BINDU))
putBindu(); putBindu();
else { else {

View file

@ -242,8 +242,8 @@
\f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 K+S+MA\fs28\i0\b0\ul0\cf0 font 2; ord 54\par \f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 K+S+MA\fs28\i0\b0\ul0\cf0 font 2; ord 54\par
\f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 K+S+YA\fs28\i0\b0\ul0\cf0 font 2; ord 55\par \f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 K+S+YA\fs28\i0\b0\ul0\cf0 font 2; ord 55\par
\f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 K+S+VA\fs28\i0\b0\ul0\cf0 font 2; ord 56\par \f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 K+S+VA\fs28\i0\b0\ul0\cf0 font 2; ord 56\par
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=59 character=;/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 57\par \f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F88+k to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=60 character=</> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 58\par \f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F88+kh to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
\f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 KH+KHA\fs28\i0\b0\ul0\cf0 font 2; ord 59\par \f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 KH+KHA\fs28\i0\b0\ul0\cf0 font 2; ord 59\par
\f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 KH+NA\fs28\i0\b0\ul0\cf0 font 2; ord 60\par \f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 KH+NA\fs28\i0\b0\ul0\cf0 font 2; ord 60\par
\f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 KH+LA\fs28\i0\b0\ul0\cf0 font 2; ord 61\par \f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 KH+LA\fs28\i0\b0\ul0\cf0 font 2; ord 61\par
@ -812,8 +812,8 @@
\f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F13 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 94\par \f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u0F13 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 94\par
\f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie < to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 95\par \f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie < to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 95\par
\f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie > to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 96\par \f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie > to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 96\par
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=97 character=a/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 97\par \f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u5350 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=98 character=b/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>> to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 98\par \f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\u534D to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
\f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF038 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 99\par \f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF038 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 99\par
\f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF037 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 100\par \f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0 [# JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert glyph with THDL Extended Wylie \\uF037 to ACIP. Please transcribe this yourself.]\fs28\i0\b0\ul0\cf0 font 5; ord 100\par
\f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 o\fs28\i0\b0\ul0\cf0 font 5; ord 101\par \f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 o\fs28\i0\b0\ul0\cf0 font 5; ord 101\par

View file

@ -242,8 +242,8 @@
\f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 k+s+ma\fs28\i0\b0\ul0\cf0 font 2; ord 54\par \f1\fs144 >\f3 6\f1 >\f2\i0\b0\ul0 k+s+ma\fs28\i0\b0\ul0\cf0 font 2; ord 54\par
\f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 k+s+ya\fs28\i0\b0\ul0\cf0 font 2; ord 55\par \f1\fs144 >\f3 7\f1 >\f2\i0\b0\ul0 k+s+ya\fs28\i0\b0\ul0\cf0 font 2; ord 55\par
\f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 k+s+wa\fs28\i0\b0\ul0\cf0 font 2; ord 56\par \f1\fs144 >\f3 8\f1 >\f2\i0\b0\ul0 k+s+wa\fs28\i0\b0\ul0\cf0 font 2; ord 56\par
\f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=59 character=;/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 2; ord 57\par \f1\fs144 >\f3 9\f1 >\f2\i0\b0\ul0\\u0F88+k\fs28\i0\b0\ul0\cf0 font 2; ord 57\par
\f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb2 charNum=60 character=</> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 2; ord 58\par \f1\fs144 >\f3 :\f1 >\f2\i0\b0\ul0\\u0F88+kh\fs28\i0\b0\ul0\cf0 font 2; ord 58\par
\f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 kh+kha\fs28\i0\b0\ul0\cf0 font 2; ord 59\par \f1\fs144 >\f3 ;\f1 >\f2\i0\b0\ul0 kh+kha\fs28\i0\b0\ul0\cf0 font 2; ord 59\par
\f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 kh+na\fs28\i0\b0\ul0\cf0 font 2; ord 60\par \f1\fs144 >\f3 <\f1 >\f2\i0\b0\ul0 kh+na\fs28\i0\b0\ul0\cf0 font 2; ord 60\par
\f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 kh+la\fs28\i0\b0\ul0\cf0 font 2; ord 61\par \f1\fs144 >\f3 =\f1 >\f2\i0\b0\ul0 kh+la\fs28\i0\b0\ul0\cf0 font 2; ord 61\par
@ -812,8 +812,8 @@
\f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0\\u0F13\fs28\i0\b0\ul0\cf0 font 5; ord 94\par \f1\fs144 >\f6 ^\f1 >\f2\i0\b0\ul0\\u0F13\fs28\i0\b0\ul0\cf0 font 5; ord 94\par
\f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 <\fs28\i0\b0\ul0\cf0 font 5; ord 95\par \f1\fs144 >\f6 _\f1 >\f2\i0\b0\ul0 <\fs28\i0\b0\ul0\cf0 font 5; ord 95\par
\f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 >\fs28\i0\b0\ul0\cf0 font 5; ord 96\par \f1\fs144 >\f6 `\f1 >\f2\i0\b0\ul0 >\fs28\i0\b0\ul0\cf0 font 5; ord 96\par
\f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=97 character=a/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 5; ord 97\par \f1\fs144 >\f6 a\f1 >\f2\i0\b0\ul0\\u5350\fs28\i0\b0\ul0\cf0 font 5; ord 97\par
\f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0 <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert <glyph font=TibetanMachineWeb8 charNum=98 character=b/> to THDL Extended Wylie. Please see the documentation for the TM or TMW font and transcribe this yourself.]]>>\fs28\i0\b0\ul0\cf0 font 5; ord 98\par \f1\fs144 >\f6 b\f1 >\f2\i0\b0\ul0\\u534D\fs28\i0\b0\ul0\cf0 font 5; ord 98\par
\f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0\\uF038\fs28\i0\b0\ul0\cf0 font 5; ord 99\par \f1\fs144 >\f6 c\f1 >\f2\i0\b0\ul0\\uF038\fs28\i0\b0\ul0\cf0 font 5; ord 99\par
\f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0\\uF037\fs28\i0\b0\ul0\cf0 font 5; ord 100\par \f1\fs144 >\f6 d\f1 >\f2\i0\b0\ul0\\uF037\fs28\i0\b0\ul0\cf0 font 5; ord 100\par
\f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 X\fs28\i0\b0\ul0\cf0 font 5; ord 101\par \f1\fs144 >\f6 e\f1 >\f2\i0\b0\ul0 X\fs28\i0\b0\ul0\cf0 font 5; ord 101\par

View file

@ -18,9 +18,80 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text; package org.thdl.tib.text;
/** This is where basic, static knowledge of THDL's Extended Wylie is housed. /** This is where basic, static knowledge of THDL's Extended Wylie is
* housed. <p>TODO(dchandler): tibwn.ini has all this, yes? So
* extend TibetanMachineWeb if necessary and use a bunch of HashMaps
* there! This is needless duplication.
* @see TibetanMachineWeb */ * @see TibetanMachineWeb */
public interface THDLWylieConstants { public interface THDLWylieConstants {
// TODO(DLC)[EWTS->Tibetan]: what about U+2638, mentioned in Section
// 9.11 "Tibetan" of the Unicode 4.0.1 standard? Why doesn't EWTS
// mention it? (Because TMW has no glyph for it, I bet.) Do we
// handle it well?
/** The EWTS standard mentions this character specifically. See
* http://www.symbols.com/encyclopedia/15/155.html to learn about
* its meaning as relates to Buddhism.
*/
public static final char SAUVASTIKA = '\u534d';
/** The EWTS standard mentions this character specifically. See
* http://www.symbols.com/encyclopedia/15/151.html to learn about
* its meaning as relates to Buddhism.
*/
public static final char SWASTIKA = '\u5350';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
public static final char PUA_MIN = '\uf021';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
public static final char PUA_MAX = '\uf0ff';
/**
* the Wylie for U+0F3E
*/
public static final String U0F3E = "}";
/**
* the Wylie for U+0F3F
*/
public static final String U0F3F = "{";
/**
* the Wylie for U+0F86
*/
public static final String U0F86 = "\\u0F86";
/**
* the Wylie for U+0F87
*/
public static final String U0F87 = "\\u0F87";
/**
* the Wylie for U+0FC6
*/
public static final String U0FC6 = "\\u0FC6";
/**
* the Wylie for U+0F18
*/
public static final String U0F18 = "\\u0F18";
/**
* the Wylie for U+0F19
*/
public static final String U0F19 = "\\u0F19";
/**
* the Wylie for U+0F84
*/
public static final String U0F84 = "?";
/**
* the Wylie for U+0F7F
*/
public static final String U0F7F = "H";
/**
* the Wylie for U+0F35
*/
public static final String U0F35 = "~X";
/**
* the Wylie for U+0F37
*/
public static final String U0F37 = "X";
/** /**
* the Wylie for U+0F82 * the Wylie for U+0F82
*/ */
@ -32,7 +103,7 @@ public interface THDLWylieConstants {
/** /**
* the Wylie for bindu/anusvara (U+0F7E) * the Wylie for bindu/anusvara (U+0F7E)
*/ */
public static final char BINDU = 'M'; public static final String BINDU = "M";
/** /**
* the Wylie for tsheg * the Wylie for tsheg
*/ */
@ -64,31 +135,51 @@ public interface THDLWylieConstants {
*/ */
public static final String WYLIE_TSA_PHRU = "^"; public static final String WYLIE_TSA_PHRU = "^";
/** /**
* the Wylie for achung * the Wylie for achung, \u0f60
*/ */
public static final char ACHUNG_character = '\''; public static final char ACHUNG_character = '\'';
/** /**
* the Wylie for achung * the Wylie for achung, \u0f60
*/ */
public static final String ACHUNG public static final String ACHUNG
= new String(new char[] { ACHUNG_character }); = new String(new char[] { ACHUNG_character });
/** /**
* the Wylie for the 28th of the 30 consonants, sa: * the Wylie for the 28th of the 30 consonants, sa, \u0f66:
*/ */
public static final String SA = "s"; public static final String SA = "s";
/** /**
* the Wylie for the consonant ra: * the Wylie for the consonant ra, \u0f62:
*/ */
public static final String RA = "r"; public static final String RA = "r";
/** /**
* the Wylie for the 16th of the 30 consonants, ma: * the Wylie for the 16th of the 30 consonants, ma, \u0f58:
*/ */
public static final String MA = "m"; public static final String MA = "m";
/** /**
* the Wylie for the 4th of the 30 consonants, nga: * the Wylie for \u0f56:
*/
public static final String BA = "b";
/**
* the Wylie for \u0f51:
*/
public static final String DA = "d";
/**
* the Wylie for \u0f42:
*/
public static final String GA = "g";
/**
* the Wylie for \u0f63:
*/
public static final String LA = "l";
/**
* the Wylie for the 4th of the 30 consonants, nga, \u0f44:
*/ */
public static final String NGA = "ng"; public static final String NGA = "ng";
/** /**
* the Wylie for \u0f53:
*/
public static final String NA = "n";
/**
* the Wylie for achen * the Wylie for achen
*/ */
public static final String ACHEN = "a"; public static final String ACHEN = "a";

View file

@ -418,7 +418,7 @@ public class TibTextUtils implements THDLWylieConstants {
chars.clear(); chars.clear();
if (next.equals(String.valueOf(BINDU))) { if (next.equals(BINDU)) {
if (glyphs.isEmpty()) if (glyphs.isEmpty())
dc = null; dc = null;
else else
@ -560,11 +560,11 @@ public class TibTextUtils implements THDLWylieConstants {
* or null */ * or null */
public static void getBindu(List list, DuffCode dc) { public static void getBindu(List list, DuffCode dc) {
if (null == dc) { if (null == dc) {
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU))); list.add(TibetanMachineWeb.getGlyph(BINDU));
} else { } else {
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) { if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
list.add(dc); list.add(dc);
list.add(TibetanMachineWeb.getGlyph(String.valueOf(BINDU))); list.add(TibetanMachineWeb.getGlyph(BINDU));
} else { } else {
list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc)); list.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
} }

View file

@ -1347,12 +1347,26 @@ public static boolean isKnownHashKey(String hashKey) {
* @see DuffCode * @see DuffCode
*/ */
public static DuffCode getGlyph(String hashKey) { public static DuffCode getGlyph(String hashKey) {
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey); DuffCode dc = maybeGetGlyph(hashKey);
if (null == dc) if (null == dc)
throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears."); throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
return dc;
}
/**
* Gets a glyph for this hash key if possible; returns null
* otherwise.
* @see #getGlyph(String)
*/
public static DuffCode maybeGetGlyph(String hashKey) {
DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
if (null == dc)
return null;
return dc[TMW]; return dc[TMW];
} }
/** /**
* Gets the half height character for this hash key. * Gets the half height character for this hash key.
* @param hashKey the key you want a half height glyph for; see {@link * @param hashKey the key you want a half height glyph for; see {@link
@ -1783,6 +1797,8 @@ private static final String Unicode_tab = "\t";
= new DuffCode[] { new DuffCode(1, (char)58) }; = new DuffCode[] { new DuffCode(1, (char)58) };
private static final DuffCode[] tmwFor0F73 private static final DuffCode[] tmwFor0F73
= new DuffCode[] { new DuffCode(4, (char)106), new DuffCode(1, (char)109) }; = new DuffCode[] { new DuffCode(4, (char)106), new DuffCode(1, (char)109) };
private static final DuffCode[] tmwFor0F75
= new DuffCode[] { new DuffCode(10, (char)126) };
private static final DuffCode[] tmwFor0F76 private static final DuffCode[] tmwFor0F76
= new DuffCode[] { new DuffCode(8, (char)71), new DuffCode(8, (char)87) }; = new DuffCode[] { new DuffCode(8, (char)71), new DuffCode(8, (char)87) };
private static final DuffCode[] tmwFor0F77 private static final DuffCode[] tmwFor0F77
@ -1840,6 +1856,8 @@ private static final String Unicode_tab = "\t";
return tmwFor0F6A; return tmwFor0F6A;
} else if ('\u0F73' == ch) { } else if ('\u0F73' == ch) {
return tmwFor0F73; return tmwFor0F73;
} else if ('\u0F75' == ch) {
return tmwFor0F75;
} else if ('\u0F76' == ch) { } else if ('\u0F76' == ch) {
return tmwFor0F76; return tmwFor0F76;
} else if ('\u0F77' == ch) { } else if ('\u0F77' == ch) {

View file

@ -927,6 +927,15 @@ a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1
// nyi.zla editor's mark. This is NOT \u0F82, although it looks very similar. // nyi.zla editor's mark. This is NOT \u0F82, although it looks very similar.
\uF03A~91,5~~9,89~~~~~~~none \uF03A~91,5~~9,89~~~~~~~none
// yungs.drung (reversed):
\u5350~97,5~~9,97~~~~~~~5350
// yungs.drung (standard):
\u534D~98,5~~9,98~~~~~~~534D
// utsama ka:
\u0F88+k~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\u0F88+kh~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
<?ToWylie?> <?ToWylie?>
M~238,1~~8,90~~~~~~~0F7E M~238,1~~8,90~~~~~~~0F7E
@ -1069,13 +1078,13 @@ A~204,2~~4,109~~~~~~~0F71
A~205,2~~4,110~~~~~~~0F71 A~205,2~~4,110~~~~~~~0F71
A~206,2~~4,111~~~~~~~0F71 A~206,2~~4,111~~~~~~~0F71
A~207,2~~4,112~~~~~~~0F71 A~207,2~~4,112~~~~~~~0F71
U~211,2~~4,113~~~~~~~0F75 U~211,2~~4,113~~~~~~~0F71,0F74
U~212,2~~4,114~~~~~~~0F75 U~212,2~~4,114~~~~~~~0F71,0F74
U~213,2~~4,115~~~~~~~0F75 U~213,2~~4,115~~~~~~~0F71,0F74
U~214,2~~4,116~~~~~~~0F75 U~214,2~~4,116~~~~~~~0F71,0F74
U~215,2~~4,117~~~~~~~0F75 U~215,2~~4,117~~~~~~~0F71,0F74
U~216,2~~4,118~~~~~~~0F75 U~216,2~~4,118~~~~~~~0F71,0F74
U~217,2~~4,119~~~~~~~0F75 U~217,2~~4,119~~~~~~~0F71,0F74
u~224,2~~4,120~~~~~~~0F74 u~224,2~~4,120~~~~~~~0F74
u~225,2~~4,121~~~~~~~0F74 u~225,2~~4,121~~~~~~~0F74
u~226,2~~4,122~~~~~~~0F74 u~226,2~~4,122~~~~~~~0F74
@ -1090,13 +1099,13 @@ A~204,3~~6,109~~~~~~~0F71
A~205,3~~6,110~~~~~~~0F71 A~205,3~~6,110~~~~~~~0F71
A~206,3~~6,111~~~~~~~0F71 A~206,3~~6,111~~~~~~~0F71
A~207,3~~6,112~~~~~~~0F71 A~207,3~~6,112~~~~~~~0F71
U~211,3~~6,113~~~~~~~0F75 U~211,3~~6,113~~~~~~~0F71,0F74
U~212,3~~6,114~~~~~~~0F75 U~212,3~~6,114~~~~~~~0F71,0F74
U~213,3~~6,115~~~~~~~0F75 U~213,3~~6,115~~~~~~~0F71,0F74
U~214,3~~6,116~~~~~~~0F75 U~214,3~~6,116~~~~~~~0F71,0F74
U~215,3~~6,117~~~~~~~0F75 U~215,3~~6,117~~~~~~~0F71,0F74
U~216,3~~6,118~~~~~~~0F75 U~216,3~~6,118~~~~~~~0F71,0F74
U~217,3~~6,119~~~~~~~0F75 U~217,3~~6,119~~~~~~~0F71,0F74
u~224,3~~6,120~~~~~~~0F74 u~224,3~~6,120~~~~~~~0F74
u~225,3~~6,121~~~~~~~0F74 u~225,3~~6,121~~~~~~~0F74
u~226,3~~6,122~~~~~~~0F74 u~226,3~~6,122~~~~~~~0F74
@ -1111,13 +1120,13 @@ A~204,4~~8,109~~~~~~~0F71
A~205,4~~8,110~~~~~~~0F71 A~205,4~~8,110~~~~~~~0F71
A~206,4~~8,111~~~~~~~0F71 A~206,4~~8,111~~~~~~~0F71
A~207,4~~8,112~~~~~~~0F71 A~207,4~~8,112~~~~~~~0F71
U~211,4~~8,113~~~~~~~0F75 U~211,4~~8,113~~~~~~~0F71,0F74
U~212,4~~8,114~~~~~~~0F75 U~212,4~~8,114~~~~~~~0F71,0F74
U~213,4~~8,115~~~~~~~0F75 U~213,4~~8,115~~~~~~~0F71,0F74
U~214,4~~8,116~~~~~~~0F75 U~214,4~~8,116~~~~~~~0F71,0F74
U~215,4~~8,117~~~~~~~0F75 U~215,4~~8,117~~~~~~~0F71,0F74
U~216,4~~8,118~~~~~~~0F75 U~216,4~~8,118~~~~~~~0F71,0F74
U~217,4~~8,119~~~~~~~0F75 U~217,4~~8,119~~~~~~~0F71,0F74
u~224,4~~8,120~~~~~~~0F74 u~224,4~~8,120~~~~~~~0F74
u~225,4~~8,121~~~~~~~0F74 u~225,4~~8,121~~~~~~~0F74
u~226,4~~8,122~~~~~~~0F74 u~226,4~~8,122~~~~~~~0F74
@ -1131,13 +1140,13 @@ A~163,1~~10,116~~~~~~~0F71
A~164,1~~10,117~~~~~~~0F71 A~164,1~~10,117~~~~~~~0F71
A~211,1~~10,118~~~~~~~0F71 A~211,1~~10,118~~~~~~~0F71
A~212,1~~10,119~~~~~~~0F71 A~212,1~~10,119~~~~~~~0F71
U~213,1~~10,120~~~~~~~0F75 U~213,1~~10,120~~~~~~~0F71,0F74
U~214,1~~10,121~~~~~~~0F75 U~214,1~~10,121~~~~~~~0F71,0F74
U~215,1~~10,122~~~~~~~0F75 U~215,1~~10,122~~~~~~~0F71,0F74
U~216,1~~10,123~~~~~~~0F75 U~216,1~~10,123~~~~~~~0F71,0F74
U~217,1~~10,124~~~~~~~0F75 U~217,1~~10,124~~~~~~~0F71,0F74
U~218,1~~10,125~~~~~~~0F75 U~218,1~~10,125~~~~~~~0F71,0F74
U~219,1~~10,126~~~~~~~0F75 U~219,1~~10,126~~~~~~~0F71,0F74
// ra.mgo: // ra.mgo:
r~173,4~~8,66~~~~~~~0F62 r~173,4~~8,66~~~~~~~0F62
@ -1191,13 +1200,3 @@ r~176,4~~8,71~~~~~~~0FB2
\tmw8070~67,5~~9,70~~~~~~~none \tmw8070~67,5~~9,70~~~~~~~none
\tmw8071~68,5~~9,71~~~~~~~none \tmw8071~68,5~~9,71~~~~~~~none
\tmw8072~69,5~~9,72~~~~~~~none \tmw8072~69,5~~9,72~~~~~~~none
// yungs.drung (reversed):
\tmw8097~97,5~~9,97~~~~~~~5350
// yungs.drung (standard):
\tmw8098~98,5~~9,98~~~~~~~534D
// utsama ka:
\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91

View file

@ -298,7 +298,7 @@ public class UnicodeUtils implements UnicodeConstants {
characters will appear as themselves. */ characters will appear as themselves. */
public static String unicodeCodepointToString(char cp, public static String unicodeCodepointToString(char cp,
boolean shortenIfPossible) { boolean shortenIfPossible) {
return unicodeCodepointToString(cp, shortenIfPossible, "\\u"); return unicodeCodepointToString(cp, shortenIfPossible, "\\u", false);
} }
/** Like {@link #unicodeCodepointToString(char, boolean)} if you /** Like {@link #unicodeCodepointToString(char, boolean)} if you
@ -307,7 +307,8 @@ public class UnicodeUtils implements UnicodeConstants {
<code>0F55</code>. */ <code>0F55</code>. */
public static String unicodeCodepointToString(char cp, public static String unicodeCodepointToString(char cp,
boolean shortenIfPossible, boolean shortenIfPossible,
String prefix) { String prefix,
boolean upperCase) {
if (shortenIfPossible) { if (shortenIfPossible) {
if ((cp >= 'a' && cp <= 'z') if ((cp >= 'a' && cp <= 'z')
|| (cp >= 'A' && cp <= 'Z') || (cp >= 'A' && cp <= 'Z')
@ -348,14 +349,16 @@ public class UnicodeUtils implements UnicodeConstants {
return "\\r"; return "\\r";
} }
String suffix;
if (cp < '\u0010') if (cp < '\u0010')
return prefix + "000" + Integer.toHexString((int)cp); suffix = "000" + Integer.toHexString((int)cp);
else if (cp < '\u0100') else if (cp < '\u0100')
return prefix + "00" + Integer.toHexString((int)cp); suffix = "00" + Integer.toHexString((int)cp);
else if (cp < '\u1000') else if (cp < '\u1000')
return prefix + "0" + Integer.toHexString((int)cp); suffix = "0" + Integer.toHexString((int)cp);
else else
return prefix + Integer.toHexString((int)cp); suffix = Integer.toHexString((int)cp);
return prefix + (upperCase ? suffix.toUpperCase() : suffix);
} }
/** /**

View file

@ -546,10 +546,12 @@ public final class ACIPTraits implements TTraits {
/** Gets the duffcodes for wowel, such that they look good with /** Gets the duffcodes for wowel, such that they look good with
* the preceding glyph, and appends them to duff. */ * the preceding glyph, and appends them to duff. */
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) { public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
throws ConversionException
{
if (null == wowel) return; if (null == wowel) return;
if (null == getEwtsForWowel(wowel)) // FIXME: expensive assertion! Use assert. if (null == getEwtsForWowel(wowel)) // FIXME: expensive assertion! Use assert.
throw new IllegalArgumentException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly."); throw new ConversionException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
// Order matters here. // Order matters here.
boolean context_added[] = new boolean[] { false }; boolean context_added[] = new boolean[] { false };
@ -619,8 +621,10 @@ public final class ACIPTraits implements TTraits {
try { try {
return TPairListFactory.breakACIPIntoChunks(tt, sh); return TPairListFactory.breakACIPIntoChunks(tt, sh);
} catch (StackOverflowError e) { } catch (StackOverflowError e) {
// TODO(dchandler): use ConversionException? Stop catching these?
throw new IllegalArgumentException("Input too large[1]: " + tt); throw new IllegalArgumentException("Input too large[1]: " + tt);
} catch (OutOfMemoryError e) { } catch (OutOfMemoryError e) {
// TODO(dchandler): use ConversionException? Stop catching these?
throw new IllegalArgumentException("Input too large[2]: " + tt); throw new IllegalArgumentException("Input too large[2]: " + tt);
} }
} }

View file

@ -0,0 +1,30 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2005 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text.ttt;
/**
 * A general-purpose checked exception used to indicate a problem
 * during conversion.
 *
 * @author David Chandler
 */
public final class ConversionException extends Exception {
    /** Exception is Serializable, so pin the serialized form's version
     *  explicitly instead of relying on the compiler-derived default. */
    private static final long serialVersionUID = 1L;

    /**
     * Constructs a ConversionException whose detail message is x.
     * The constructor is package-private, so only code in this
     * package can create instances.
     *
     * @param x the detail message, later available via getMessage()
     * @see Exception#Exception(String)
     */
    ConversionException(String x) { super(x); }
}

View file

@ -19,10 +19,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt; package org.thdl.tib.text.ttt;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.ArrayList;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.tshegbar.UnicodeUtils; import org.thdl.tib.text.tshegbar.UnicodeUtils;
/** Tests this package's ability to understand EWTS and turn it into /** Tests this package's ability to understand EWTS and turn it into
@ -76,42 +78,145 @@ public class EWTSTest extends TestCase {
} }
} }
/** Causes a JUnit test case failure unless the EWTS document ewts /** Returns the Unicode corresponding to the TMW to which ewts
* converts to the unicode expectedUnicode. */ * corresponds, or null if we couldn't push through, even with
static void ewts2uni_test(String ewts, String expectedUnicode) { * errors, from EWTS->TMW->Unicode. */
// TODO(DLC)[EWTS->Tibetan]: In addition to what this private static String ewts2tmw2uni(String ewts) {
// currently does, have this function convert to TMW and TTraits traits = EWTSTraits.instance();
// convert that TMW to Unicode and verify that the result is
// the same. Almost every call should allow for that.
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(), boolean shortMessages = true;
ewts, errors, String warningLevel = "All"; // slow but exercises more code paths
null, true, ArrayList scan
"None", // TODO(DLC)[EWTS->Tibetan]: ??? = traits.scanner().scan(ewts, errors, -1,
false /* short warnings */); shortMessages,
if (null == unicode) { warningLevel);
if (null == scan)
return null;
if (errors.length() > 0)
return null;
errors = new StringBuffer();
TibetanDocument tdoc = new TibetanDocument();
boolean rv;
try {
rv = TConverter.convertToTMW(traits,
scan, tdoc, errors, null, null,
false, warningLevel,
shortMessages, true,
new int[] { tdoc.getLength() });
} catch (java.io.IOException e) {
// I doubt this can happen.
throw new Error(e.toString());
}
if (!rv)
return null;
if (tdoc.getLength() < 1 && ewts.length() > 0)
return null;
errors = new StringBuffer();
long numAttemptedReplacements[] = new long[] { 0 };
tdoc.convertToUnicode(0, tdoc.getLength(), errors, null,
numAttemptedReplacements);
if (errors.length() > 0)
return null;
if (numAttemptedReplacements[0] < 1)
return null;
try {
return tdoc.getText(0, tdoc.getLength());
} catch (javax.swing.text.BadLocationException e) {
throw new Error("I know this won't happen: " + e);
}
}
/** Convenience form of {@link #ewts2uni_test(String, String, boolean)}
 * that passes true for doEwts2tmw2uni, so both the EWTS->Unicode
 * conversion and the EWTS->TMW->Unicode trip are checked against
 * expectedUnicode. */
static void ewts2uni_test(String ewts, String expectedUnicode) {
ewts2uni_test(ewts, expectedUnicode, true);
}
/** Tests EWTS->Unicode but not EWTS->TMW[->Unicode].  This is
 * {@link #ewts2uni_test(String, String, boolean)} with
 * doEwts2tmw2uni set to false. */
static void just_ewts2uni_test(String ewts, String expectedUnicode) {
ewts2uni_test(ewts, expectedUnicode, false);
}
/** Causes a JUnit test case failure unless the EWTS document ewts
 *  converts to the unicode expectedUnicode.  The direct
 *  EWTS->Unicode conversion is always checked; when doEwts2tmw2uni
 *  is true, the result of the EWTS->TMW->Unicode trip is checked
 *  against the same expectedUnicode as well.
 *  @param ewts the EWTS input
 *  @param expectedUnicode the Unicode the conversions should yield
 *  @param doEwts2tmw2uni true to also check EWTS->TMW->Unicode */
static void ewts2uni_test(String ewts, String expectedUnicode,
                          boolean doEwts2tmw2uni) {
    // First leg: straight EWTS->Unicode, keeping whatever errors the
    // converter reports so the helper can print them.
    StringBuffer directErrors = new StringBuffer();
    String directUnicode
        = TConverter.convertToUnicodeText(EWTSTraits.instance(),
                                          ewts, directErrors,
                                          null, true,
                                          "None", // TODO(DLC)[EWTS->Tibetan]: ???
                                          false /* short warnings */);
    help_ewts2uni_test("EWTS->Unicode: ",
                       ewts, expectedUnicode, directUnicode, directErrors);

    if (!doEwts2tmw2uni) {
        return;
    }

    // Second leg: EWTS->TMW->Unicode.  ewts2tmw2uni does not hand back
    // its error text, so the helper gets an empty buffer.
    String viaTmwUnicode = ewts2tmw2uni(ewts);
    help_ewts2uni_test("EWTS->TMW->Unicode: ",
                       ewts, expectedUnicode, viaTmwUnicode,
                       new StringBuffer());
}
/** Compares two pieces of Unicode for equality, tolerating the
 *  handful of alternative spellings that the two conversion paths
 *  can produce for the same input (e.g., for "b+ha").  TMW->Unicode
 *  is different source code from EWTS->Unicode, which is why two
 *  spellings arise.  The table below is only complete enough to
 *  handle the test cases we have.
 *  @param expectedUnicode the Unicode the test expects
 *  @param actualUnicode the Unicode a conversion produced
 *  @return true if actualUnicode, after rewriting the tolerated
 *  alternatives, equals expectedUnicode, or if the two strings are
 *  equal as given */
private static boolean ewts2uni_unicode_equality(String expectedUnicode,
                                                 String actualUnicode) {
    // Each row is { spelling to rewrite, spelling to rewrite it to }.
    // Rows are applied to actualUnicode in order.
    // TODO(dchandler): replaceAll is a 1.4-ism.  Will users balk?
    final String[][] rewrites = {
        { "\u0f0d\u0f0d", "\u0f0e" },  // TMW has no \u0f0e glyph
        // The remaining pairs are equivalent and neither member is
        // discouraged:
        { "\u0f69", "\u0f40\u0fb5" },
        { "\u0f43", "\u0f42\u0fb7" },
        { "\u0f4d", "\u0f4c\u0fb7" },
        { "\u0f52", "\u0f51\u0fb7" },
        { "\u0f57", "\u0f56\u0fb7" },
        { "\u0f5c", "\u0f5b\u0fb7" },
        { "\u0fb9", "\u0f90\u0fb5" },
        { "\u0f93", "\u0f92\u0fb7" },
        { "\u0f9d", "\u0f9c\u0fb7" },
        { "\u0fa2", "\u0fa1\u0fb7" },
        { "\u0fa7", "\u0fa6\u0fb7" },
        { "\u0fac", "\u0fab\u0fb7" },
    };

    String rewritten = actualUnicode;
    for (int i = 0; i < rewrites.length; i++) {
        rewritten = rewritten.replaceAll(rewrites[i][0], rewrites[i][1]);
    }

    // Fall back to a plain comparison so that an expectation written
    // with one of the left-hand spellings still matches itself.
    return rewritten.equals(expectedUnicode)
        || expectedUnicode.equals(actualUnicode);
}
private static void help_ewts2uni_test(String prefix,
String ewts,
String expectedUnicode,
String actualUnicode,
StringBuffer errors) {
if (null == actualUnicode) {
if (null != expectedUnicode && "none" != expectedUnicode) { if (null != expectedUnicode && "none" != expectedUnicode) {
System.out.println("No unicode exists for " + ewts System.out.println(prefix + "No unicode exists for " + ewts
+ " but you expected " + " but you expected "
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)); + UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
assertTrue(false); assertTrue(false);
} }
System.out.println("Unicode for " + ewts + " can't be had; errors are " + errors); System.out.println(prefix + "Unicode for " + ewts + " can't be had; errors are " + errors);
} else { } else {
if (null != expectedUnicode && !expectedUnicode.equals(unicode)) { if (null != expectedUnicode
explainInequality(unicode, expectedUnicode, System.out); && !ewts2uni_unicode_equality(expectedUnicode, actualUnicode)) {
if (UnicodeUtils.unicodeStringToPrettyString(unicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) { explainInequality(actualUnicode, expectedUnicode, System.out);
System.out.println("UGLY strings: The unicode for\n \"" + ewts if (UnicodeUtils.unicodeStringToPrettyString(actualUnicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
System.out.println(prefix + "UGLY strings: The unicode for\n \"" + ewts
+ "\"\nis\n \"" + "\"\nis\n \""
+ unicode + actualUnicode
+ "\",\nbut you expected\n \"" + "\",\nbut you expected\n \""
+ expectedUnicode + expectedUnicode
+ "\""); + "\"");
} else { } else {
System.out.println("The unicode for\n \"" + ewts System.out.println(prefix + "The unicode for\n \"" + ewts
+ "\"\nis\n \"" + "\"\nis\n \""
+ UnicodeUtils.unicodeStringToPrettyString(unicode) + UnicodeUtils.unicodeStringToPrettyString(actualUnicode)
+ "\",\nbut you expected\n \"" + "\",\nbut you expected\n \""
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode) + UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)
+ "\""); + "\"");
@ -122,7 +227,7 @@ public class EWTSTest extends TestCase {
TPairList[] la TPairList[] la
= EWTSTraits.instance().breakTshegBarIntoChunks(sb.toString(), false); = EWTSTraits.instance().breakTshegBarIntoChunks(sb.toString(), false);
assertTrue(la[1] == null); assertTrue(la[1] == null);
System.out.println("EWTS=" + ewts + " and l'=" + la[0].toString2()); System.out.println(prefix + "EWTS=" + ewts + " and l'=" + la[0].toString2());
} }
assertTrue(false); assertTrue(false);
} }
@ -156,24 +261,25 @@ public class EWTSTest extends TestCase {
public void test0F39() { public void test0F39() {
ewts2uni_test("v", "\u0F56\u0F39"); ewts2uni_test("v", "\u0F56\u0F39");
ewts2uni_test("f", "\u0F55\u0F39"); ewts2uni_test("f", "\u0F55\u0F39");
ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e"); just_ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
ewts2uni_test("faM", "\u0f55\u0f39\u0f7e"); ewts2uni_test("faM", "\u0f55\u0f39\u0f7e");
ewts2uni_test("vaM", "\u0f56\u0f39\u0f7e"); ewts2uni_test("vaM", "\u0f56\u0f39\u0f7e");
ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39"); just_ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39");
ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39"); just_ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39");
ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e"); just_ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e");
ewts2uni_test("a^", "\u0f68\u0f39"); ewts2uni_test("a^", "\u0f68\u0f39");
ewts2uni_test("hUM^", "\u0f67\u0f71\u0f74\u0f7e\u0f39"); ewts2uni_test("hUM^", "\u0f67\u0f39\u0f71\u0f74\u0f7e");
ewts2uni_test("ph^", "\u0f55\u0f39"); ewts2uni_test("ph^", "\u0f55\u0f39");
ewts2uni_test("phe^", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter? ewts2uni_test("phe^", "\u0f55\u0f39\u0f7a");
ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? EWTSTraits.isWowelThatRequiresAChen(..) might be to blame
ewts2uni_test("a\u0f39", "\u0f68\u0f39"); ewts2uni_test("a\u0f39", "\u0f68\u0f39");
ewts2uni_test("hUM\u0f39", "\u0f67\u0f71\u0f74\u0f7e\u0f39"); ewts2uni_test("hUM\u0f39", "\u0f67\u0f39\u0f71\u0f74\u0f7e");
ewts2uni_test("ph\u0f39", "\u0f55\u0f39"); ewts2uni_test("ph\u0f39", "\u0f55\u0f39");
ewts2uni_test("phe\u0f39", "\u0f55\u0f7a\u0f39"); // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter? ewts2uni_test("phe\u0f39", "\u0f55\u0f39\u0f7a");
ewts2uni_test("ph\u0f39e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? ewts2uni_test("ph\u0f39e", "\u0f55\u0f39\u0f68\u0f7a"); // TODO(DLC)[EWTS->Tibetan]: This is no good! We don't even warn, do we!? EWTSTraits.isWowelThatRequiresAChen(..) might be to blame
if (RUN_FAILING_TESTS) ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e"); if (RUN_FAILING_TESTS) ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
} }
@ -181,6 +287,13 @@ public class EWTSTest extends TestCase {
/** Tests that the EWTS->unicode converter isn't completely /** Tests that the EWTS->unicode converter isn't completely
braindead. */ braindead. */
public void testEwtsBasics() { public void testEwtsBasics() {
just_ewts2uni_test("r+sa", "\u0f62\u0fb6");
ewts2uni_test("R+s", "\u0f6a\u0fb6");
ewts2uni_test("k?e", "\u0f40\u0f84\u0f68\u0f7a");
ewts2uni_test("ko+o", "\u0f40\u0f7c\u0f7c");
ewts2uni_test("kau+u", "\u0f40\u0f74\u0f7d");
ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66");
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51"); ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51"); ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
@ -220,39 +333,46 @@ public class EWTSTest extends TestCase {
ewts2uni_test("b.ra ", "\u0f56\u0f62\u0f0b"); ewts2uni_test("b.ra ", "\u0f56\u0f62\u0f0b");
ewts2uni_test("bara ", "\u0f56\u0f62\u0f0b"); ewts2uni_test("bara ", "\u0f56\u0f62\u0f0b");
ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b"); just_ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
} }
/** Miscellaneous tests of EWTS->Unicode conversion. */ /** Miscellaneous tests of EWTS->Unicode conversion. */
public void test__EWTS__miscellany() { public void test__EWTS__miscellany() {
just_ewts2uni_test("ga\\u0f02ha", "\u0f42\u0f02\u0f67"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("g.\\u0f03\u0f0b", "\u0f42\u0f03\u0f0b"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
ewts2uni_test("", ""); just_ewts2uni_test("k+\u0fb2e", "\u0f40\u0fb2\u0f7a");
assert_EWTS_error("\u0f42ya");
just_ewts2uni_test("\u0f42+ya", "\u0f42\u0fb1");
just_ewts2uni_test("\u0f42.ya", "\u0f42\u0f61");
just_ewts2uni_test("", "");
ewts2uni_test("0\\u0f19", "\u0f20\u0f19"); ewts2uni_test("0\\u0f19", "\u0f20\u0f19");
ewts2uni_test("0\\u0f18", "\u0f20\u0f18"); ewts2uni_test("0\\u0f18", "\u0f20\u0f18");
ewts2uni_test("0\\u0f3e", "\u0f20\u0f3e"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw ewts2uni_test("0\\u0f3e", "\u0f20\u0f3e"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw
ewts2uni_test("0\\u0f3f", "\u0f20\u0f3f"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw ewts2uni_test("0\\u0f3f", "\u0f20\u0f3f"); // TODO(DLC)[EWTS->Tibetan]: test ewts->tmw
ewts2uni_test("R", "\u0f6A"); just_ewts2uni_test("R", "\u0f6A");
ewts2uni_test("Ra", "\u0f6A"); just_ewts2uni_test("Ra", "\u0f6A");
ewts2uni_test("R+ka", "\u0F6A\u0f90"); just_ewts2uni_test("R+ka", "\u0F6A\u0f90");
ewts2uni_test("k+Wa", "\u0f40\u0FBA"); just_ewts2uni_test("k+Wa", "\u0f40\u0FBA");
ewts2uni_test("k+Ya", "\u0f40\u0FBB"); just_ewts2uni_test("k+Ya", "\u0f40\u0FBB");
ewts2uni_test("k+Ra", "\u0f40\u0FBC"); just_ewts2uni_test("k+Ra", "\u0f40\u0FBC");
ewts2uni_test("k+wa", "\u0f40\u0Fad"); ewts2uni_test("k+wa", "\u0f40\u0Fad");
ewts2uni_test("k+la", "\u0f40\u0Fb3"); ewts2uni_test("k+la", "\u0f40\u0Fb3");
ewts2uni_test("k+ya", "\u0f40\u0Fb1"); ewts2uni_test("k+ya", "\u0f40\u0Fb1");
ewts2uni_test("k+ra", "\u0f40\u0Fb2"); ewts2uni_test("k+ra", "\u0f40\u0Fb2");
ewts2uni_test("r-I", "\u0f62\u0f81"); ewts2uni_test("r-I", "\u0f62\u0f71\u0f80");
ewts2uni_test("l-I", "\u0f63\u0f81"); ewts2uni_test("l-I", "\u0f63\u0f71\u0f80");
ewts2uni_test("r-i", "\u0f62\u0f80"); ewts2uni_test("r-i", "\u0f62\u0f80");
ewts2uni_test("l-i", "\u0f63\u0f80"); ewts2uni_test("l-i", "\u0f63\u0f80");
ewts2uni_test("gr-i", "\u0f42\u0fb2\u0f80"); ewts2uni_test("gr-i", "\u0f42\u0fb2\u0f80");
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f81"); ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f71\u0f80");
ewts2uni_test("gl-i", "\u0f42\u0fb3\u0f80"); ewts2uni_test("gl-i", "\u0f42\u0fb3\u0f80");
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f81"); ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f71\u0f80");
} }
@ -277,9 +397,9 @@ public class EWTSTest extends TestCase {
ewts2uni_test("u", "\u0f68\u0f74"); ewts2uni_test("u", "\u0f68\u0f74");
ewts2uni_test("U", "\u0f68\u0f71\u0f74"); ewts2uni_test("U", "\u0f68\u0f71\u0f74");
ewts2uni_test("a+r-i", "\u0f68\u0fb2\u0f80"); ewts2uni_test("a+r-i", "\u0f68\u0fb2\u0f80");
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f81"); ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f71\u0f80");
ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80"); just_ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f81"); just_ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f71\u0f80");
ewts2uni_test("e", "\u0f68\u0f7a"); ewts2uni_test("e", "\u0f68\u0f7a");
ewts2uni_test("ai", "\u0f68\u0f7b"); ewts2uni_test("ai", "\u0f68\u0f7b");
// ewts2uni_test("ao", "\u0f68\u0f68\u0f7c"); // TODO(DLC)[EWTS->Tibetan]: // ewts2uni_test("ao", "\u0f68\u0f68\u0f7c"); // TODO(DLC)[EWTS->Tibetan]:
@ -289,11 +409,12 @@ public class EWTSTest extends TestCase {
// ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("-i", "\u0f68\u0f80"); ewts2uni_test("-i", "\u0f68\u0f80");
ewts2uni_test("-I", "\u0f68\u0f81"); ewts2uni_test("-I", "\u0f68\u0f71\u0f80");
// ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("\\u0f68", "\u0f68"); just_ewts2uni_test("\\u0f68", "\u0f68");
ewts2uni_test("\\u0f86", "\u0f68\u0f86"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a\\u0f86", "\u0f68\u0f86"); ewts2uni_test("a\\u0f86", "\u0f68\u0f86");
ewts2uni_test("a\\U0f86", "\u0f68\u0f86"); ewts2uni_test("a\\U0f86", "\u0f68\u0f86");
ewts2uni_test("a\\U0F86", "\u0f68\u0f86"); ewts2uni_test("a\\U0F86", "\u0f68\u0f86");
@ -305,7 +426,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("a\\u0f87", "\u0f68\u0f87"); ewts2uni_test("a\\u0f87", "\u0f68\u0f87");
// ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say // ewts2uni_test("aHM", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("a", "\u0f68"); ewts2uni_test("a", "\u0f68");
} }
@ -325,7 +446,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("o+e", "\u0f68\u0f7c\u0f7a"); ewts2uni_test("o+e", "\u0f68\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a"); ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a");
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e"); ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7e\u0f7f");
ewts2uni_test("u+A", "\u0f68\u0f74\u0f71"); ewts2uni_test("u+A", "\u0f68\u0f74\u0f71");
ewts2uni_test("o+-I", "DLC"); ewts2uni_test("o+-I", "DLC");
@ -342,9 +463,9 @@ public class EWTSTest extends TestCase {
ewts2uni_test("ku", "\u0f40\u0f74"); ewts2uni_test("ku", "\u0f40\u0f74");
ewts2uni_test("kU", "\u0f40\u0f71\u0f74"); ewts2uni_test("kU", "\u0f40\u0f71\u0f74");
ewts2uni_test("k+r-i", "\u0f40\u0fb2\u0f80"); ewts2uni_test("k+r-i", "\u0f40\u0fb2\u0f80");
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f81"); ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f71\u0f80");
ewts2uni_test("k+l-i", "\u0f40\u0fb3\u0f80"); ewts2uni_test("k+l-i", "\u0f40\u0fb3\u0f80");
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f81"); ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f71\u0f80");
ewts2uni_test("ke", "\u0f40\u0f7a"); ewts2uni_test("ke", "\u0f40\u0f7a");
ewts2uni_test("e", "\u0f68\u0f7a"); ewts2uni_test("e", "\u0f68\u0f7a");
ewts2uni_test("a", "\u0f68"); ewts2uni_test("a", "\u0f68");
@ -354,7 +475,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("kaM", "\u0f40\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("kaM", "\u0f40\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("kaH", "\u0f40\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("kaH", "\u0f40\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k-i", "\u0f40\u0f80"); ewts2uni_test("k-i", "\u0f40\u0f80");
ewts2uni_test("k-I", "\u0f40\u0f81"); ewts2uni_test("k-I", "\u0f40\u0f71\u0f80");
ewts2uni_test("ka~M`", "\u0f40\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("ka~M`", "\u0f40\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("ka~M", "\u0f40\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("ka~M", "\u0f40\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("ka?", "\u0f40\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("ka?", "\u0f40\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
@ -369,7 +490,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("ka\\u0f87", "\u0f40\u0f87"); ewts2uni_test("ka\\u0f87", "\u0f40\u0f87");
ewts2uni_test("kaMH", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("kaMH", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("kaHM", "\u0f40\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("kaHM", "\u0f40\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is // Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
@ -380,10 +501,10 @@ public class EWTSTest extends TestCase {
ewts2uni_test("ke+e+e", "\u0f40\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("ke+e+e", "\u0f40\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("ke+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("ko+e", "\u0f40\u0f7c\u0f7a"); ewts2uni_test("ko+e", "\u0f40\u0f7a\u0f7c");
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a"); ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e"); ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("ku+A", "\u0f40\u0f74\u0f71"); ewts2uni_test("ku+A", "\u0f40\u0f71\u0f74");
ewts2uni_test("k", "\u0f40"); ewts2uni_test("k", "\u0f40");
ewts2uni_test("ka", "\u0f40"); ewts2uni_test("ka", "\u0f40");
@ -414,7 +535,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("'aM", "\u0f60\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'aM", "\u0f60\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'aH", "\u0f60\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'aH", "\u0f60\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'-i", "\u0f60\u0f80"); ewts2uni_test("'-i", "\u0f60\u0f80");
ewts2uni_test("'-I", "\u0f60\u0f81"); ewts2uni_test("'-I", "\u0f60\u0f71\u0f80");
ewts2uni_test("'a~M`", "\u0f60\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'a~M`", "\u0f60\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'a~M", "\u0f60\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'a~M", "\u0f60\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'a?", "\u0f60\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'a?", "\u0f60\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
@ -429,7 +550,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("'a\\u0f87", "\u0f60\u0f87"); ewts2uni_test("'a\\u0f87", "\u0f60\u0f87");
ewts2uni_test("'aMH", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'aMH", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("'aHM", "\u0f60\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("'aHM", "\u0f60\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is // Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
@ -440,19 +561,19 @@ public class EWTSTest extends TestCase {
ewts2uni_test("'e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("'e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("'e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("'o+e", "\u0f60\u0f7c\u0f7a"); ewts2uni_test("'o+e", "\u0f60\u0f7a\u0f7c");
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a"); ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e"); ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("'u+A", "\u0f60\u0f74\u0f71"); ewts2uni_test("'u+A", "\u0f60\u0f71\u0f74");
ewts2uni_test("'", "\u0f60"); ewts2uni_test("'", "\u0f60");
ewts2uni_test("'a", "\u0f60"); ewts2uni_test("'a", "\u0f60");
ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80"); just_ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f81"); just_ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f71\u0f80");
ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80"); just_ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f81"); just_ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f71\u0f80");
} }
/** Tests that our implementation of EWTS's wowels are correct, /** Tests that our implementation of EWTS's wowels are correct,
@ -471,7 +592,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+ShaM", "\u0f40\u0fb5\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+ShaM", "\u0f40\u0fb5\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaH", "\u0f40\u0fb5\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+ShaH", "\u0f40\u0fb5\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sh-i", "\u0f40\u0fb5\u0f80"); ewts2uni_test("k+Sh-i", "\u0f40\u0fb5\u0f80");
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f81"); ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f71\u0f80");
ewts2uni_test("k+Sha~M`", "\u0f40\u0fb5\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+Sha~M`", "\u0f40\u0fb5\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha~M", "\u0f40\u0fb5\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+Sha~M", "\u0f40\u0fb5\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+Sha?", "\u0f40\u0fb5\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+Sha?", "\u0f40\u0fb5\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
@ -486,7 +607,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+Sha\\u0f87", "\u0f40\u0fb5\u0f87"); ewts2uni_test("k+Sha\\u0f87", "\u0f40\u0fb5\u0f87");
ewts2uni_test("k+ShaMH", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+ShaMH", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is // Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
@ -497,18 +618,18 @@ public class EWTSTest extends TestCase {
ewts2uni_test("k+She+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("k+She+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("k+She+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+She+e+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("k+She+e+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7c\u0f7a"); ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7a\u0f7c");
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a"); ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e"); ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f74\u0f71"); ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f71\u0f74");
ewts2uni_test("k+Sh", "\u0f40\u0fb5"); ewts2uni_test("k+Sh", "\u0f40\u0fb5");
ewts2uni_test("k+Sha", "\u0f40\u0fb5"); ewts2uni_test("k+Sha", "\u0f40\u0fb5");
ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80"); just_ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f81"); just_ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f71\u0f80");
ewts2uni_test("k+Sh+l-i", "\u0f40\u0fb5\u0fb3\u0f80"); ewts2uni_test("k+Sh+l-i", "\u0f40\u0fb5\u0fb3\u0f80");
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f81"); ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f71\u0f80");
} }
/** Tests that our implementation of EWTS's wowels are correct, /** Tests that our implementation of EWTS's wowels are correct,
@ -526,12 +647,12 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywo", "\u0f55\u0fb1\u0fad\u0f7c"); ewts2uni_test("phywo", "\u0f55\u0fb1\u0fad\u0f7c");
ewts2uni_test("phywau", "\u0f55\u0fb1\u0fad\u0f7d"); ewts2uni_test("phywau", "\u0f55\u0fb1\u0fad\u0f7d");
ewts2uni_test("phyw-i", "\u0f55\u0fb1\u0fad\u0f80"); ewts2uni_test("phyw-i", "\u0f55\u0fb1\u0fad\u0f80");
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f81"); ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f71\u0f80");
ewts2uni_test("phyw\\u0f86", "\u0f55\u0fb1\u0fad\u0f86"); ewts2uni_test("phyw\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
assertEquals(EWTSTraits.instance().getUnicodeForWowel("\u0f86+\u0f84"), "\u0f86\u0f84"); assertEquals(EWTSTraits.instance().getUnicodeForWowel("\u0f86+\u0f84"), "\u0f86\u0f84");
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86"); ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86"); ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phywa\\u0f86", "\u0f55\u0fb1\u0fad\u0f86"); ewts2uni_test("phywa\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
ewts2uni_test("phywa\\u0f86\u0f84", "\u0f55\u0fb1\u0fad\u0f86\u0f84"); ewts2uni_test("phywa\\u0f86\u0f84", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
ewts2uni_test("phywa\\U0f86", "\u0f55\u0fb1\u0fad\u0f86"); ewts2uni_test("phywa\\U0f86", "\u0f55\u0fb1\u0fad\u0f86");
@ -552,10 +673,10 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7c\u0f7a"); ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7c");
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a"); ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e"); ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f74\u0f71"); ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f71\u0f74");
ewts2uni_test("phyw", "\u0f55\u0fb1\u0fad"); ewts2uni_test("phyw", "\u0f55\u0fb1\u0fad");
ewts2uni_test("phywa", "\u0f55\u0fb1\u0fad"); ewts2uni_test("phywa", "\u0f55\u0fb1\u0fad");
@ -566,7 +687,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
assert_EWTS_error("phywr-i"); assert_EWTS_error("phywr-i");
assert_EWTS_error("phyw+r-i"); assert_EWTS_error("phyw+r-i");
@ -579,55 +700,55 @@ public class EWTSTest extends TestCase {
* (U+0F40,U+0F97,U+0F97,U+0F90,U+0F90,U+0F97) is correct. I * (U+0F40,U+0F97,U+0F97,U+0F90,U+0F90,U+0F97) is correct. I
* chose this stack as an example of an absurd stack. */ * chose this stack as an example of an absurd stack. */
public void test__EWTS__wowels_on_kjjkkj() { public void test__EWTS__wowels_on_kjjkkj() {
ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71"); just_ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72"); just_ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72"); just_ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74"); just_ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74"); just_ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a"); just_ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b"); just_ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c"); just_ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
ewts2uni_test("k+j+j+k+k+jau", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d"); just_ewts2uni_test("k+j+j+k+k+jau", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
ewts2uni_test("k+j+j+k+k+jaM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+jaM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+jaH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+jaH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+j-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f80"); just_ewts2uni_test("k+j+j+k+k+j-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f80");
ewts2uni_test("k+j+j+k+k+j-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f81"); just_ewts2uni_test("k+j+j+k+k+j-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f80");
ewts2uni_test("k+j+j+k+k+ja~M`", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+ja~M`", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+ja~M", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+ja~M", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+ja?", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+ja?", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+ja\\u0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\u0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\U0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\U0f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\U0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\U0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\u0F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\u00000f86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86"); just_ewts2uni_test("k+j+j+k+k+ja\\u00000F86", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f86");
ewts2uni_test("k+j+j+k+k+ja\\u0f87", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f87"); just_ewts2uni_test("k+j+j+k+k+ja\\u0f87", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f87");
ewts2uni_test("k+j+j+k+k+jaMH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+jaMH", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
ewts2uni_test("k+j+j+k+k+jaHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say just_ewts2uni_test("k+j+j+k+k+jaHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is // Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
// the same as I and o+o is the same as au. // the same as I and o+o is the same as au.
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72"); just_ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c"); just_ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a"); just_ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? just_ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? just_ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:? just_ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7a"); just_ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7c");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a"); just_ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74\u0f72\u0f7a\u0f7c");
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e"); just_ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74\u0f72\u0f7a\u0f7c\u0f7e\u0f7f");
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71"); just_ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97"); just_ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97"); just_ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80"); just_ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f81"); just_ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f71\u0f80");
ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80"); just_ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f81"); just_ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f71\u0f80");
} }
/** Tests that the EWTS that the spec says corresponds to each /** Tests that the EWTS that the spec says corresponds to each
@ -644,14 +765,16 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0000", "\u0000"); ewts2uni_test("\\u0000", "\u0000");
ewts2uni_test("\\u0eff", "\u0eff"); ewts2uni_test("\\u0eff", "\u0eff");
} }
ewts2uni_test("\\u0f00", "\u0f00"); just_ewts2uni_test("\\u0f00", "\u0f00"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
ewts2uni_test("\\u0f40", "\u0f40"); just_ewts2uni_test("\\u0F02", "\u0F02"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("\\u0F03", "\u0F03"); // TODO(DLC)[EWTS->Tibetan]: ewts->tmw is broken
just_ewts2uni_test("\\u0f40", "\u0f40");
if (RUN_FAILING_TESTS) { if (RUN_FAILING_TESTS) {
assert_EWTS_error("\\u0f70"); // reserved codepoint assert_EWTS_error("\\u0f70"); // reserved codepoint
assert_EWTS_error("\\u0fff"); // reserved codepoint assert_EWTS_error("\\u0fff"); // reserved codepoint
ewts2uni_test("\\uf000", "\uf000"); just_ewts2uni_test("\\uf000", "\uf000");
ewts2uni_test("\\uf01f", "\uf01f"); just_ewts2uni_test("\\uf01f", "\uf01f");
ewts2uni_test("\\uefff", "\uefff"); just_ewts2uni_test("\\uefff", "\uefff");
} }
@ -661,11 +784,11 @@ public class EWTSTest extends TestCase {
ewts2uni_test("f", "\u0F55\u0F39"); ewts2uni_test("f", "\u0F55\u0F39");
ewts2uni_test("\u0f88+ka", "\u0f88\u0f90"); ewts2uni_test("\u0f88+ka", "\u0f88\u0f90");
ewts2uni_test("\u0f88+kha", "\u0f88\u0f91"); ewts2uni_test("\u0f88+kha", "\u0f88\u0f91");
ewts2uni_test("\\u0f88+ka", "\u0f88\u0f90");
ewts2uni_test("\\u0f88+kha", "\u0f88\u0f91");
ewts2uni_test("oM", ewts2uni_test("oM",
false ? "\u0F00" : "\u0f68\u0f7c\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: which is correct? see e-mail (maybe it was cfynn who thought \u0F00 ought not be generated? false ? "\u0F00" : "\u0f68\u0f7c\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: which is correct? see e-mail (maybe it was cfynn who thought \u0F00 ought not be generated?
ewts2uni_test("\\u0F01", "\u0F01"); ewts2uni_test("\\u0F01", "\u0F01");
ewts2uni_test("\\u0F02", "\u0F02");
ewts2uni_test("\\u0F03", "\u0F03");
ewts2uni_test("@", "\u0F04"); ewts2uni_test("@", "\u0F04");
ewts2uni_test("#", "\u0F05"); // TODO(DLC)[EWTS->Tibetan]: warning/error? [#] alone is nonsense. ewts2uni_test("#", "\u0F05"); // TODO(DLC)[EWTS->Tibetan]: warning/error? [#] alone is nonsense.
ewts2uni_test("$", "\u0F06"); ewts2uni_test("$", "\u0F06");
@ -777,9 +900,9 @@ public class EWTSTest extends TestCase {
ewts2uni_test("u", achen + "\u0F74"); ewts2uni_test("u", achen + "\u0F74");
ewts2uni_test("U", achen + "\u0F71\u0F74"); ewts2uni_test("U", achen + "\u0F71\u0F74");
ewts2uni_test("a+r-i", achen + "\u0fb2\u0f80"); // not 0F76, which is discouraged by the Unicode standard ewts2uni_test("a+r-i", achen + "\u0fb2\u0f80"); // not 0F76, which is discouraged by the Unicode standard
ewts2uni_test("a+r-I", achen + "\u0fb2\u0f81"); // not 0F77, which is discouraged by the Unicode standard ewts2uni_test("a+r-I", achen + "\u0fb2\u0f71\u0f80"); // not 0F77, which is discouraged by the Unicode standard
ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard just_ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard
ewts2uni_test("a+l-I", achen + "\u0fb3\u0f81"); // not 0F79, which is discouraged by the Unicode standard just_ewts2uni_test("a+l-I", achen + "\u0fb3\u0f71\u0f80"); // not 0F79, which is discouraged by the Unicode standard
ewts2uni_test("e", achen + "\u0F7A"); ewts2uni_test("e", achen + "\u0F7A");
ewts2uni_test("ai", achen + "\u0F7B"); ewts2uni_test("ai", achen + "\u0F7B");
ewts2uni_test("o", achen + "\u0F7C"); ewts2uni_test("o", achen + "\u0F7C");
@ -787,7 +910,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("M", achen + "\u0F7E"); ewts2uni_test("M", achen + "\u0F7E");
ewts2uni_test("H", achen + "\u0F7F"); ewts2uni_test("H", achen + "\u0F7F");
ewts2uni_test("-i", achen + "\u0F80"); ewts2uni_test("-i", achen + "\u0F80");
ewts2uni_test("-I", achen + "\u0F81"); ewts2uni_test("-I", achen + "\u0F71\u0F80");
ewts2uni_test("~M`", achen + "\u0F82"); ewts2uni_test("~M`", achen + "\u0F82");
ewts2uni_test("~M", achen + "\u0F83"); ewts2uni_test("~M", achen + "\u0F83");
ewts2uni_test("?", achen + "\u0F84"); // \u0f84 is a combiner ewts2uni_test("?", achen + "\u0F84"); // \u0f84 is a combiner
@ -799,8 +922,8 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0F8A", "\u0F8A"); ewts2uni_test("\\u0F8A", "\u0F8A");
ewts2uni_test("\\u0F8B", "\u0F8B"); ewts2uni_test("\\u0F8B", "\u0F8B");
final String ewts_for_superscript = "tsh+"; final String ewts_for_superscript = "r+";
final String unicode_for_superscript = "\u0f5a"; final String unicode_for_superscript = "\u0f62";
ewts2uni_test(ewts_for_superscript + "k", ewts2uni_test(ewts_for_superscript + "k",
unicode_for_superscript + "\u0F90"); unicode_for_superscript + "\u0F90");
ewts2uni_test(ewts_for_superscript + "kh", ewts2uni_test(ewts_for_superscript + "kh",
@ -812,10 +935,10 @@ public class EWTSTest extends TestCase {
+ (false ? "\u0F93" : "\u0f92\u0fb7")); + (false ? "\u0F93" : "\u0f92\u0fb7"));
ewts2uni_test(ewts_for_superscript + "ng", ewts2uni_test(ewts_for_superscript + "ng",
unicode_for_superscript + "\u0F94"); unicode_for_superscript + "\u0F94");
ewts2uni_test(ewts_for_superscript + "c", just_ewts2uni_test(ewts_for_superscript + "c",
unicode_for_superscript + "\u0F95"); unicode_for_superscript + "\u0F95");
ewts2uni_test(ewts_for_superscript + "ch", just_ewts2uni_test(ewts_for_superscript + "ch",
unicode_for_superscript + "\u0F96"); unicode_for_superscript + "\u0F96");
ewts2uni_test(ewts_for_superscript + "j", ewts2uni_test(ewts_for_superscript + "j",
unicode_for_superscript + "\u0F97"); unicode_for_superscript + "\u0F97");
ewts2uni_test(ewts_for_superscript + "ny", ewts2uni_test(ewts_for_superscript + "ny",
@ -826,9 +949,9 @@ public class EWTSTest extends TestCase {
unicode_for_superscript + "\u0F9B"); unicode_for_superscript + "\u0F9B");
ewts2uni_test(ewts_for_superscript + "D", ewts2uni_test(ewts_for_superscript + "D",
unicode_for_superscript + "\u0F9C"); unicode_for_superscript + "\u0F9C");
ewts2uni_test(ewts_for_superscript + "D+h", just_ewts2uni_test(ewts_for_superscript + "D+h",
unicode_for_superscript unicode_for_superscript
+ (false ? "\u0F9D" : "\u0f9c\u0fb7")); + (false ? "\u0F9D" : "\u0f9c\u0fb7"));
ewts2uni_test(ewts_for_superscript + "N", ewts2uni_test(ewts_for_superscript + "N",
unicode_for_superscript + "\u0F9E"); unicode_for_superscript + "\u0F9E");
ewts2uni_test(ewts_for_superscript + "t", ewts2uni_test(ewts_for_superscript + "t",
@ -844,8 +967,8 @@ public class EWTSTest extends TestCase {
unicode_for_superscript + "\u0FA3"); unicode_for_superscript + "\u0FA3");
ewts2uni_test(ewts_for_superscript + "p", ewts2uni_test(ewts_for_superscript + "p",
unicode_for_superscript + "\u0FA4"); unicode_for_superscript + "\u0FA4");
ewts2uni_test(ewts_for_superscript + "ph", just_ewts2uni_test(ewts_for_superscript + "ph",
unicode_for_superscript + "\u0FA5"); unicode_for_superscript + "\u0FA5");
ewts2uni_test(ewts_for_superscript + "b", ewts2uni_test(ewts_for_superscript + "b",
unicode_for_superscript + "\u0FA6"); unicode_for_superscript + "\u0FA6");
ewts2uni_test(ewts_for_superscript + "b+h", ewts2uni_test(ewts_for_superscript + "b+h",
@ -859,119 +982,122 @@ public class EWTSTest extends TestCase {
unicode_for_superscript + "\u0FAA"); unicode_for_superscript + "\u0FAA");
ewts2uni_test(ewts_for_superscript + "dz", ewts2uni_test(ewts_for_superscript + "dz",
unicode_for_superscript + "\u0FAB"); unicode_for_superscript + "\u0FAB");
ewts2uni_test(ewts_for_superscript + "dz+h", just_ewts2uni_test(ewts_for_superscript + "dz+h",
unicode_for_superscript unicode_for_superscript
+ (false ? "\u0FAC" : "\u0fab\u0fb7")); + (false ? "\u0FAC" : "\u0fab\u0fb7"));
ewts2uni_test(ewts_for_superscript + "w", ewts2uni_test(ewts_for_superscript + "w",
unicode_for_superscript + "\u0FAD"); unicode_for_superscript + "\u0FAD");
ewts2uni_test(ewts_for_superscript + "zh", just_ewts2uni_test(ewts_for_superscript + "zh",
unicode_for_superscript + "\u0FAE"); unicode_for_superscript + "\u0FAE");
ewts2uni_test(ewts_for_superscript + "z", just_ewts2uni_test(ewts_for_superscript + "z",
unicode_for_superscript + "\u0FAF"); unicode_for_superscript + "\u0FAF");
ewts2uni_test(ewts_for_superscript + "'", just_ewts2uni_test(ewts_for_superscript + "'",
unicode_for_superscript + "\u0FB0"); unicode_for_superscript + "\u0FB0");
ewts2uni_test(ewts_for_superscript + "y", just_ewts2uni_test(ewts_for_superscript + "y",
unicode_for_superscript + "\u0FB1"); unicode_for_superscript + "\u0FB1");
ewts2uni_test(ewts_for_superscript + "r", just_ewts2uni_test(ewts_for_superscript + "r",
unicode_for_superscript + "\u0FB2"); unicode_for_superscript + "\u0FB2");
ewts2uni_test(ewts_for_superscript + "l", ewts2uni_test(ewts_for_superscript + "l",
unicode_for_superscript + "\u0FB3"); unicode_for_superscript + "\u0FB3");
ewts2uni_test(ewts_for_superscript + "sh", just_ewts2uni_test(ewts_for_superscript + "sh",
unicode_for_superscript + "\u0FB4"); unicode_for_superscript + "\u0FB4");
ewts2uni_test(ewts_for_superscript + "Sh", just_ewts2uni_test(ewts_for_superscript + "Sh",
unicode_for_superscript + "\u0FB5"); unicode_for_superscript + "\u0FB5");
ewts2uni_test(ewts_for_superscript + "s", just_ewts2uni_test(ewts_for_superscript + "s",
unicode_for_superscript + "\u0FB6"); unicode_for_superscript + "\u0FB6");
ewts2uni_test(ewts_for_superscript + "h", ewts2uni_test(ewts_for_superscript + "h",
unicode_for_superscript + "\u0FB7"); unicode_for_superscript + "\u0FB7");
ewts2uni_test(ewts_for_superscript + "a", just_ewts2uni_test(ewts_for_superscript + "a",
unicode_for_superscript + "\u0FB8"); unicode_for_superscript + "\u0FB8");
ewts2uni_test(ewts_for_superscript + "k+Sh", ewts2uni_test(ewts_for_superscript + "k+Sh",
unicode_for_superscript unicode_for_superscript
+ (false ? "\u0FB9" : "\u0f90\u0fb5")); + (false ? "\u0FB9" : "\u0f90\u0fb5"));
ewts2uni_test(ewts_for_superscript + "W", just_ewts2uni_test(ewts_for_superscript + "W",
unicode_for_superscript + "\u0FBA"); unicode_for_superscript + "\u0FBA");
ewts2uni_test(ewts_for_superscript + "Y", just_ewts2uni_test(ewts_for_superscript + "Y",
unicode_for_superscript + "\u0FBB"); unicode_for_superscript + "\u0FBB");
ewts2uni_test(ewts_for_superscript + "R", just_ewts2uni_test(ewts_for_superscript + "R",
unicode_for_superscript + "\u0FBC"); unicode_for_superscript + "\u0FBC");
ewts2uni_test("\\u0FBE", "\u0FBE"); just_ewts2uni_test("\\u0FBE", "\u0FBE");
ewts2uni_test("\\u0FBF", "\u0FBF"); just_ewts2uni_test("\\u0FBF", "\u0FBF");
ewts2uni_test("\\u0FC0", "\u0FC0"); just_ewts2uni_test("\\u0FC0", "\u0FC0");
ewts2uni_test("\\u0FC1", "\u0FC1"); just_ewts2uni_test("\\u0FC1", "\u0FC1");
ewts2uni_test("\\u0FC2", "\u0FC2"); just_ewts2uni_test("\\u0FC2", "\u0FC2");
ewts2uni_test("\\u0FC3", "\u0FC3"); just_ewts2uni_test("\\u0FC3", "\u0FC3");
ewts2uni_test("\\u0FC4", "\u0FC4"); just_ewts2uni_test("\\u0FC4", "\u0FC4");
ewts2uni_test("\\u0FC5", "\u0FC5"); just_ewts2uni_test("\\u0FC5", "\u0FC5");
ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner just_ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner
ewts2uni_test("\\u0FC7", "\u0FC7"); just_ewts2uni_test("\\u0FC7", "\u0FC7");
ewts2uni_test("\\u0FC8", "\u0FC8"); just_ewts2uni_test("\\u0FC8", "\u0FC8");
ewts2uni_test("\\u0FC9", "\u0FC9"); just_ewts2uni_test("\\u0FC9", "\u0FC9");
ewts2uni_test("\\u0FCA", "\u0FCA"); just_ewts2uni_test("\\u0FCA", "\u0FCA");
ewts2uni_test("\\u0FCB", "\u0FCB"); just_ewts2uni_test("\\u0FCB", "\u0FCB");
ewts2uni_test("\\u0FCC", "\u0FCC"); just_ewts2uni_test("\\u0FCC", "\u0FCC");
ewts2uni_test("\\u0FCF", "\u0FCF"); just_ewts2uni_test("\\u0FCF", "\u0FCF");
ewts2uni_test("\\u0FD0", "\u0FD0"); just_ewts2uni_test("\\u0FD0", "\u0FD0");
ewts2uni_test("\\u0FD1", "\u0FD1"); just_ewts2uni_test("\\u0FD1", "\u0FD1");
ewts2uni_test("_", "\u00a0"); // tibwn.ini says that the Unicode spec wants a non-breaking space. ewts2uni_test("_", "\u00a0"); // tibwn.ini says that the Unicode spec wants a non-breaking space.
ewts2uni_test("\\u534D", "\u534D"); ewts2uni_test("\\u534D", "\u534D");
ewts2uni_test("\\u5350", "\u5350"); ewts2uni_test("\\u5350", "\u5350");
ewts2uni_test("\u534D", "\u534D");
ewts2uni_test("\u5350", "\u5350");
ewts2uni_test("\\u0F88+k", "\u0F88\u0F90"); ewts2uni_test("\\u0F88+k", "\u0F88\u0F90");
ewts2uni_test("\\u0F88+kh", "\u0F88\u0F91"); ewts2uni_test("\\u0F88+kh", "\u0F88\u0F91");
/* TODO(DLC)[EWTS->Tibetan]: /* TODO(DLC)[EWTS->Tibetan]:
Do we want to ever generate \uf021? (NOT \u0f21, but the Do we want to ever generate \uf021? (NOT \u0f21, but the
private-use area (PUA) of Unicode). EWTS->TMW and this private-use area (PUA) of Unicode). EWTS->TMW and this
makes sense, but EWTS->Unicode? */ makes sense, but EWTS->Unicode? Shouldn't we match the
ewts2uni_test("\\uF021", "\uF021"); behavior of TMW->Unicode, regardless? */
ewts2uni_test("\\uF022", "\uF022"); just_ewts2uni_test("\\uF021", "\uF021");
ewts2uni_test("\\uF023", "\uF023"); just_ewts2uni_test("\\uF022", "\uF022");
ewts2uni_test("\\uF024", "\uF024"); just_ewts2uni_test("\\uF023", "\uF023");
ewts2uni_test("\\uF025", "\uF025"); just_ewts2uni_test("\\uF024", "\uF024");
ewts2uni_test("\\uF026", "\uF026"); just_ewts2uni_test("\\uF025", "\uF025");
ewts2uni_test("\\uF027", "\uF027"); just_ewts2uni_test("\\uF026", "\uF026");
ewts2uni_test("\\uF028", "\uF028"); just_ewts2uni_test("\\uF027", "\uF027");
ewts2uni_test("\\uF029", "\uF029"); just_ewts2uni_test("\\uF028", "\uF028");
ewts2uni_test("\\uF02A", "\uF02A"); just_ewts2uni_test("\\uF029", "\uF029");
ewts2uni_test("\\uF02B", "\uF02B"); just_ewts2uni_test("\\uF02A", "\uF02A");
ewts2uni_test("\\uF02C", "\uF02C"); just_ewts2uni_test("\\uF02B", "\uF02B");
ewts2uni_test("\\uF02D", "\uF02D"); just_ewts2uni_test("\\uF02C", "\uF02C");
ewts2uni_test("\\uF02E", "\uF02E"); just_ewts2uni_test("\\uF02D", "\uF02D");
ewts2uni_test("\\uF02F", "\uF02F"); just_ewts2uni_test("\\uF02E", "\uF02E");
ewts2uni_test("\\uF030", "\uF030"); just_ewts2uni_test("\\uF02F", "\uF02F");
ewts2uni_test("\\uF031", "\uF031"); just_ewts2uni_test("\\uF030", "\uF030");
ewts2uni_test("\\uF032", "\uF032"); just_ewts2uni_test("\\uF031", "\uF031");
ewts2uni_test("\\uF033", "\uF033"); just_ewts2uni_test("\\uF032", "\uF032");
ewts2uni_test("\\uF034", "\uF034"); just_ewts2uni_test("\\uF033", "\uF033");
ewts2uni_test("\\uF035", "\uF035"); just_ewts2uni_test("\\uF034", "\uF034");
ewts2uni_test("\\uF036", "\uF036"); just_ewts2uni_test("\\uF035", "\uF035");
ewts2uni_test("\\uF037", "\uF037"); just_ewts2uni_test("\\uF036", "\uF036");
ewts2uni_test("\\uF038", "\uF038"); just_ewts2uni_test("\\uF037", "\uF037");
ewts2uni_test("\\uF039", "\uF039"); just_ewts2uni_test("\\uF038", "\uF038");
ewts2uni_test("\\uF03A", "\uF03A"); just_ewts2uni_test("\\uF039", "\uF039");
ewts2uni_test("\\uF03B", "\uF03B"); just_ewts2uni_test("\\uF03A", "\uF03A");
ewts2uni_test("\\uF03C", "\uF03C"); just_ewts2uni_test("\\uF03B", "\uF03B");
ewts2uni_test("\\uF03D", "\uF03D"); just_ewts2uni_test("\\uF03C", "\uF03C");
ewts2uni_test("\\uF03E", "\uF03E"); just_ewts2uni_test("\\uF03D", "\uF03D");
ewts2uni_test("\\uF03F", "\uF03F"); just_ewts2uni_test("\\uF03E", "\uF03E");
ewts2uni_test("\\uF040", "\uF040"); just_ewts2uni_test("\\uF03F", "\uF03F");
ewts2uni_test("\\uF041", "\uF041"); just_ewts2uni_test("\\uF040", "\uF040");
ewts2uni_test("\\uF042", "\uF042"); just_ewts2uni_test("\\uF041", "\uF041");
just_ewts2uni_test("\\uF042", "\uF042");
} }
public void test__EWTS__long_wowels() { public void test__EWTS__long_wowels() {
ewts2uni_test("k-I~M`~X", "\u0f40\u0f81\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask ewts2uni_test("k-I~M`~X", "\u0f40\u0f71\u0f80\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
} }
public void test__EWTS__32bit_unicode_escapes() { public void test__EWTS__32bit_unicode_escapes() {
assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work
ewts2uni_test("\\uF0010000", just_ewts2uni_test("\\uF0010000",
"[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work: "[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work:
if (RUN_FAILING_TESTS) assert_EWTS_error("\\uF0010000"); // TODO(DLC)[EWTS->Tibetan]: error subsystem is hosed if (RUN_FAILING_TESTS) assert_EWTS_error("\\uF0010000"); // TODO(DLC)[EWTS->Tibetan]: error subsystem is hosed
if (RUN_FAILING_TESTS) { if (RUN_FAILING_TESTS) {
ewts2uni_test("\\ucafe0000", just_ewts2uni_test("\\ucafe0000",
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]"); "[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
// TODO(dchandler): make it "\ucafe0000"); // TODO(dchandler): make it "\ucafe0000");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff"); ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff"); ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
@ -1003,8 +1129,8 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u00000000", "\u0000"); ewts2uni_test("\\u00000000", "\u0000");
ewts2uni_test("\\u00000eff", "\u0eff"); ewts2uni_test("\\u00000eff", "\u0eff");
} }
ewts2uni_test("\\u00000f00", "\u0f00"); just_ewts2uni_test("\\u00000f00", "\u0f00"); // TODO(DLC)[EWTS->Tibetan]: EWTS->TMW is broken for this
ewts2uni_test("\\u00000f40", "\u0f40"); just_ewts2uni_test("\\u00000f40", "\u0f40");
if (RUN_FAILING_TESTS) { if (RUN_FAILING_TESTS) {
ewts2uni_test("\\u00000f70", "\u0f70"); ewts2uni_test("\\u00000f70", "\u0f70");
ewts2uni_test("\\u00000fff", "\u0fff"); ewts2uni_test("\\u00000fff", "\u0fff");
@ -1089,22 +1215,33 @@ public class EWTSTest extends TestCase {
if (RUN_FAILING_TESTS) { if (RUN_FAILING_TESTS) {
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid! ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81"); ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f71\u0f80");
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81"); ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f71\u0f80");
} }
} }
public void testMoreMiscellany() { public void testMoreMiscellany() {
ewts2uni_test("k+Sh+R-i", "\u0f40\u0fb5\u0fbc\u0f80");
ewts2uni_test("k\\u0f35", "\u0f40\u0f35");
ewts2uni_test("k\\u0f72", "\u0f40\u0f72");
ewts2uni_test("k\\u0f73", "\u0f40\u0f71\u0f72");
ewts2uni_test("k\\u0f75", "\u0f40\u0f71\u0f74");
ewts2uni_test("k\\u0f3e", "\u0f40\u0f3e");
ewts2uni_test("k\\u0f3f", "\u0f40\u0f3f");
ewts2uni_test("kHai", "\u0f40\u0f7f\u0f68\u0f7b"); // TODO(DLC)[EWTS->Tibetan]: Is this correct?
ewts2uni_test("r-i", "\u0f62\u0f80"); ewts2uni_test("r-i", "\u0f62\u0f80");
ewts2uni_test("r-I", "\u0f62\u0f81"); ewts2uni_test("r-I", "\u0f62\u0f71\u0f80");
ewts2uni_test("l-i", "\u0f63\u0f80"); ewts2uni_test("l-i", "\u0f63\u0f80");
ewts2uni_test("l-I", "\u0f63\u0f81"); ewts2uni_test("l-I", "\u0f63\u0f71\u0f80");
ewts2uni_test("ga\u0f0bga ga\\u0F0bga", just_ewts2uni_test("ga\u0f0bga ga\\u0F0bga",
"\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42"); "\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42");
ewts2uni_test("ga\u0f0cga*ga\\u0f0Cga", just_ewts2uni_test("ga\u0f0cga*ga\\u0f0Cga",
"\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42"); "\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42");
ewts2uni_test("'jam", ewts2uni_test("'jam",
"\u0f60\u0f47\u0f58"); "\u0f60\u0f47\u0f58");
ewts2uni_test("jamX 'jam~X", ewts2uni_test("jamX 'jam~X",

View file

@ -21,6 +21,7 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt; package org.thdl.tib.text.ttt;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import org.thdl.tib.text.tshegbar.UnicodeUtils; import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.tib.text.DuffCode; import org.thdl.tib.text.DuffCode;
@ -102,124 +103,172 @@ public final class EWTSTraits implements TTraits {
public boolean isWowel(String s) { public boolean isWowel(String s) {
return (getUnicodeForWowel(s) != null); return (getUnicodeForWowel(s) != null);
/* TODO(DLC)[EWTS->Tibetan]: test ko+m+e etc.
// TODO(DLC)[EWTS->Tibetan]: all non-consonant combiners? 0f71 0f87 etc.?
if (s.length() == 1 && isUnicodeWowel(s.charAt(0))) return true;
return ("a".equals(s)
|| "e".equals(s)
|| "i".equals(s)
|| "o".equals(s)
|| "u".equals(s)
|| "U".equals(s)
|| "I".equals(s)
|| "A".equals(s)
|| "-i".equals(s)
|| "-I".equals(s)
|| "au".equals(s)
|| "ai".equals(s)
|| isWowelThatRequiresAChen(s));
// TODO(DLC)[EWTS->Tibetan]:???
*/
} }
public String aVowel() { return "a"; } public String aVowel() { return THDLWylieConstants.WYLIE_aVOWEL; }
public boolean isPostsuffix(String s) { public boolean isPostsuffix(String s) {
return ("s".equals(s) || "d".equals(s)); return ("s".equals(s) || "d".equals(s));
} }
public boolean isPrefix(String l) { public boolean isPrefix(String l) {
return ("'".equals(l) return (THDLWylieConstants.ACHUNG.equals(l)
|| "m".equals(l) || THDLWylieConstants.MA.equals(l)
|| "b".equals(l) || THDLWylieConstants.BA.equals(l)
|| "d".equals(l) || THDLWylieConstants.DA.equals(l)
|| "g".equals(l)); || THDLWylieConstants.GA.equals(l));
} }
public boolean isSuffix(String l) { public boolean isSuffix(String l) {
return ("s".equals(l) return (isPrefix(l)
|| "g".equals(l) || THDLWylieConstants.SA.equals(l)
|| "d".equals(l) || THDLWylieConstants.NGA.equals(l)
|| "m".equals(l) || THDLWylieConstants.NA.equals(l)
|| "'".equals(l) || THDLWylieConstants.LA.equals(l)
|| "b".equals(l) || THDLWylieConstants.RA.equals(l));
|| "ng".equals(l)
|| "n".equals(l)
|| "l".equals(l)
|| "r".equals(l));
} }
/** Returns l, since this is EWTS's traits class. */ /** Returns the best EWTS for l, which is often l but not always
public String getEwtsForConsonant(String l) { return l; } * thanks to Unicode escapes. NOTE: For "\u0f42", you don't want
* to return "g" lest "\\u0f42ya " become the wrong thing under
* EWTS->Unicode. */
public String getEwtsForConsonant(String l) {
// Delegates to helpGetEwts, the same private helper getEwtsForOther uses.
return helpGetEwts(l);
}
/** Returns l, since this is EWTS's traits class. */ /** Returns the best EWTS for l, which is often l but not always
public String getEwtsForOther(String l) { return l; } * thanks to Unicode escapes. */
public String getEwtsForOther(String l) {
// Delegates to helpGetEwts, the same private helper getEwtsForConsonant uses.
return helpGetEwts(l);
}
/** Shared implementation behind getEwtsForConsonant and
 *  getEwtsForOther.  A one-character input that is either inside
 *  the [PUA_MIN, PUA_MAX] range or listed in the table below is
 *  rendered as a Unicode escape sequence; every other input is
 *  returned unchanged.
 *  @param l the text to find EWTS for
 *  @return the escape sequence for the listed single characters,
 *  otherwise l itself */
private String helpGetEwts(String l) {
    // Only single characters are ever rewritten.
    if (l.length() != 1) {
        return l;
    }
    final char ch = l.charAt(0);
    // Characters that are always written out as an escape sequence.
    final String alwaysEscaped
        = "\u0F01\u0F09\u0F0A\u0F10\u0F12\u0F13\u0F15\u0F16\u0F17\u0F18\u0F19\u0F1A\u0F1B\u0F1C\u0F1D\u0F1E\u0F1F\u0F2A\u0F2B\u0F2C\u0F2D\u0F2E\u0F2F\u0F30\u0F31\u0F32\u0F33\u0F36\u0F38\u0F86\u0F87\u0F88\u0F89\u0F8A\u0F8B\u0FBE\u0FBF\u0FC0\u0FC1\u0FC2\u0FC3\u0FC4\u0FC5\u0FC6\u0FC7\u0FC8\u0FC9\u0FCA\u0FCB\u0FCC\u0FCF\u5350\u534D";
    final boolean inPrivateUseRange
        = (ch >= THDLWylieConstants.PUA_MIN
           && ch <= THDLWylieConstants.PUA_MAX);
    if (inPrivateUseRange || alwaysEscaped.indexOf(ch) >= 0) {
        return UnicodeUtils.unicodeCodepointToString(ch, false, "\\u", true);
    }
    if (false) { // TODO(dchandler): it's too late in the game to do this.  EWTS->TMW is broken for \u0f00, \u0f02, and \u0f03 right now, fix that.
        if ("\u0f02".equals(l)) return "u~M`H"; // too long for a single hash key, see?
        if ("\u0f03".equals(l)) return "u~M`:"; // ditto
    }
    return l;
}
/** Returns l, since this is EWTS's traits class. */ /** Returns l, since this is EWTS's traits class. */
public String getEwtsForWowel(String l) { return l; } public String getEwtsForWowel(String l) { return l; }
public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); } public TTshegBarScanner scanner() { return EWTSTshegBarScanner.instance(); }
public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) /** If needle is found in haystack, then haystack without the
throws IllegalArgumentException * first instance of needle is returned. Otherwise haystack
* itself is returned. */
private static String removeFirstMatch(String haystack, String needle) {
    final int start = haystack.indexOf(needle);
    if (start < 0) {
        // No occurrence at all: hand back the very same string.
        return haystack;
    }
    // Splice out the first occurrence only; later ones are kept.
    final int end = start + needle.length();
    return haystack.substring(0, start) + haystack.substring(end);
}
private static HashMap bestEwtsMap = null;
/** Looks up the preferred EWTS for a single wowel.  The lookup
 *  table is built on first use.
 *  @param wowel a single (unstacked) wowel, possibly a raw Unicode
 *  character that came from an escape sequence
 *  @return the table entry for wowel if there is one, otherwise
 *  wowel itself */
private static String getBestEwtsForSingleWowel(String wowel) {
    // NOTE: Not MT-safe
    if (bestEwtsMap == null) {
        // Unicode-escape sequences are handled early.  To be
        // correct, we must "unescape" here any Unicode escape to
        // whatever tibwn.ini has.  (TODO(dchandler): tibwn.ini
        // has this info, use that instead of duplicating it in
        // this code.)
        final String[][] unicodeToEwts = {
            { "\u0f18", THDLWylieConstants.U0F18 },
            { "\u0f19", THDLWylieConstants.U0F19 },
            { "\u0f35", THDLWylieConstants.U0F35 },
            { "\u0f37", THDLWylieConstants.U0F37 },
            { "\u0f39", THDLWylieConstants.WYLIE_TSA_PHRU },
            { "\u0f3e", THDLWylieConstants.U0F3E },
            { "\u0f3f", THDLWylieConstants.U0F3F },
            { "\u0f84", THDLWylieConstants.U0F84 },
            { "\u0f86", THDLWylieConstants.U0F86 },
            { "\u0f87", THDLWylieConstants.U0F87 },
            { "\u0fc6", THDLWylieConstants.U0FC6 },

            { "\u0f71", THDLWylieConstants.A_VOWEL },
            { "\u0f72", THDLWylieConstants.i_VOWEL },
            { "\u0f74", THDLWylieConstants.u_VOWEL },
            { "\u0f7a", THDLWylieConstants.e_VOWEL },
            { "\u0f7b", THDLWylieConstants.ai_VOWEL },
            { "\u0f7c", THDLWylieConstants.o_VOWEL },
            { "\u0f7d", THDLWylieConstants.au_VOWEL },
            { "\u0f7e", THDLWylieConstants.BINDU },
            { "\u0f80", THDLWylieConstants.reverse_i_VOWEL },
            { "\u0f81", THDLWylieConstants.reverse_I_VOWEL },

            { "\u0f73", THDLWylieConstants.I_VOWEL }, // not in tibwn.ini
            { "\u0f75", THDLWylieConstants.U_VOWEL }, // not in tibwn.ini
        };
        bestEwtsMap = new HashMap(20);
        for (int k = 0; k < unicodeToEwts.length; k++) {
            bestEwtsMap.put(unicodeToEwts[k][0], unicodeToEwts[k][1]);
        }
    }
    final String best = (String)bestEwtsMap.get(wowel);
    // Anything not in the table is passed through untouched.
    return (best == null) ? wowel : best;
}
public void getDuffForWowel(ArrayList duff, DuffCode preceding,
String wowel)
throws ConversionException
{ {
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test. boolean preceding_added[] = new boolean[] { false };
String[] wowels = wowel.split("\\+");
for (int i = 0; i < wowels.length; i++) {
getDuffForSingleWowel(duff, preceding,
getBestEwtsForSingleWowel(wowels[i]),
preceding_added);
}
}
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work. /** Wowels can stack. This works on a single wowel. */
private void getDuffForSingleWowel(ArrayList duff, DuffCode preceding,
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work. String wowel, boolean preceding_added[])
throws ConversionException
// Order matters here. {
boolean context_added[] = new boolean[] { false }; if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) { // TODO(dchandler): ka+o deserves at least a warning. kaM, though, does not. Do we handle it?
if (wowel.equals(THDLWylieConstants.WYLIE_aVOWEL)) { TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, preceding_added);
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added); wowel = "";
} else { } else {
// TODO(DLC)[EWTS->Tibetan]: test vowel stacking // We call these combining because the TMW font treats
if (wowel.indexOf(THDLWylieConstants.U_VOWEL) >= 0) { // such a vowel specially depending on the preceding glyph
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added); // with which it combines.
} String combining_wowels[] = new String[] {
if (wowel.indexOf(THDLWylieConstants.reverse_I_VOWEL) >= 0) { // order does not matter
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_I_VOWEL, context_added); THDLWylieConstants.U_VOWEL,
} else if (wowel.indexOf(THDLWylieConstants.I_VOWEL) >= 0) { THDLWylieConstants.reverse_I_VOWEL,
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added); THDLWylieConstants.I_VOWEL,
} THDLWylieConstants.A_VOWEL,
if (wowel.indexOf(THDLWylieConstants.A_VOWEL) >= 0) { THDLWylieConstants.ai_VOWEL,
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added); THDLWylieConstants.reverse_i_VOWEL,
} THDLWylieConstants.i_VOWEL,
if (wowel.indexOf(THDLWylieConstants.ai_VOWEL) >= 0) { THDLWylieConstants.e_VOWEL,
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added); THDLWylieConstants.o_VOWEL,
} else if (wowel.indexOf(THDLWylieConstants.reverse_i_VOWEL) >= 0) { THDLWylieConstants.au_VOWEL,
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added); THDLWylieConstants.u_VOWEL
} else if (wowel.indexOf(THDLWylieConstants.i_VOWEL) >= 0) { };
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added); for (int i = 0; i < combining_wowels.length; i++) {
} if (wowel.equals(combining_wowels[i])) {
if (wowel.indexOf(THDLWylieConstants.e_VOWEL) >= 0) { TibTextUtils.getVowel(duff, preceding, combining_wowels[i],
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added); preceding_added);
} wowel = removeFirstMatch(wowel, combining_wowels[i]);
if (wowel.indexOf(THDLWylieConstants.o_VOWEL) >= 0) { }
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
}
if (wowel.indexOf(THDLWylieConstants.au_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
} else if (wowel.indexOf(THDLWylieConstants.u_VOWEL) >= 0) {
TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
}
if (wowel.indexOf("~X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
duff.add(TibetanMachineWeb.getGlyph("~X"));
} else if (wowel.indexOf("X") >= 0) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
duff.add(TibetanMachineWeb.getGlyph("X"));
} }
} }
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate. // FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
if (wowel.indexOf(THDLWylieConstants.BINDU) >= 0 if (wowel.equals(THDLWylieConstants.BINDU)) {
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
// rely on the fact that we know every Wylie wowel that
// contains 'M'. Let's, instead, parse the wowel.
&& wowel.indexOf(THDLWylieConstants.U0F82) < 0
&& wowel.indexOf(THDLWylieConstants.U0F83) < 0) {
DuffCode last = null; DuffCode last = null;
if (!context_added[0]) { if (!preceding_added[0]) {
last = preceding; last = preceding;
} else if (duff.size() > 0) { } else if (duff.size() > 0) {
last = (DuffCode)duff.get(duff.size() - 1); last = (DuffCode)duff.get(duff.size() - 1);
@ -227,52 +276,77 @@ public final class EWTSTraits implements TTraits {
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone??? // TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
} }
TibTextUtils.getBindu(duff, last); TibTextUtils.getBindu(duff, last);
context_added[0] = true; preceding_added[0] = true;
wowel = removeFirstMatch(wowel, THDLWylieConstants.BINDU);
} }
if (!context_added[0]) {
if (!preceding_added[0]) {
duff.add(preceding); duff.add(preceding);
preceding_added[0] = true;
} }
if (wowel.indexOf('H') >= 0)
duff.add(TibetanMachineWeb.getGlyph("H")); String standalone_wowels[] = new String[] {
int ix; // order does not matter
if ((ix = wowel.indexOf(THDLWylieConstants.WYLIE_TSA_PHRU)) >= 0) {
// This likely won't look good! TMW has glyphs for [va] // This likely won't look good! TMW has glyphs for [va]
// and [fa], so use that transliteration if you care, not // and [fa], so use that transliteration if you care, not
// [ph^] or [b^]. // [ph^] or [b^].
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.WYLIE_TSA_PHRU)); THDLWylieConstants.WYLIE_TSA_PHRU,
StringBuffer sb = new StringBuffer(wowel); THDLWylieConstants.U0F35,
sb.replace(ix, ix + THDLWylieConstants.WYLIE_TSA_PHRU.length(), ""); THDLWylieConstants.U0F37,
wowel = sb.toString(); THDLWylieConstants.U0F7F,
THDLWylieConstants.U0F82,
THDLWylieConstants.U0F83,
THDLWylieConstants.U0F86,
THDLWylieConstants.U0F87,
THDLWylieConstants.U0F19,
THDLWylieConstants.U0F18,
THDLWylieConstants.U0FC6,
THDLWylieConstants.U0F3E,
THDLWylieConstants.U0F3F,
THDLWylieConstants.U0F84,
};
for (int i = 0; i < standalone_wowels.length; i++) {
if (wowel.equals(standalone_wowels[i])) {
ThdlDebug.verify(preceding_added[0]);
duff.add(TibetanMachineWeb.getGlyph(standalone_wowels[i]));
wowel = removeFirstMatch(wowel, standalone_wowels[i]);
}
} }
if ((ix = wowel.indexOf(THDLWylieConstants.U0F82)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F82));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F82.length(), "");
wowel = sb.toString();
}
if ((ix = wowel.indexOf(THDLWylieConstants.U0F83)) >= 0) {
duff.add(TibetanMachineWeb.getGlyph(THDLWylieConstants.U0F83));
StringBuffer sb = new StringBuffer(wowel);
sb.replace(ix, ix + THDLWylieConstants.U0F83.length(), "");
wowel = sb.toString();
}
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.
// TODO(DLC)[EWTS->Tibetan]:: are bindus are screwed up in the unicode output? i see (with tmuni font) lone bindus without glyphs to stack on // We verify that no part of wowel is discarded.
if (wowel.length() > 0) {
throw new ConversionException(
"Full wowel was not handled, there remains: " + wowel);
}
// TODO(DLC)[EWTS->Tibetan]:: are bindus are screwed up in the
// unicode output? i see (with tmuni font) lone bindus
// without glyphs to stack on
} }
public String getUnicodeForWowel(String wowel) { public String getUnicodeForWowel(String wowel) {
if ("a".equals(wowel)) if (THDLWylieConstants.WYLIE_aVOWEL.equals(wowel))
return ""; return "";
return helpGetUnicodeForWowel(wowel); return helpGetUnicodeForWowel(wowel);
} }
private String helpGetUnicodeForWowel(String wowel) { private String helpGetUnicodeForWowel(String wowel) {
if ("a".equals(wowel)) if (THDLWylieConstants.WYLIE_aVOWEL.equals(wowel))
return null; // ko+a+e is invalid, e.g. return null; // ko+a+e is invalid, e.g.
if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0))) if (wowel.length() == 1 && isUnicodeWowel(wowel.charAt(0))) {
if ("\u0f75".equals(wowel))
return "\u0f71\u0f74"; // \u0f75 is discouraged
if ("\u0f81".equals(wowel))
return "\u0f71\u0f80"; // \u0f81 is discouraged
if ("\u0f73".equals(wowel))
return "\u0f71\u0f72"; // \u0f73 is discouraged
if ("\u0f79".equals(wowel))
return "\u0fb3\u0f81"; // \u0f79 is discouraged
if ("\u0f78".equals(wowel))
return "\u0fb3\u0f80"; // \u0f78 is discouraged
return wowel; return wowel;
}
// handle o+u, etc. // handle o+u, etc.
int i; int i;
if ((i = wowel.indexOf("+")) >= 0) { if ((i = wowel.indexOf("+")) >= 0) {
@ -290,27 +364,27 @@ public final class EWTSTraits implements TTraits {
} else { } else {
// Handle vowels. (TODO(dchandler): tibwn.ini has this // Handle vowels. (TODO(dchandler): tibwn.ini has this
// info, use that instead of duplicating it in this code.) // info, use that instead of duplicating it in this code.)
if ("i".equals(wowel)) return "\u0f72"; if (THDLWylieConstants.i_VOWEL.equals(wowel)) return "\u0f72";
if ("u".equals(wowel)) return "\u0f74"; if (THDLWylieConstants.u_VOWEL.equals(wowel)) return "\u0f74";
if ("A".equals(wowel)) return "\u0f71"; if (THDLWylieConstants.A_VOWEL.equals(wowel)) return "\u0f71";
if ("U".equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged if (THDLWylieConstants.U_VOWEL.equals(wowel)) return "\u0f71\u0f74"; // \u0f75 is discouraged
if ("e".equals(wowel)) return "\u0f7a"; if (THDLWylieConstants.e_VOWEL.equals(wowel)) return "\u0f7a";
if ("o".equals(wowel)) return "\u0f7c"; if (THDLWylieConstants.o_VOWEL.equals(wowel)) return "\u0f7c";
if ("-i".equals(wowel)) return "\u0f80"; if (THDLWylieConstants.reverse_i_VOWEL.equals(wowel)) return "\u0f80";
if ("ai".equals(wowel)) return "\u0f7b"; if (THDLWylieConstants.ai_VOWEL.equals(wowel)) return "\u0f7b";
if ("au".equals(wowel)) return "\u0f7d"; if (THDLWylieConstants.au_VOWEL.equals(wowel)) return "\u0f7d";
if ("-I".equals(wowel)) return "\u0f81"; if (THDLWylieConstants.reverse_I_VOWEL.equals(wowel)) return "\u0f71\u0f80"; // \u0f81 is discouraged
if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged if (THDLWylieConstants.I_VOWEL.equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
// TODO(DLC)[EWTS->Tibetan]: test, test, test. // TODO(DLC)[EWTS->Tibetan]: what about \u0f3e and \u0f3f!!!!
if ("M".equals(wowel)) return "\u0f7e"; if (THDLWylieConstants.BINDU.equals(wowel)) return "\u0f7e";
if ("H".equals(wowel)) return "\u0f7f"; if (THDLWylieConstants.U0F7F.equals(wowel)) return "\u0f7f";
if ("?".equals(wowel)) return "\u0f84"; if (THDLWylieConstants.U0F84.equals(wowel)) return "\u0f84";
if ("~M".equals(wowel)) return "\u0f83"; if (THDLWylieConstants.U0F83.equals(wowel)) return "\u0f83";
if ("~M`".equals(wowel)) return "\u0f82"; if (THDLWylieConstants.U0F82.equals(wowel)) return "\u0f82";
if ("X".equals(wowel)) return "\u0f37"; if (THDLWylieConstants.U0F37.equals(wowel)) return "\u0f37";
if ("~X".equals(wowel)) return "\u0f35"; if (THDLWylieConstants.U0F35.equals(wowel)) return "\u0f35";
if ("^".equals(wowel)) return "\u0f39"; if (THDLWylieConstants.WYLIE_TSA_PHRU.equals(wowel)) return "\u0f39";
return null; return null;
} }
@ -324,9 +398,9 @@ public final class EWTSTraits implements TTraits {
for (int i = 0; i < l.length(); i++) { for (int i = 0; i < l.length(); i++) {
char ch = l.charAt(i); char ch = l.charAt(i);
if ((ch < '\u0f00' || ch > '\u0fff') if ((ch < '\u0f00' || ch > '\u0fff')
&& SAUVASTIKA != ch && THDLWylieConstants.SAUVASTIKA != ch
&& SWASTIKA != ch && THDLWylieConstants.SWASTIKA != ch
&& (ch < PUA_MIN || ch > PUA_MAX) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all. && (ch < THDLWylieConstants.PUA_MIN || ch > THDLWylieConstants.PUA_MAX) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all.
&& '\n' != ch && '\n' != ch
&& '\r' != ch) { && '\r' != ch) {
// TODO(DLC)[EWTS->Tibetan]: Is this the place // TODO(DLC)[EWTS->Tibetan]: Is this the place
@ -346,6 +420,8 @@ public final class EWTSTraits implements TTraits {
if ("Y".equals(l)) return "\u0fbb"; if ("Y".equals(l)) return "\u0fbb";
if ("W".equals(l)) return "\u0fba"; if ("W".equals(l)) return "\u0fba";
// TODO(dchandler): use tibwn.ini -- it has this same info.
// g+h etc. should not be inputs to this function, but for // g+h etc. should not be inputs to this function, but for
// completeness they're here. // completeness they're here.
if ("k".equals(l)) return "\u0F90"; if ("k".equals(l)) return "\u0F90";
@ -455,18 +531,24 @@ public final class EWTSTraits implements TTraits {
public boolean vowelsMayStack() { return true; } public boolean vowelsMayStack() { return true; }
public boolean isWowelThatRequiresAChen(String s) { public boolean isWowelThatRequiresAChen(String s) {
// TODO(DLC)[EWTS->Tibetan]: fix me! // TODO(DLC)[EWTS->Tibetan]: not sure why we pick this subset.
return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0)) // Why don't we use a negative set of regular vowels like "i",
|| "?MHX^".indexOf(s.charAt(0)) >= 0)) // "o", etc.?
|| "~X".equals(s) return ((s.length() == 1
|| "~M".equals(s) && (isUnicodeWowelThatRequiresAChen(s.charAt(0))))
|| "~M`".equals(s) || THDLWylieConstants.BINDU.equals(s)
); || THDLWylieConstants.U0F35.equals(s)
|| THDLWylieConstants.U0F37.equals(s)
|| THDLWylieConstants.U0F7F.equals(s)
|| THDLWylieConstants.U0F82.equals(s)
|| THDLWylieConstants.U0F83.equals(s)
|| THDLWylieConstants.U0F84.equals(s)
|| THDLWylieConstants.WYLIE_TSA_PHRU.equals(s));
} }
public boolean isUnicodeWowelThatRequiresAChen(char ch) { public boolean isUnicodeWowelThatRequiresAChen(char ch) {
// TODO(DLC)[EWTS->Tibetan]: ask if 18 19 3e 3f combine only with digits // TODO(DLC)[EWTS->Tibetan]: ask if 18 19 3e 3f combine only with digits
return "\u0f39\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0; return ("\u0f39\u0f35\u0f37\u0f18\u0f19\u0f3e\u0f3f\u0f86\u0f87\u0fc6".indexOf(ch) >= 0);
} }
public boolean couldBeValidStack(TPairList pl) { public boolean couldBeValidStack(TPairList pl) {
@ -485,33 +567,9 @@ public final class EWTSTraits implements TTraits {
public boolean stackingMustBeExplicit() { return true; } public boolean stackingMustBeExplicit() { return true; }
public String U0F7F() { return "H"; } public String U0F7F() { return THDLWylieConstants.U0F7F; }
public String U0F35() { return "~X"; } public String U0F35() { return THDLWylieConstants.U0F35; }
public String U0F37() { return "X"; } public String U0F37() { return THDLWylieConstants.U0F37; }
/** The EWTS standard mentions this character specifically. See
http://www.symbols.com/encyclopedia/15/155.html to learn about
its meaning as relates to Buddhism.
*/
static final char SAUVASTIKA = '\u534d';
/** The EWTS standard mentions this character specifically. See
http://www.symbols.com/encyclopedia/15/151.html to learn about
its meaning as relates to Buddhism.
*/
static final char SWASTIKA = '\u5350';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
static final char PUA_MIN = '\uf021';
/** EWTS has some glyphs not specified by Unicode in the
* private-use area (PUA). EWTS puts them in the range [PUA_MIN,
* PUA_MAX]. (Note that \uf042 is the highest in use as of July
* 2, 2005.) */
static final char PUA_MAX = '\uf0ff';
} }

View file

@ -21,6 +21,8 @@ package org.thdl.tib.text.ttt;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.ArrayList; import java.util.ArrayList;
import org.thdl.tib.text.THDLWylieConstants;
/** /**
* This singleton class is able to break up Strings of EWTS text (for * This singleton class is able to break up Strings of EWTS text (for
* example, an entire sutra file) into tsheg bars, comments, etc. * example, an entire sutra file) into tsheg bars, comments, etc.
@ -76,8 +78,11 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
StringBuffer sb = new StringBuffer(s); StringBuffer sb = new StringBuffer(s);
ExpandEscapeSequences(sb); ExpandEscapeSequences(sb);
int sl = sb.length(); int sl = sb.length();
// TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working // TODO(DLC)[EWTS->Tibetan]:: '@#', in ewts->tmw, is not working (probably because)
// TODO(DLC)[EWTS->Tibetan]:: 'jamX 'jam~X one is not working in ->tmw mode // TODO(DLC)[EWTS->Tibetan]:: '#', in ewts->tmw, is not working
//
// TODO(DLC)[EWTS->Tibetan]:: 'jamX one is not working in ewts->tmw mode in the sense that X appears under the last glyph of the three instead of the middle glyph
//
// TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working // TODO(DLC)[EWTS->Tibetan]:: dzaHsogs is not working
for (int i = 0; i < sl; i++) { // i is modified in the loop, also for (int i = 0; i < sl; i++) { // i is modified in the loop, also
if (isValidInsideTshegBar(sb.charAt(i))) { if (isValidInsideTshegBar(sb.charAt(i))) {
@ -102,14 +107,14 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
al.add(new TString("EWTS", "//", al.add(new TString("EWTS", "//",
TString.TIBETAN_PUNCTUATION)); TString.TIBETAN_PUNCTUATION));
++i; ++i;
} else if ((sb.charAt(i) >= EWTSTraits.PUA_MIN } else if ((sb.charAt(i) >= THDLWylieConstants.PUA_MIN
&& sb.charAt(i) <= EWTSTraits.PUA_MAX) && sb.charAt(i) <= THDLWylieConstants.PUA_MAX)
|| (sb.charAt(i) >= '\u0f00' && sb.charAt(i) <= '\u0f17') || (sb.charAt(i) >= '\u0f00' && sb.charAt(i) <= '\u0f17')
|| (sb.charAt(i) >= '\u0f1a' && sb.charAt(i) <= '\u0f1f') || (sb.charAt(i) >= '\u0f1a' && sb.charAt(i) <= '\u0f1f')
|| (sb.charAt(i) >= '\u0fbe' && sb.charAt(i) <= '\u0fcc') || (sb.charAt(i) >= '\u0fbe' && sb.charAt(i) <= '\u0fcc')
|| (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1') || (sb.charAt(i) >= '\u0fcf' && sb.charAt(i) <= '\u0fd1')
|| (EWTSTraits.SAUVASTIKA == sb.charAt(i)) || (THDLWylieConstants.SAUVASTIKA == sb.charAt(i))
|| (EWTSTraits.SWASTIKA == sb.charAt(i)) || (THDLWylieConstants.SWASTIKA == sb.charAt(i))
|| (" /;|!:=_@#$%<>()*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i)) || (" /;|!:=_@#$%<>()*&\r\n\t\u0f36\u0f38\u0f89\u0f8a\u0f8b".indexOf(sb.charAt(i))
>= 0)) { >= 0)) {
al.add(new TString("EWTS", sb.substring(i, i+1), al.add(new TString("EWTS", sb.substring(i, i+1),
@ -186,7 +191,31 @@ class EWTSTshegBarScanner extends TTshegBarScanner {
// leave x == -1 // leave x == -1
} }
if (x >= 0) { if (x >= 0) {
sb.replace(i, i + "\\uXXXX".length(), new String(new char[] { (char)x })); String replacement = String.valueOf((char)x);
if (false) {
// This would ruin EWTS->Unicode to
// help EWTS->TMW, so we don't do it.
// TODO(dchandler): Fix EWTS->TMW for
// \u0f02 and \u0f03.
// A nasty little HACK for you:
//
// TODO(dchandler): we may create "ga..u~M`H..ha" which may cause errors
String hack = null;
if ('\u0f02' == x) {
hack = "u~M`H"; // hard-coded EWTS
} else if ('\u0f03' == x) {
hack = "u~M`:"; // hard-coded EWTS
} else if ('\u0f00' == x) {
hack = "oM"; // hard-coded EWTS
}
if (null != hack) {
replacement = "." + hack + "."; // hard-coded EWTS disambiguators
i += replacement.length() - 1;
}
}
sb.replace(i, i + "\\uXXXX".length(), replacement);
continue; continue;
} }
} }

View file

@ -29,6 +29,7 @@ import java.util.ArrayList;
import org.thdl.tib.text.DuffCode; import org.thdl.tib.text.DuffCode;
import org.thdl.tib.text.TibetanDocument; import org.thdl.tib.text.TibetanDocument;
import org.thdl.tib.text.TibetanMachineWeb; import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.THDLWylieConstants;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
@ -699,7 +700,13 @@ public class TConverter {
} else { } else {
String wy = ttraits.getEwtsForOther(s.getText()); String wy = ttraits.getEwtsForOther(s.getText());
if (null == wy) throw new Error("No wylie for ACIP " + s.getText()); if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
duff = new Object[] { TibetanMachineWeb.getGlyph(wy) }; duff = new Object[] { TibetanMachineWeb.maybeGetGlyph(wy) };
if (null == duff[0]) {
duff[0] =
ErrorsAndWarnings.getMessage(
137, shortMessages,
s.getText(), ttraits);
}
} }
} }
} }
@ -730,8 +737,8 @@ public class TConverter {
ThdlDebug.verify(1 == s.getText().length()); ThdlDebug.verify(1 == s.getText().length());
if (null != writer) { if (null != writer) {
char ch = s.getText().charAt(0); char ch = s.getText().charAt(0);
if (ch >= EWTSTraits.PUA_MIN if (ch >= THDLWylieConstants.PUA_MIN
&& ch <= EWTSTraits.PUA_MAX) { && ch <= THDLWylieConstants.PUA_MAX) {
hasErrors = true; hasErrors = true;
String errorMessage = String errorMessage =
"[#ERROR " "[#ERROR "

View file

@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.tib.text.TGCPair; import org.thdl.tib.text.TGCPair;
import org.thdl.tib.text.TibetanMachineWeb; import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
@ -710,47 +711,49 @@ class TPairList {
wylieForConsonant.append(lastPair.getWylie(true, false)); wylieForConsonant.append(lastPair.getWylie(true, false));
String hashKey = wylieForConsonant.toString(); String hashKey = wylieForConsonant.toString();
// Because EWTS has special handling for full-formed if (traits.isACIP()) {
// subjoined consonants, we have special handling here. // Because EWTS has special handling for full-formed
if ("r+y".equals(hashKey)) // subjoined consonants, we have special handling here.
hashKey = "r+Y"; if ("r+y".equals(hashKey))
else if ("y+y".equals(hashKey)) hashKey = "r+Y";
hashKey = "y+Y"; else if ("y+y".equals(hashKey))
else if ("N+D+y".equals(hashKey)) hashKey = "y+Y";
hashKey = "N+D+Y"; else if ("N+D+y".equals(hashKey))
else if ("N+D+r+y".equals(hashKey)) hashKey = "N+D+Y";
hashKey = "N+D+R+y"; else if ("N+D+r+y".equals(hashKey))
else if ("k+Sh+r".equals(hashKey)) hashKey = "N+D+R+y";
hashKey = "k+Sh+R"; else if ("k+Sh+r".equals(hashKey))
hashKey = "k+Sh+R";
// TPair.getWylie(..) returns "W" sometimes when "w" is what // TPair.getWylie(..) returns "W" sometimes when "w" is what
// really should be returned. ("V" always causes "w" to be // really should be returned. ("V" always causes "w" to be
// returned, which is fine.) We'll change "W" to "w" here if // returned, which is fine.) We'll change "W" to "w" here if
// we need to. We do it only for a few known stacks (the ones // we need to. We do it only for a few known stacks (the ones
// in TMW). // in TMW).
if ("W".equals(hashKey)) if ("W".equals(hashKey))
hashKey = "w"; hashKey = "w";
else if ("W+y".equals(hashKey)) else if ("W+y".equals(hashKey))
hashKey = "w+y"; hashKey = "w+y";
else if ("W+r".equals(hashKey)) else if ("W+r".equals(hashKey))
hashKey = "w+r"; hashKey = "w+r";
else if ("W+n".equals(hashKey)) else if ("W+n".equals(hashKey))
hashKey = "w+n"; hashKey = "w+n";
else if ("W+W".equals(hashKey)) else if ("W+W".equals(hashKey))
hashKey = "w+W"; hashKey = "w+W";
if ("r+Y".equals(hashKey) if ("r+Y".equals(hashKey)
|| "r+W".equals(hashKey) || "r+W".equals(hashKey)
|| "r+sh".equals(hashKey) || "r+sh".equals(hashKey)
|| "r+sh+y".equals(hashKey) || "r+sh+y".equals(hashKey)
|| "r+Sh".equals(hashKey) || "r+Sh".equals(hashKey)
|| "r+Sh+N".equals(hashKey) || "r+Sh+N".equals(hashKey)
|| "r+Sh+N+y".equals(hashKey) || "r+Sh+N+y".equals(hashKey)
|| "r+Sh+m".equals(hashKey) || "r+Sh+m".equals(hashKey)
|| "r+Sh+y".equals(hashKey) || "r+Sh+y".equals(hashKey)
|| "r+s".equals(hashKey) || "r+s".equals(hashKey)
) { ) {
hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc. hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
}
} }
if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
@ -774,7 +777,7 @@ class TPairList {
traits.getDuffForWowel(duffsAndErrors, traits.getDuffForWowel(duffsAndErrors,
TibetanMachineWeb.getGlyph(hashKey), TibetanMachineWeb.getGlyph(hashKey),
lastPair.getRight()); lastPair.getRight());
} catch (IllegalArgumentException e) { } catch (ConversionException e) {
// TODO(dchandler): Error 137 isn't the perfect // TODO(dchandler): Error 137 isn't the perfect
// message. Try EWTS [RAM], e.g. to see why. It acts // message. Try EWTS [RAM], e.g. to see why. It acts
// like we're trying to find a single glyph for (R // like we're trying to find a single glyph for (R

View file

@ -20,7 +20,12 @@ Contributor(s): ______________________________________.
package org.thdl.tib.text.ttt; package org.thdl.tib.text.ttt;
import java.util.Arrays;
import java.util.List;
import java.util.Comparator;
import org.thdl.tib.text.TibetanMachineWeb; import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.tib.text.THDLWylieConstants;
/** A factory for creating {@link TPairList TPairLists} from /** A factory for creating {@link TPairList TPairLists} from
* Strings of ACIP. * Strings of ACIP.
@ -374,6 +379,85 @@ class TPairListFactory {
return 0; return 0;
} }
/** Puts the wowels attached to tp into canonical order.  The
 *  wowel string (the right-hand side of tp) is split on '+', the
 *  pieces are arranged in the order that Section 9.11 of the
 *  Unicode Standard, version 4.0.1, would have us use, and the
 *  pieces are then glued back together with '+'.
 *
 *  @param tp the pair whose wowel is to be put in order
 *  @return tp itself, untouched, when its wowel is null, empty, or
 *  just "+"; otherwise a new TPair that has the traits and the
 *  left-hand side of tp and the reordered wowel */
private static TPair ewtsSortWowels(TPair tp) {
    String right = tp.getRight();
    if (null == right || right.length() == 0 || "+".equals(right)) {
        // No wowel worth sorting; hand back the same instance.
        return tp;
    }

    Comparator unicodeOrder = new Comparator() {
        /** Every wowel we know how to rank, earliest first.  Each
         *  Unicode character is listed next to its EWTS spelling.
         *  @see
         *  org.thdl.tib.text.tshegbar.UnicodeUtils#fixSomeOrderingErrorsInTibetanUnicode(StringBuffer) */
        private final List ranking = Arrays.asList(new String[] {
            // equivalence class:
            "\u0f39", THDLWylieConstants.WYLIE_TSA_PHRU,

            // equivalence class:
            THDLWylieConstants.WYLIE_aVOWEL,

            // equivalence class (TODO(dchandler): in a perfect
            // world, the composite wowels I, U, and -I would be
            // decomposed and their components sorted):
            "\u0f71", THDLWylieConstants.A_VOWEL,
            "\u0f73", THDLWylieConstants.I_VOWEL,
            "\u0f75", THDLWylieConstants.U_VOWEL,
            "\u0f81", THDLWylieConstants.reverse_I_VOWEL,
            "\u0f74", THDLWylieConstants.u_VOWEL,

            // equivalence class ("e" is always placed before "o"
            // here, though either order would work):
            "\u0f72", THDLWylieConstants.i_VOWEL,
            "\u0f7a", THDLWylieConstants.e_VOWEL,
            "\u0f7b", THDLWylieConstants.ai_VOWEL,
            "\u0f7c", THDLWylieConstants.o_VOWEL,
            "\u0f7d", THDLWylieConstants.au_VOWEL,
            "\u0f80", THDLWylieConstants.reverse_i_VOWEL,

            // equivalence class:
            "\u0f7e", THDLWylieConstants.BINDU,
            "\u0f82", THDLWylieConstants.U0F82,
            "\u0f83", THDLWylieConstants.U0F83,
            "\u0f86", THDLWylieConstants.U0F86,
            "\u0f87", THDLWylieConstants.U0F87,

            // TODO(dchandler): should U+0F35, U+0F37, U+0F84, and
            // U+0FC6 (THDLWylieConstants.U0F35, U0F37, U0F84,
            // U0FC6) be ranked with the other under-line wowels
            // like U+0F74?  They are for the whole tsheg-bar, so
            // they are oddballs.  For now they are unranked, which
            // means they sort after everything listed above.
        });

        /** Position of piece in the ranking; anything unlisted
         *  gets a rank one past the end of the list. */
        private int rankOf(Object piece) {
            int at = ranking.indexOf(piece);
            return (at < 0) ? ranking.size() : at;
        }

        public int compare(Object a, Object b) {
            return rankOf(a) - rankOf(b);
        }
    };

    String[] pieces = right.split("\\+");
    // Arrays.sort on objects is stable, so pieces of equal rank
    // (e.g. two unlisted ones) keep their original relative order.
    Arrays.sort(pieces, unicodeOrder);

    StringBuffer joined = new StringBuffer();
    for (int k = 0; k < pieces.length; k++) {
        if (k > 0)
            joined.append('+');
        joined.append(pieces[k]);
    }
    return new TPair(tp.getTraits(), tp.getLeft(), joined.toString());
}
// TODO(DLC)[EWTS->Tibetan]: doc // TODO(DLC)[EWTS->Tibetan]: doc
private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) { private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {
@ -383,7 +467,9 @@ class TPairListFactory {
StringBuffer ewtsBuf = new StringBuffer(ewts); StringBuffer ewtsBuf = new StringBuffer(ewts);
int howMuchBuf[] = new int[1]; int howMuchBuf[] = new int[1];
TPair head = getFirstConsonantAndVowel(ewtsBuf, howMuchBuf, ttraits); TPair head = ewtsSortWowels(getFirstConsonantAndVowel(ewtsBuf,
howMuchBuf,
ttraits));
int howMuch = howMuchBuf[0]; int howMuch = howMuchBuf[0];
TPairList tail; TPairList tail;
@ -448,7 +534,7 @@ class TPairListFactory {
* {N+YE} or an error or whatever you like. howMuch[0] will be * {N+YE} or an error or whatever you like. howMuch[0] will be
* set to the number of characters of tx that this call has * set to the number of characters of tx that this call has
* consumed. */ * consumed. */
private static TPair getFirstConsonantAndVowel(StringBuffer tx, // TODO(DLC)[EWTS->Tibetan]: function name needs ACIP in it? private static TPair getFirstConsonantAndVowel(StringBuffer tx,
int howMuch[], int howMuch[],
TTraits ttraits) { TTraits ttraits) {
// To handle EWTS "phywa\\u0f84\u0f86" [yes that's two slashes // To handle EWTS "phywa\\u0f84\u0f86" [yes that's two slashes

View file

@ -21,6 +21,7 @@ package org.thdl.tib.text.ttt;
import java.util.HashSet; import java.util.HashSet;
import org.thdl.tib.text.tshegbar.UnicodeUtils; import org.thdl.tib.text.tshegbar.UnicodeUtils;
import org.thdl.tib.text.THDLWylieConstants;
import org.thdl.util.ThdlDebug; import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlOptions; import org.thdl.util.ThdlOptions;
@ -66,8 +67,8 @@ public class TString {
&& type != END_SLASH && type != END_SLASH
&& (type != UNICODE_CHARACTER && (type != UNICODE_CHARACTER
|| !(UnicodeUtils.isInTibetanRange(ch = getText().charAt(0)) || !(UnicodeUtils.isInTibetanRange(ch = getText().charAt(0))
|| (ch >= EWTSTraits.PUA_MIN || (ch >= THDLWylieConstants.PUA_MIN
&& ch <= EWTSTraits.PUA_MAX)))); && ch <= THDLWylieConstants.PUA_MAX))));
} }
/** For ACIP [#COMMENTS] and EWTS (DLC FIXME: what are EWTS comments?) */ /** For ACIP [#COMMENTS] and EWTS (DLC FIXME: what are EWTS comments?) */

View file

@ -136,7 +136,8 @@ public interface TTraits {
/** Gets the duffcodes for wowel, such that they look good with /** Gets the duffcodes for wowel, such that they look good with
* the preceding glyph, and appends them to duff. */ * the preceding glyph, and appends them to duff. */
void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel); void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel)
throws ConversionException;
/** Human-readable name of this transliteration for short error /** Human-readable name of this transliteration for short error
strings. */ strings. */

View file

@ -43,7 +43,7 @@ public class VerboseUnicodeDump {
java.nio.charset.Charset.forName(args[1])); java.nio.charset.Charset.forName(args[1]));
int x; int x;
while (-1 != (x = fr.read())) { while (-1 != (x = fr.read())) {
System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, "")); System.out.println(org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeCodepointToString((char)x, false, "", false));
} }
System.exit(0); System.exit(0);
} }