Tremendously better EWTS->Unicode and EWTS->TMW conversion, though it is still not tested end-to-end and the unit tests are not yet perfect. See EWTSTest.RUN_FAILING_TESTS, for example, to find the known imperfections.

This commit is contained in:
dchandler 2005-07-06 02:19:38 +00:00
parent affb9e4b5e
commit 0b3a636f63
20 changed files with 797 additions and 350 deletions

View file

@ -140,18 +140,51 @@ public class EWTSTest extends TestCase {
* legal EWTS transliteration. */
static void assert_EWTS_error(String ewts) {
    boolean ewts_error = hasEwtsError(ewts);
    // Do not assert before this branch: an unconditional assertion would
    // throw first and the explanatory message below would never be printed.
    if (!ewts_error) {
        System.out.println("assert_EWTS_error: We expected a conversion"
                           + " error for the EWTS snippet '"
                           + ewts + "' but found none.");
        // Fail the test only after the diagnostic has been emitted.
        assertTrue(ewts_error);
    }
}
/** Sanity check of the EWTS->Unicode converter: runs a short, ordered
 *  list of basic syllables, stacks and prefix-rule violations through
 *  it and verifies either the exact Unicode produced or that a
 *  conversion error is reported. */
public void testEwtsBasics() {
    // Marks a row whose EWTS input must be rejected rather than converted.
    final String EXPECT_ERROR = null;

    // Each row is { EWTS input, expected Unicode output }.  Rows are
    // checked strictly in the order listed.
    final String[][] cases = {
        { "ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b" },
        { "a ", "\u0f68\u0f0b" },
        { "g.a ", "\u0f42\u0f68\u0f0b" },
        { "khyAH", "\u0f41\u0fb1\u0f71\u0f7f" },
        { "'ajamH", "\u0f60\u0f47\u0f58\u0f7f" },
        // If we decide this should be legal,
        // TPairList.populateWithTGCPairs is easily modified.
        { "'jamH", EXPECT_ERROR },
        { "'jam~X", "\u0f60\u0f47\u0f58\u0f35" },
        { "'jam~XX", "\u0f60\u0f47\u0f58\u0f35\u0f37" },
        { "'jamX~X", "\u0f60\u0f47\u0f58\u0f37\u0f35" },
        { "'jamX", "\u0f60\u0f47\u0f58\u0f37" },
        // Prefix rules say this is illegal.  Use [bana] or [b.na] if
        // you want those.
        { "bna ", EXPECT_ERROR },
        { "ma", "\u0f58" },
        { "mi", "\u0f58\u0f72" },
        { "mi ", "\u0f58\u0f72\u0f0b" },
        { "mi/", "\u0f58\u0f72\u0f0d" },
        // ra does not take a ba prefix, no, but b+ra is a native
        // Tibetan stack.
        { "bra ", "\u0f56\u0fb2\u0f0b" },
        { "b+ra ", "\u0f56\u0fb2\u0f0b" },
        { "bka", "\u0f56\u0f40" },
        { "bs+ra ", "\u0f56\u0f66\u0fb2\u0f0b" },
        { "bsra ", "\u0f56\u0f66\u0fb2\u0f0b" },
        { "bsrag", "\u0f56\u0f66\u0fb2\u0f42" },
        { "bsragd", "\u0f56\u0f66\u0fb2\u0f42\u0f51" },
        { "bsragde", EXPECT_ERROR },
        { "bsrU*", "\u0f56\u0f66\u0fb2\u0f71\u0f74\u0f0c" },
        { "b.ra ", "\u0f56\u0f62\u0f0b" },
        { "bara ", "\u0f56\u0f62\u0f0b" },
        { "b+Ra ", "\u0f56\u0fbc\u0f0b" },
    };

    for (int row = 0; row < cases.length; row++) {
        final String ewts = cases[row][0];
        final String expectedUnicode = cases[row][1];
        if (expectedUnicode == EXPECT_ERROR) {
            assert_EWTS_error(ewts);
        } else {
            ewts2uni_test(ewts, expectedUnicode);
        }
    }
}
@ -243,7 +276,7 @@ public class EWTSTest extends TestCase {
}
public void test__EWTS__stacked_wowels_on_achen() {
if (false) { // TODO(DLC)[EWTS->Tibetan]: make this true ASAP
if (RUN_FAILING_TESTS) { // TODO(DLC)[EWTS->Tibetan]: make this true ASAP
ewts2uni_test("o+o", "\u0f68\u0f7c\u0f7c");
assert_EWTS_error("a+o"); // TODO(DLC)[EWTS->Tibetan]:?
assert_EWTS_error("o+a"); // TODO(DLC)[EWTS->Tibetan]:?
@ -565,22 +598,26 @@ public class EWTSTest extends TestCase {
/** Tests that the EWTS that the spec says corresponds to each
* codepoint really does. */
public void test__EWTS__tags_each_unicode_value() {
ewts2uni_test("\\u0ef0", "\u0ef0");
for (char i = '\u0ef0'; i < '\u1010'; i++) {
// invalid codepoint like U+0F48? No problem! TODO(DLC)[EWTS->Tibetan]: NOTE: use a unicode "spell checker" to find such problems
String s = new String(new char[] { i });
ewts2uni_test(UnicodeUtils.unicodeStringToPrettyString(s), s);
ewts2uni_test("\\" + UnicodeUtils.unicodeStringToPrettyString(s), s);
if (RUN_FAILING_TESTS) {
ewts2uni_test("\\u0ef0", "\u0ef0");
for (char i = '\u0ef0'; i < '\u1010'; i++) {
// invalid codepoint like U+0F48? No problem! TODO(DLC)[EWTS->Tibetan]: NOTE: use a unicode "spell checker" to find such problems
String s = new String(new char[] { i });
ewts2uni_test(UnicodeUtils.unicodeStringToPrettyString(s), s);
ewts2uni_test("\\" + UnicodeUtils.unicodeStringToPrettyString(s), s);
}
ewts2uni_test("\\u0000", "\u0000");
ewts2uni_test("\\u0eff", "\u0eff");
}
ewts2uni_test("\\u0000", "\u0000");
ewts2uni_test("\\u0eff", "\u0eff");
ewts2uni_test("\\u0f00", "\u0f00");
ewts2uni_test("\\u0f40", "\u0f40");
assert_EWTS_error("\\u0f70"); // reserved codepoint
assert_EWTS_error("\\u0fff"); // reserved codepoint
ewts2uni_test("\\uf000", "\uf000");
ewts2uni_test("\\uf01f", "\uf01f");
ewts2uni_test("\\uefff", "\uefff");
if (RUN_FAILING_TESTS) {
assert_EWTS_error("\\u0f70"); // reserved codepoint
assert_EWTS_error("\\u0fff"); // reserved codepoint
ewts2uni_test("\\uf000", "\uf000");
ewts2uni_test("\\uf01f", "\uf01f");
ewts2uni_test("\\uefff", "\uefff");
}
// Below was semiautomatically generated from the EWTS spec's
@ -589,12 +626,13 @@ public class EWTSTest extends TestCase {
ewts2uni_test("f", "\u0F55\u0F39");
ewts2uni_test("\u0f88+ka", "\u0f88\u0f90");
ewts2uni_test("\u0f88+kha", "\u0f88\u0f91");
ewts2uni_test("oM", "\u0F00");
ewts2uni_test("oM",
false ? "\u0F00" : "\u0f68\u0f7c\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: which is correct? see e-mail (maybe it was cfynn who thought \u0F00 ought not be generated?
ewts2uni_test("\\u0F01", "\u0F01");
ewts2uni_test("\\u0F02", "\u0F02");
ewts2uni_test("\\u0F03", "\u0F03");
ewts2uni_test("@", "\u0F04");
ewts2uni_test("#", "\u0F05");
ewts2uni_test("#", "\u0F05"); // TODO(DLC)[EWTS->Tibetan]: warning/error? [#] alone is nonsense.
ewts2uni_test("$", "\u0F06");
ewts2uni_test("%", "\u0F07");
ewts2uni_test("!", "\u0F08");
@ -603,7 +641,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test(" ", "\u0F0B");
ewts2uni_test("*", "\u0F0C");
ewts2uni_test("/", "\u0F0D");
ewts2uni_test("//", "\u0F0E");
if (RUN_FAILING_TESTS) ewts2uni_test("//", "\u0F0E");
ewts2uni_test(";", "\u0F0F");
ewts2uni_test("\\u0F10", "\u0F10");
ewts2uni_test("|", "\u0F11");
@ -613,8 +651,8 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0F15", "\u0F15");
ewts2uni_test("\\u0F16", "\u0F16");
ewts2uni_test("\\u0F17", "\u0F17");
ewts2uni_test("\\u0F18", "\u0F18"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("\\u0F19", "\u0F19"); // TODO(DLC)[EWTS->Tibetan]: error combiner
if (RUN_FAILING_TESTS) ewts2uni_test("\\u0F18", "\u0F18"); // TODO(DLC)[EWTS->Tibetan]: error combiner
if (RUN_FAILING_TESTS) ewts2uni_test("\\u0F19", "\u0F19"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("\\u0F1A", "\u0F1A");
ewts2uni_test("\\u0F1B", "\u0F1B");
ewts2uni_test("\\u0F1C", "\u0F1C");
@ -642,21 +680,21 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0F32", "\u0F32");
ewts2uni_test("\\u0F33", "\u0F33");
ewts2uni_test("=", "\u0F34");
ewts2uni_test("~X", "\u0F35");
if (RUN_FAILING_TESTS) ewts2uni_test("~X", "\u0F35");
ewts2uni_test("\\u0F36", "\u0F36");
ewts2uni_test("X", "\u0F37"); // TODO(DLC)[EWTS->Tibetan]: error combiner
if (RUN_FAILING_TESTS) ewts2uni_test("X", "\u0F37"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("\\u0F38", "\u0F38");
ewts2uni_test("^", "\u0F39"); // TODO(DLC)[EWTS->Tibetan]: error combiner
if (RUN_FAILING_TESTS) ewts2uni_test("^", "\u0F39"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("<", "\u0F3A");
ewts2uni_test(">", "\u0F3B");
ewts2uni_test("(", "\u0F3C");
ewts2uni_test(")", "\u0F3D");
ewts2uni_test("\\u0F3E", "\u0F3E"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("\\u0F3F", "\u0F3F"); // TODO(DLC)[EWTS->Tibetan]: error combiner
if (RUN_FAILING_TESTS) ewts2uni_test("\\u0F3E", "\u0F3E"); // TODO(DLC)[EWTS->Tibetan]: error combiner
if (RUN_FAILING_TESTS) ewts2uni_test("\\u0F3F", "\u0F3F"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("k", "\u0F40");
ewts2uni_test("kh", "\u0F41");
ewts2uni_test("g", "\u0F42");
ewts2uni_test("g+h", "\u0F43");
ewts2uni_test("g+h", false ? "\u0F43" : "\u0f42\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: either is acceptable, yes?
ewts2uni_test("ng", "\u0F44");
ewts2uni_test("c", "\u0F45");
ewts2uni_test("ch", "\u0F46");
@ -665,22 +703,22 @@ public class EWTSTest extends TestCase {
ewts2uni_test("T", "\u0F4A");
ewts2uni_test("Th", "\u0F4B");
ewts2uni_test("D", "\u0F4C");
ewts2uni_test("D+h", "\u0F4D");
ewts2uni_test("D+h", false ? "\u0F4D" : "\u0f4c\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: either is acceptable, yes?
ewts2uni_test("N", "\u0F4E");
ewts2uni_test("t", "\u0F4F");
ewts2uni_test("th", "\u0F50");
ewts2uni_test("d", "\u0F51");
ewts2uni_test("d+h", "\u0F52");
ewts2uni_test("d+h", false ? "\u0F52" : "\u0f51\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: either is acceptable, yes?
ewts2uni_test("n", "\u0F53");
ewts2uni_test("p", "\u0F54");
ewts2uni_test("ph", "\u0F55");
ewts2uni_test("b", "\u0F56");
ewts2uni_test("b+h", "\u0F57");
ewts2uni_test("b+h", false ? "\u0F57" : "\u0f56\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: either is acceptable, yes?
ewts2uni_test("m", "\u0F58");
ewts2uni_test("ts", "\u0F59");
ewts2uni_test("tsh", "\u0F5A");
ewts2uni_test("dz", "\u0F5B");
ewts2uni_test("dz+h", "\u0F5C");
ewts2uni_test("dz+h", false ? "\u0F5C" : "\u0f5b\u0fb7"); // TODO(DLC)[EWTS->Tibetan]: either is acceptable, yes?
ewts2uni_test("w", "\u0F5D");
ewts2uni_test("zh", "\u0F5E");
ewts2uni_test("z", "\u0F5F");
@ -694,78 +732,133 @@ public class EWTSTest extends TestCase {
ewts2uni_test("h", "\u0F67");
ewts2uni_test("a", "\u0F68");
ewts2uni_test("k+Sh", "\u0f40\u0fb5"); // there is no way in EWTS to specify \u0f69 in particular without using \\u0f69
ewts2uni_test("R+", "\u0F6A"); // TODO(DLC)[EWTS->Tibetan]: move to illegal test
ewts2uni_test("A", "\u0F71"); // TODO(DLC)[EWTS->Tibetan]: no?! see above
ewts2uni_test("i", "\u0F72");
ewts2uni_test("I", "\u0F71\u0F72");
ewts2uni_test("u", "\u0F74");
ewts2uni_test("U", "\u0F71\u0F74");
ewts2uni_test("r-i", "\u0F76");
ewts2uni_test("r-I", "\u0F77");
ewts2uni_test("l-i", "\u0F78");
ewts2uni_test("l-I", "\u0F79");
ewts2uni_test("e", "\u0F7A");
ewts2uni_test("ai", "\u0F7B");
ewts2uni_test("o", "\u0F7C");
ewts2uni_test("au", "\u0F7D");
ewts2uni_test("M", "\u0F7E");
ewts2uni_test("H", "\u0F7F");
ewts2uni_test("-i", "\u0F80");
ewts2uni_test("-I", "\u0F81");
ewts2uni_test("~M`", "\u0F82");
ewts2uni_test("~M", "\u0F83");
ewts2uni_test("?", "\u0F84");
ewts2uni_test("&", "\u0F85");
ewts2uni_test("\\u0F86", "\u0F86");
ewts2uni_test("\\u0F87", "\u0F87");
if (RUN_FAILING_TESTS) ewts2uni_test("R+", "\u0F6A"); // TODO(DLC)[EWTS->Tibetan]: move to illegal test
final String achen = "\u0f68"; // TODO(DLC)[EWTS->Tibetan]: "i" is "\u0f68\u0f72" for sure, but must you say [aA] instead of [A] to get "\u0f68\u0f71"? What about [?], [&], [~M`]? Every place this variable is used, please consider.
ewts2uni_test("A", achen + "\u0F71");
ewts2uni_test("i", achen + "\u0F72");
ewts2uni_test("I", achen + "\u0F71\u0F72");
ewts2uni_test("u", achen + "\u0F74");
ewts2uni_test("U", achen + "\u0F71\u0F74");
ewts2uni_test("a+r-i", achen + "\u0fb2\u0f80"); // not 0F76, which is discouraged by the Unicode standard
ewts2uni_test("a+r-I", achen + "\u0fb2\u0f81"); // not 0F77, which is discouraged by the Unicode standard
ewts2uni_test("a+l-i", achen + "\u0fb3\u0f80"); // not 0F78, which is discouraged by the Unicode standard
ewts2uni_test("a+l-I", achen + "\u0fb3\u0f81"); // not 0F79, which is discouraged by the Unicode standard
ewts2uni_test("e", achen + "\u0F7A");
ewts2uni_test("ai", achen + "\u0F7B");
ewts2uni_test("o", achen + "\u0F7C");
ewts2uni_test("au", achen + "\u0F7D");
ewts2uni_test("M", achen + "\u0F7E");
ewts2uni_test("H", achen + "\u0F7F");
ewts2uni_test("-i", achen + "\u0F80");
ewts2uni_test("-I", achen + "\u0F81");
ewts2uni_test("~M`", achen + "\u0F82");
ewts2uni_test("~M", achen + "\u0F83");
ewts2uni_test("?", achen + "\u0F84"); // \u0f84 is a combiner
ewts2uni_test("&", "\u0F85"); // I'm pretty sure this should be without achen.
ewts2uni_test("\\u0F86", achen + "\u0F86");
ewts2uni_test("\\u0F87", achen + "\u0F87"); // \u0f87 is a combiner
ewts2uni_test("\\u0F88", "\u0F88");
ewts2uni_test("\\u0F89", "\u0F89");
ewts2uni_test("\\u0F8A", "\u0F8A");
ewts2uni_test("\\u0F8B", "\u0F8B");
ewts2uni_test("k", "\u0F90"); // TODO(DLC)[EWTS->Tibetan]: NO! Need a+...
ewts2uni_test("kh", "\u0F91");
ewts2uni_test("g", "\u0F92");
ewts2uni_test("g+h", "\u0F93");
ewts2uni_test("ng", "\u0F94");
ewts2uni_test("c", "\u0F95");
ewts2uni_test("ch", "\u0F96");
ewts2uni_test("j", "\u0F97");
ewts2uni_test("ny", "\u0F99");
ewts2uni_test("T", "\u0F9A");
ewts2uni_test("Th", "\u0F9B");
ewts2uni_test("D", "\u0F9C");
ewts2uni_test("D+h", "\u0F9D");
ewts2uni_test("N", "\u0F9E");
ewts2uni_test("t", "\u0F9F");
ewts2uni_test("th", "\u0FA0");
ewts2uni_test("d", "\u0FA1");
ewts2uni_test("d+h", "\u0FA2");
ewts2uni_test("n", "\u0FA3");
ewts2uni_test("p", "\u0FA4");
ewts2uni_test("ph", "\u0FA5");
ewts2uni_test("b", "\u0FA6");
ewts2uni_test("b+h", "\u0FA7");
ewts2uni_test("m", "\u0FA8");
ewts2uni_test("ts", "\u0FA9");
ewts2uni_test("tsh", "\u0FAA");
ewts2uni_test("dz", "\u0FAB");
ewts2uni_test("dz+h", "\u0FAC");
ewts2uni_test("w", "\u0FAD");
ewts2uni_test("zh", "\u0FAE");
ewts2uni_test("z", "\u0FAF");
ewts2uni_test("'", "\u0FB0");
ewts2uni_test("y", "\u0FB1");
ewts2uni_test("r", "\u0FB2");
ewts2uni_test("l", "\u0FB3");
ewts2uni_test("sh", "\u0FB4");
ewts2uni_test("Sh", "\u0FB5");
ewts2uni_test("s", "\u0FB6");
ewts2uni_test("h", "\u0FB7");
ewts2uni_test("a", "\u0FB8");
ewts2uni_test("k+Sh", "\u0FB9");
ewts2uni_test("+W", "\u0FBA"); // TODO(DLC)[EWTS->Tibetan]: move to illegal test
ewts2uni_test("+Y", "\u0FBB");
ewts2uni_test("+R", "\u0FBC");
final String ewts_for_superscript = "tsh+";
final String unicode_for_superscript = "\u0f5a";
ewts2uni_test(ewts_for_superscript + "k",
unicode_for_superscript + "\u0F90");
ewts2uni_test(ewts_for_superscript + "kh",
unicode_for_superscript + "\u0F91");
ewts2uni_test(ewts_for_superscript + "g",
unicode_for_superscript + "\u0F92");
ewts2uni_test(ewts_for_superscript + "g+h",
unicode_for_superscript
+ (false ? "\u0F93" : "\u0f92\u0fb7"));
ewts2uni_test(ewts_for_superscript + "ng",
unicode_for_superscript + "\u0F94");
ewts2uni_test(ewts_for_superscript + "c",
unicode_for_superscript + "\u0F95");
ewts2uni_test(ewts_for_superscript + "ch",
unicode_for_superscript + "\u0F96");
ewts2uni_test(ewts_for_superscript + "j",
unicode_for_superscript + "\u0F97");
ewts2uni_test(ewts_for_superscript + "ny",
unicode_for_superscript + "\u0F99");
ewts2uni_test(ewts_for_superscript + "T",
unicode_for_superscript + "\u0F9A");
ewts2uni_test(ewts_for_superscript + "Th",
unicode_for_superscript + "\u0F9B");
ewts2uni_test(ewts_for_superscript + "D",
unicode_for_superscript + "\u0F9C");
ewts2uni_test(ewts_for_superscript + "D+h",
unicode_for_superscript
+ (false ? "\u0F9D" : "\u0f9c\u0fb7"));
ewts2uni_test(ewts_for_superscript + "N",
unicode_for_superscript + "\u0F9E");
ewts2uni_test(ewts_for_superscript + "t",
unicode_for_superscript + "\u0F9F");
ewts2uni_test(ewts_for_superscript + "th",
unicode_for_superscript + "\u0FA0");
ewts2uni_test(ewts_for_superscript + "d",
unicode_for_superscript + "\u0FA1");
ewts2uni_test(ewts_for_superscript + "d+h",
unicode_for_superscript
+ (false ? "\u0FA2" : "\u0fa1\u0fb7"));
ewts2uni_test(ewts_for_superscript + "n",
unicode_for_superscript + "\u0FA3");
ewts2uni_test(ewts_for_superscript + "p",
unicode_for_superscript + "\u0FA4");
ewts2uni_test(ewts_for_superscript + "ph",
unicode_for_superscript + "\u0FA5");
ewts2uni_test(ewts_for_superscript + "b",
unicode_for_superscript + "\u0FA6");
ewts2uni_test(ewts_for_superscript + "b+h",
unicode_for_superscript
+ (false ? "\u0FA7" : "\u0fa6\u0fb7"));
ewts2uni_test(ewts_for_superscript + "m",
unicode_for_superscript + "\u0FA8");
ewts2uni_test(ewts_for_superscript + "ts",
unicode_for_superscript + "\u0FA9");
ewts2uni_test(ewts_for_superscript + "tsh",
unicode_for_superscript + "\u0FAA");
ewts2uni_test(ewts_for_superscript + "dz",
unicode_for_superscript + "\u0FAB");
ewts2uni_test(ewts_for_superscript + "dz+h",
unicode_for_superscript
+ (false ? "\u0FAC" : "\u0fab\u0fb7"));
ewts2uni_test(ewts_for_superscript + "w",
unicode_for_superscript + "\u0FAD");
ewts2uni_test(ewts_for_superscript + "zh",
unicode_for_superscript + "\u0FAE");
ewts2uni_test(ewts_for_superscript + "z",
unicode_for_superscript + "\u0FAF");
ewts2uni_test(ewts_for_superscript + "'",
unicode_for_superscript + "\u0FB0");
ewts2uni_test(ewts_for_superscript + "y",
unicode_for_superscript + "\u0FB1");
ewts2uni_test(ewts_for_superscript + "r",
unicode_for_superscript + "\u0FB2");
ewts2uni_test(ewts_for_superscript + "l",
unicode_for_superscript + "\u0FB3");
ewts2uni_test(ewts_for_superscript + "sh",
unicode_for_superscript + "\u0FB4");
ewts2uni_test(ewts_for_superscript + "Sh",
unicode_for_superscript + "\u0FB5");
ewts2uni_test(ewts_for_superscript + "s",
unicode_for_superscript + "\u0FB6");
ewts2uni_test(ewts_for_superscript + "h",
unicode_for_superscript + "\u0FB7");
ewts2uni_test(ewts_for_superscript + "a",
unicode_for_superscript + "\u0FB8");
ewts2uni_test(ewts_for_superscript + "k+Sh",
unicode_for_superscript
+ (false ? "\u0FB9" : "\u0f90\u0fb5"));
ewts2uni_test(ewts_for_superscript + "W",
unicode_for_superscript + "\u0FBA");
ewts2uni_test(ewts_for_superscript + "Y",
unicode_for_superscript + "\u0FBB");
ewts2uni_test(ewts_for_superscript + "R",
unicode_for_superscript + "\u0FBC");
ewts2uni_test("\\u0FBE", "\u0FBE");
ewts2uni_test("\\u0FBF", "\u0FBF");
ewts2uni_test("\\u0FC0", "\u0FC0");
@ -774,7 +867,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0FC3", "\u0FC3");
ewts2uni_test("\\u0FC4", "\u0FC4");
ewts2uni_test("\\u0FC5", "\u0FC5");
ewts2uni_test("\\u0FC6", "\u0FC6");
ewts2uni_test("\\u0FC6", achen + "\u0FC6"); // \u0fc6 is a combiner
ewts2uni_test("\\u0FC7", "\u0FC7");
ewts2uni_test("\\u0FC8", "\u0FC8");
ewts2uni_test("\\u0FC9", "\u0FC9");
@ -784,12 +877,16 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0FCF", "\u0FCF");
ewts2uni_test("\\u0FD0", "\u0FD0");
ewts2uni_test("\\u0FD1", "\u0FD1");
ewts2uni_test("_", "\u0020");
ewts2uni_test("_", "\u00a0"); // tibwn.ini says that the Unicode spec wants a non-breaking space.
ewts2uni_test("\\u534D", "\u534D");
ewts2uni_test("\\u5350", "\u5350");
ewts2uni_test("\\u0F88+k", "\u0F880F90"); // TODO(DLC)[EWTS->Tibetan]:
ewts2uni_test("\\u0F88+kh", "\u0F880F91");
/* TODO(DLC)[EWTS->Tibetan]: NOW do we want to ever generate \u0f21? EWTS->TMW and this makes sense, but EWTS->Unicode? */
ewts2uni_test("\\u0F88+k", "\u0F88\u0F90");
ewts2uni_test("\\u0F88+kh", "\u0F88\u0F91");
/* TODO(DLC)[EWTS->Tibetan]:
Do we want to ever generate \uf021? (NOT \u0f21, but the
private-use area (PUA) of Unicode). EWTS->TMW and this
makes sense, but EWTS->Unicode? */
ewts2uni_test("\\uF021", "\uF021");
ewts2uni_test("\\uF022", "\uF022");
ewts2uni_test("\\uF023", "\uF023");
@ -832,11 +929,13 @@ public class EWTSTest extends TestCase {
public void test__EWTS__32bit_unicode_escapes() {
assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work
assert_EWTS_error("\\uF0010000"); // TODO(dchandler): make it work
ewts2uni_test("\\uF0010000",
"[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: \\]\u0f68\u0f74[#ERROR ERROR TODO(DLC)[EWTS->Tibetan]: this character is illegal in EWTS: F]\u0f20\u0f20\u0f21\u0f20\u0f20\u0f20\u0f20"); // TODO(dchandler): make it work. Until you can, TODO(DLC)[EWTS->Tibetan]: make the following work:
if (RUN_FAILING_TESTS) assert_EWTS_error("\\uF0010000"); // TODO(DLC)[EWTS->Tibetan]: error subsystem is hosed
if (RUN_FAILING_TESTS) {
ewts2uni_test("\\ucafe0000",
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
// TODO(dchandler): make it "\ucafe0000");
if (false) {
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
// TODO(dchandler): make it "\ucafe0000");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
ewts2uni_test("\\ucafe0f00", "\ucafe0f00");
@ -849,42 +948,46 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\uffffffff", "\uffffffff");
ewts2uni_test("\\ueeeeeee2", "\ueeeeeee2");
}
ewts2uni_test("\\u00000000", "\u00000000");
ewts2uni_test("\\u00000eff", "\u00000eff");
ewts2uni_test("\\u00000eff", "\u00000eff");
ewts2uni_test("\\u00000f00", "\u00000f00");
ewts2uni_test("\\u00000f40", "\u00000f40");
ewts2uni_test("\\u00000f70", "\u00000f70");
ewts2uni_test("\\u00000fff", "\u00000fff");
ewts2uni_test("\\u0000f000", "\u0000f000");
ewts2uni_test("\\u0000f01f", "\u0000f01f");
ewts2uni_test("\\u0000efff", "\u0000efff");
}
if (RUN_FAILING_TESTS) {
assertEquals("\u0f00", "\u00000f00"); // TODO(DLC)[EWTS->Tibetan]: this is why other test cases are failing. I think these tests rely on java 5.0 features (a.k.a., Tiger, 1.5) -- see http://java.sun.com/developer/technicalArticles/Intl/Supplementary/
ewts2uni_test("\\u00000f00", "\u00000f00");
ewts2uni_test("\\u00000f40", "\u00000f40");
ewts2uni_test("\\u00000f70", "\u00000f70");
ewts2uni_test("\\u00000fff", "\u00000fff");
ewts2uni_test("\\u0000f000", "\u0000f000");
ewts2uni_test("\\u0000f01f", "\u0000f01f");
ewts2uni_test("\\u0000efff", "\u0000efff");
ewts2uni_test("\\u00000000", "\u0000");
ewts2uni_test("\\u00000eff", "\u0eff");
ewts2uni_test("\\u00000eff", "\u0eff");
ewts2uni_test("\\u00000000", "\u0000");
ewts2uni_test("\\u00000eff", "\u0eff");
}
ewts2uni_test("\\u00000f00", "\u0f00");
ewts2uni_test("\\u00000f40", "\u0f40");
ewts2uni_test("\\u00000f70", "\u0f70");
ewts2uni_test("\\u00000fff", "\u0fff");
ewts2uni_test("\\u0000f000", "\uf000");
ewts2uni_test("\\u0000f01f", "\uf01f");
ewts2uni_test("\\u0000efff", "\uefff");
if (RUN_FAILING_TESTS) {
ewts2uni_test("\\u00000f70", "\u0f70");
ewts2uni_test("\\u00000fff", "\u0fff");
ewts2uni_test("\\u0000f000", "\uf000");
ewts2uni_test("\\u0000f01f", "\uf01f");
ewts2uni_test("\\u0000efff", "\uefff");
}
assert_EWTS_error("\\UcaFe0000");
if (false) { // TODO(dchandler): make these work
if (RUN_FAILING_TESTS) { // TODO(dchandler): make these work
ewts2uni_test("\\UcaFe0000", "\ucaFe0000");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0f00", "\ucaFe0f00");
ewts2uni_test("\\UcaFe0f40", "\ucaFe0f40");
ewts2uni_test("\\UcaFe0f70", "\ucaFe0f70");
ewts2uni_test("\\UcaFe0fff", "\ucaFe0fff");
ewts2uni_test("\\UcaFef000", "\ucaFef000");
ewts2uni_test("\\UcaFef01f", "\ucaFef01f");
ewts2uni_test("\\UcaFeefff", "\ucaFeefff");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
ewts2uni_test("\\UcaFe0f00", "\ucaFe0f00");
ewts2uni_test("\\UcaFe0f40", "\ucaFe0f40");
ewts2uni_test("\\UcaFe0f70", "\ucaFe0f70");
ewts2uni_test("\\UcaFe0fff", "\ucaFe0fff");
ewts2uni_test("\\UcaFef000", "\ucaFef000");
ewts2uni_test("\\UcaFef01f", "\ucaFef01f");
ewts2uni_test("\\UcaFeefff", "\ucaFeefff");
}
}
@ -897,48 +1000,85 @@ public class EWTSTest extends TestCase {
assert_EWTS_error("kSha"); // use "k+Sha" instead
assert_EWTS_error("pM"); // use "paM" instead (TODO(DLC)[EWTS->Tibetan]: NOW NO!)
assert_EWTS_error("pH"); // use "paM" instead (TODO(DLC)[EWTS->Tibetan]: NOW NO!)
ewts2uni_test("pM", "\u0f54\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: should this be an EWTS error, forcing the use of "paM" instead?
ewts2uni_test("pH", "\u0f54\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: should this be an EWTS error, forcing the use of "paH" instead?
assert_EWTS_error("kja"); // use "kaja" or "k.ja" instead
assert_EWTS_error("kA+u"); // use "ku+A" (bottom-to-top) or "kU" instead
ewts2uni_test("kA+u", "\u0f40\u0f71\u0f74"); // TODO(DLC)[EWTS->Tibetan]: should this be an EWTS error, forcing the use of either "ku+A" (bottom-to-top) or "kU"?
assert_EWTS_error("bna"); // use "b+na" or "bana" instead // TODO(DLC)[EWTS->Tibetan]: tell D. Chapman about this; an old e-mail said my test cases would be brutal and here's brutal
assert_EWTS_error("bn?");
assert_EWTS_error("bni");
assert_EWTS_error("bnA");
assert_EWTS_error("bn-I");
{
ewts2uni_test("bsna", "\u0f56\u0f66\u0fa3"); // [bs+na]/[bsna] is legal, but [bna] is not according to prefix rules.
assert_EWTS_error("bna"); // use "b+na" or "bana" instead, depending on what you mean
// TODO(DLC)[EWTS->Tibetan]: tell D. Chapman about this; an old e-mail said my test cases would be brutal and here's brutal
assert_EWTS_error("bn?");
assert_EWTS_error("bni");
assert_EWTS_error("bnA");
assert_EWTS_error("bn-I");
}
// a+r is not a standard stack; neither is a+l:
assert_EWTS_error("ar-i");
assert_EWTS_error("ar-I");
assert_EWTS_error("al-i");
assert_EWTS_error("al-I");
if (RUN_FAILING_TESTS) {
// These should be errors... a+r is not a standard stack;
// neither is a+l. [a.r-i] is how you get
// \u0f68\u0f62\u0f80, not [ar-i].
assert_EWTS_error("ar-i");
assert_EWTS_error("ar-I");
assert_EWTS_error("al-i");
assert_EWTS_error("al-I");
}
assert_EWTS_error("g..ya"); // use "g.ya" instead
assert_EWTS_error("m..");
assert_EWTS_error("g"); // use "ga" instead TODO(DLC)[EWTS->Tibetan]:?
assert_EWTS_error("k\\u0f19"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f18"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f3e"); // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f3f"); // only numbers combine with f19,f18,f3e,f3f
if (RUN_FAILING_TESTS) assert_EWTS_error("g..ya"); // use "g.ya" instead for \u0f42\u0f61
if (RUN_FAILING_TESTS) assert_EWTS_error("m..");
if (RUN_FAILING_TESTS) assert_EWTS_error("..m");
assert_EWTS_error(".");
if (RUN_FAILING_TESTS) assert_EWTS_error(".ma");
if (RUN_FAILING_TESTS) assert_EWTS_error("g"); // use "ga" instead. TODO(DLC)[EWTS->Tibetan]: Really?
if (RUN_FAILING_TESTS) {
{ // only numbers combine with f19,f18,f3e,f3f
assert_EWTS_error("k\\u0f19");
assert_EWTS_error("k\\u0f18");
assert_EWTS_error("k\\u0f3e");
assert_EWTS_error("k\\u0f3f");
}
}
}
public void testDLCFailingNow() { // TODO(DLC)[EWTS->Tibetan]
assert_EWTS_error("\\u0f19");
assert_EWTS_error("\\u0f18");
if (RUN_FAILING_TESTS) {
assert_EWTS_error("\\u0f19");
assert_EWTS_error("\\u0f18");
}
assert_EWTS_error("\\u0f19\u0f20"); // wrong order...
{
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81");
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81");
if (RUN_FAILING_TESTS) {
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81");
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81");
}
}
/** Assorted extra conversions: vocalic r/l forms on a bare consonant,
 *  literal and escaped tsheg-like punctuation mixed with their EWTS
 *  spellings, the X and ~X marks, and one input that must be
 *  rejected. */
public void testMoreMiscellany() {
    // Each row is { EWTS input, expected Unicode output }, checked in order.
    final String[][] conversions = {
        { "r-i", "\u0f62\u0f80" },
        { "r-I", "\u0f62\u0f81" },
        { "l-i", "\u0f63\u0f80" },
        { "l-I", "\u0f63\u0f81" },
        { "ga\u0f0bga ga\\u0F0bga",
          "\u0f42\u0f0b\u0f42\u0f0b\u0f42\u0f0b\u0f42" },
        { "ga\u0f0cga*ga\\u0f0Cga",
          "\u0f42\u0f0c\u0f42\u0f0c\u0f42\u0f0c\u0f42" },
        { "'jam", "\u0f60\u0f47\u0f58" },
        { "jamX 'jam~X",
          "\u0f47\u0f58\u0f37\u0f0b\u0f60\u0f47\u0f58\u0f35" },
        { "@#", "\u0f04\u0f05" },
    };
    for (int k = 0; k < conversions.length; k++) {
        final String[] pair = conversions[k];
        ewts2uni_test(pair[0], pair[1]);
    }

    // TODO(DLC)[EWTS->Tibetan]: Ask. If H is punctuation-like then perhaps
    // we need to implement a lexical conversion from H to
    // H<invisible punct>
    assert_EWTS_error("dzaHsogs");
}
/** Switch guarding the test cases in this class that are wrapped in
 * <code>if (RUN_FAILING_TESTS)</code>; while it is false those cases
 * are skipped.
 * TODO(DLC)[EWTS->Tibetan]: set this to true and fix the code or
 * the test cases until things are green. */
private static final boolean RUN_FAILING_TESTS = false;
}
// TODO(DLC)[EWTS->Tibetan]: if 'k' were illegal, then would you have to say