I really hesitate to commit this because I'm not sure what it brings to the
table exactly and I fear that it makes the ACIP->Tibetan converter code a lot uglier. The TODO(DLC)[EWTS->Tibetan] comments littered throughout are part of the ugliness; they point to the ugliness. If each were addressed, cleanliness could perhaps be achieved. I've largely forgotten exactly what this change does, but it attempts to improve EWTS->Tibetan conversion. The lexer is probably really, really primitive. I concentrate here on converting a single tsheg bar rather than a whole document. Eclipse was used during part of my journey here and some imports were reorganized merely because I could. :) (Eclipse was needed when the usual ant build failed to run a new test EWTSTest. And I wanted its debugger.) Next steps: end-to-end EWTS tests should bring many problems to light. Fix those. Triage all the TODO comments. I don't know that I'll ever really trust the implementation. The tests are valuable, though. A clean implementation of EWTS->Tibetan in Jython might hold enough interest for me; I'd like to learn Python.
This commit is contained in:
parent
f64bae8ea6
commit
7198f23361
45 changed files with 1666 additions and 695 deletions
|
@ -18,12 +18,12 @@ Contributor(s): ______________________________________.
|
|||
|
||||
package org.thdl.tib.text.ttt;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.thdl.util.ThdlOptions;
|
||||
import org.thdl.tib.text.tshegbar.UnicodeUtils;
|
||||
|
||||
/** Tests this package's ability to understand EWTS and turn it into
|
||||
* the appropriate TMW or Unicode.
|
||||
|
@ -53,16 +53,106 @@ public class EWTSTest extends TestCase {
|
|||
|
||||
public EWTSTest() { }
|
||||
|
||||
/** Prints a human-readable explanation of how actual and expected
|
||||
* differ to out. Precondition: expected is non-null, out is
|
||||
* non-null */
|
||||
static void explainInequality(String actual, String expected, PrintStream out) {
|
||||
if (null == actual)
|
||||
out.println("Expected \""
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(expected)
|
||||
+ "\" but found the null string");
|
||||
if (actual.length() != expected.length()) {
|
||||
out.println("Expected a string with " + expected.length()
|
||||
+ " characters but found a string with "
|
||||
+ actual.length() + " characters");
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < actual.length(); i++) {
|
||||
if (actual.charAt(i) != expected.charAt(i)) {
|
||||
out.println("Expected string \"" + UnicodeUtils.unicodeStringToPrettyString(expected) + "\" but found the string \""
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(actual)
|
||||
+ "\" which differs at character " + i + " (counting from zero, not one)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Causes a JUnit test case failure unless the EWTS document ewts
|
||||
* converts to the unicode expectedUnicode. */
|
||||
static void ewts2uni_test(String ewts, String expectedUnicode) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: NOW! Implement me.
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts, errors,
|
||||
null, true,
|
||||
"None", // TODO(DLC)[EWTS->Tibetan]: ???
|
||||
false /* short warnings */);
|
||||
if (null == unicode) {
|
||||
if (null != expectedUnicode && "none" != expectedUnicode) {
|
||||
System.out.println("No unicode exists for " + ewts
|
||||
+ " but you expected "
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
|
||||
assertTrue(false);
|
||||
}
|
||||
System.out.println("Unicode for " + ewts + " can't be had; errors are " + errors);
|
||||
} else {
|
||||
if (null != expectedUnicode && !expectedUnicode.equals(unicode)) {
|
||||
explainInequality(unicode, expectedUnicode, System.out);
|
||||
if (UnicodeUtils.unicodeStringToPrettyString(unicode).equals(UnicodeUtils.unicodeStringToPrettyString(expectedUnicode))) {
|
||||
System.out.println("UGLY strings: The unicode for\n \"" + ewts
|
||||
+ "\"\nis\n \""
|
||||
+ unicode
|
||||
+ "\",\nbut you expected\n \""
|
||||
+ expectedUnicode
|
||||
+ "\"");
|
||||
} else {
|
||||
System.out.println("The unicode for\n \"" + ewts
|
||||
+ "\"\nis\n \""
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(unicode)
|
||||
+ "\",\nbut you expected\n \""
|
||||
+ UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)
|
||||
+ "\"");
|
||||
}
|
||||
{
|
||||
StringBuffer sb = new StringBuffer(ewts);
|
||||
EWTSTshegBarScanner.ExpandEscapeSequences(sb);
|
||||
TPairList[] la
|
||||
= EWTSTraits.instance().breakTshegBarIntoChunks(sb.toString(), false);
|
||||
assertTrue(la[1] == null);
|
||||
System.out.println("EWTS=" + ewts + " and l'=" + la[0].toString2());
|
||||
}
|
||||
assertTrue(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true iff ewts is not a valid EWTS string. */
|
||||
static boolean hasEwtsError(String ewts) {
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode = TConverter.convertToUnicodeText(EWTSTraits.instance(),
|
||||
ewts, errors,
|
||||
null, true,
|
||||
"None", // TODO(DLC)[EWTS->Tibetan]: ???
|
||||
true);
|
||||
// TODO(DLC)[EWTS->Tibetan]: Is this sufficient?
|
||||
return (null == unicode || errors.length() > 0);
|
||||
}
|
||||
|
||||
/** Causes a JUnit test case failure iff the EWTS document ewts is
|
||||
* legal EWTS transliteration. */
|
||||
static void assert_EWTS_error(String ewts) {
|
||||
// TODO(DLC)[EWTS->Tibetan]: NOW! Implement me.
|
||||
boolean ewts_error = hasEwtsError(ewts);
|
||||
assertTrue(ewts_error);
|
||||
}
|
||||
|
||||
/** Tests that the EWTS->unicode converter isn't completely
|
||||
braindead. */
|
||||
public void testEwtsBasics() {
|
||||
ewts2uni_test("ma", "\u0f58");
|
||||
ewts2uni_test("mi", "\u0f58\u0f72");
|
||||
ewts2uni_test("mi ", "\u0f58\u0f72\u0f0b");
|
||||
ewts2uni_test("mi/", "\u0f58\u0f72\u0f0d");
|
||||
ewts2uni_test("bra ", "\u0f56\u0fb2\u0f0b");
|
||||
ewts2uni_test("b+ra ", "\u0f56\u0fb2\u0f0b");
|
||||
ewts2uni_test("b+Ra ", "\u0f56\u0fbc\u0f0b");
|
||||
}
|
||||
|
||||
/** Miscellaneous tests of EWTS->Unicode conversion. */
|
||||
|
@ -83,17 +173,18 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("k+Ya", "\u0f40\u0FBB");
|
||||
ewts2uni_test("k+Ra", "\u0f40\u0FBC");
|
||||
ewts2uni_test("k+wa", "\u0f40\u0Fad");
|
||||
ewts2uni_test("k+ya", "\u0f40\u0Fb3");
|
||||
ewts2uni_test("k+la", "\u0f40\u0Fb3");
|
||||
ewts2uni_test("k+ya", "\u0f40\u0Fb1");
|
||||
ewts2uni_test("k+ra", "\u0f40\u0Fb2");
|
||||
|
||||
ewts2uni_test("r-I", "\u0f62\u0f81");
|
||||
ewts2uni_test("l-I", "\u0f63\u0f81");
|
||||
ewts2uni_test("r-i", "\u0f62\u0f80");
|
||||
ewts2uni_test("l-i", "\u0f63\u0f80");
|
||||
ewts2uni_test("gr-i", "\u0f42\u0f76"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb2\u0f80"
|
||||
ewts2uni_test("gr-I", "\u0f42\u0f77"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb2\u0f81"
|
||||
ewts2uni_test("gl-i", "\u0f42\u0f78"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb3\u0f80"
|
||||
ewts2uni_test("gl-I", "\u0f42\u0f79"); // TODO(DLC)[EWTS->Tibetan]: "\u0f42\u0fb3\u0f81"
|
||||
ewts2uni_test("gr-i", "\u0f42\u0fb2\u0f80");
|
||||
ewts2uni_test("gr-I", "\u0f42\u0fb2\u0f81");
|
||||
ewts2uni_test("gl-i", "\u0f42\u0fb3\u0f80");
|
||||
ewts2uni_test("gl-I", "\u0f42\u0fb3\u0f81");
|
||||
}
|
||||
|
||||
|
||||
|
@ -102,26 +193,39 @@ public class EWTSTest extends TestCase {
|
|||
* mostly by testing that the Unicode generated for a single
|
||||
* wowel or set of wowels atop achen (U+0F68) is correct. */
|
||||
public void test__EWTS__wowels_on_achen() {
|
||||
|
||||
assert_EWTS_error("+yo");
|
||||
ewts2uni_test("a+yo", "\u0f68\u0fb1\u0f7c");
|
||||
ewts2uni_test("a+yo+o", "\u0f68\u0fb1\u0f7c\u0f7c");
|
||||
ewts2uni_test("a+ya.una", "\u0f68\u0fb1\u0f68\u0f74\u0f53");
|
||||
ewts2uni_test("a+yauna", "\u0f68\u0fb1\u0f7d\u0f53"); // TODO(DLC)[EWTS->Tibetan]: warn that '.' might have been needed
|
||||
ewts2uni_test("a+yoona", "\u0f68\u0fb1\u0f7c\u0f68\u0f7c\u0f53"); // TODO(DLC)[EWTS->Tibetan]: warn!
|
||||
ewts2uni_test("a+yoon", "\u0f68\u0fb1\u0f7c\u0f68\u0f7c\u0f53"); // TODO(DLC)[EWTS->Tibetan]: warn!
|
||||
// ewts2uni_test("a+yo+ona", "TODO(DLC)[EWTS->Tibetan]");
|
||||
|
||||
ewts2uni_test("A", "\u0f68\u0f71");
|
||||
ewts2uni_test("i", "\u0f68\u0f72");
|
||||
ewts2uni_test("I", "\u0f68\u0f73");
|
||||
ewts2uni_test("I", "\u0f68\u0f71\u0f72");
|
||||
ewts2uni_test("u", "\u0f68\u0f74");
|
||||
ewts2uni_test("U", "\u0f68\u0f75");
|
||||
ewts2uni_test("a+r-i", "\u0f68\u0f76");
|
||||
ewts2uni_test("a+r-I", "\u0f68\u0f77");
|
||||
ewts2uni_test("a+l-i", "\u0f68\u0f78");
|
||||
ewts2uni_test("a+l-I", "\u0f68\u0f79");
|
||||
ewts2uni_test("U", "\u0f68\u0f71\u0f74");
|
||||
ewts2uni_test("a+r-i", "\u0f68\u0fb2\u0f80");
|
||||
ewts2uni_test("a+r-I", "\u0f68\u0fb2\u0f81");
|
||||
ewts2uni_test("a+l-i", "\u0f68\u0fb3\u0f80");
|
||||
ewts2uni_test("a+l-I", "\u0f68\u0fb3\u0f81");
|
||||
ewts2uni_test("e", "\u0f68\u0f7a");
|
||||
ewts2uni_test("ai", "\u0f68\u0f7b");
|
||||
// ewts2uni_test("ao", "\u0f68\u0f68\u0f7c"); // TODO(DLC)[EWTS->Tibetan]:
|
||||
// assert_EWTS_error("ao"); // TODO(DLC)[EWTS->Tibetan]:
|
||||
ewts2uni_test("o", "\u0f68\u0f7c");
|
||||
ewts2uni_test("au", "\u0f68\u0f7d");
|
||||
ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("aM", "\u0f68\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("aH", "\u0f68\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("-i", "\u0f68\u0f80");
|
||||
ewts2uni_test("-I", "\u0f68\u0f81");
|
||||
ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("a~M`", "\u0f68\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("a~M", "\u0f68\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("a?", "\u0f68\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("\\u0f68", "\u0f68");
|
||||
ewts2uni_test("a\\u0f86", "\u0f68\u0f86");
|
||||
ewts2uni_test("a\\U0f86", "\u0f68\u0f86");
|
||||
ewts2uni_test("a\\U0F86", "\u0f68\u0f86");
|
||||
|
@ -132,24 +236,32 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("a\\u00000F86", "\u0f68\u0f86");
|
||||
ewts2uni_test("a\\u0f87", "\u0f68\u0f87");
|
||||
|
||||
ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
// ewts2uni_test("aMH", "\u0f68\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
// ewts2uni_test("aHM", "\u0f68\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("a", "\u0f68");
|
||||
|
||||
}
|
||||
|
||||
public void test__EWTS__stacked_wowels_on_achen() {
|
||||
if (false) { // TODO(DLC)[EWTS->Tibetan]: make this true ASAP
|
||||
ewts2uni_test("o+o", "\u0f68\u0f7c\u0f7c");
|
||||
assert_EWTS_error("a+o"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
assert_EWTS_error("o+a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
assert_EWTS_error("ka+o"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("A+i", "\u0f68\u0f73");
|
||||
ewts2uni_test("o+o", "\u0f68\u0f7d");
|
||||
ewts2uni_test("e+e", "\u0f68\u0f7b");
|
||||
ewts2uni_test("e+e+e", "\u0f68\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("e+e+e+e", "\u0f68\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("A+i", "\u0f68\u0f71\u0f72");
|
||||
ewts2uni_test("e+e", "\u0f68\u0f7a\u0f7a");
|
||||
ewts2uni_test("e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("e+e+e+e+e", "\u0f68\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("o+e", "\u0f68\u0f7c\u0f7a");
|
||||
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("u+A", "\u0f68\u0f75");
|
||||
ewts2uni_test("u+A+i+o+e", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("u+A+i+o+eHM", "\u0f68\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("u+A", "\u0f68\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("a", "\u0f68");
|
||||
ewts2uni_test("o+-I", "DLC");
|
||||
}
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
|
@ -158,14 +270,16 @@ public class EWTSTest extends TestCase {
|
|||
public void test__EWTS__wowels_on_ka() {
|
||||
ewts2uni_test("kA", "\u0f40\u0f71");
|
||||
ewts2uni_test("ki", "\u0f40\u0f72");
|
||||
ewts2uni_test("kI", "\u0f40\u0f73");
|
||||
ewts2uni_test("kI", "\u0f40\u0f71\u0f72");
|
||||
ewts2uni_test("ku", "\u0f40\u0f74");
|
||||
ewts2uni_test("kU", "\u0f40\u0f75");
|
||||
ewts2uni_test("ka+r-i", "\u0f40\u0f76");
|
||||
ewts2uni_test("ka+r-I", "\u0f40\u0f77");
|
||||
ewts2uni_test("ka+l-i", "\u0f40\u0f78");
|
||||
ewts2uni_test("ka+l-I", "\u0f40\u0f79");
|
||||
ewts2uni_test("kU", "\u0f40\u0f71\u0f74");
|
||||
ewts2uni_test("k+r-i", "\u0f40\u0fb2\u0f80");
|
||||
ewts2uni_test("k+r-I", "\u0f40\u0fb2\u0f81");
|
||||
ewts2uni_test("k+l-i", "\u0f40\u0fb3\u0f80");
|
||||
ewts2uni_test("k+l-I", "\u0f40\u0fb3\u0f81");
|
||||
ewts2uni_test("ke", "\u0f40\u0f7a");
|
||||
ewts2uni_test("e", "\u0f68\u0f7a");
|
||||
ewts2uni_test("a", "\u0f68");
|
||||
ewts2uni_test("kai", "\u0f40\u0f7b");
|
||||
ewts2uni_test("ko", "\u0f40\u0f7c");
|
||||
ewts2uni_test("kau", "\u0f40\u0f7d");
|
||||
|
@ -192,34 +306,39 @@ public class EWTSTest extends TestCase {
|
|||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("kA+i", "\u0f40\u0f73");
|
||||
ewts2uni_test("ko+o", "\u0f40\u0f7d");
|
||||
ewts2uni_test("ke+e", "\u0f40\u0f7b");
|
||||
ewts2uni_test("ke+e+e", "\u0f40\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("kA+i", "\u0f40\u0f71\u0f72");
|
||||
ewts2uni_test("ko+o", "\u0f40\u0f7c\u0f7c");
|
||||
ewts2uni_test("ke+e", "\u0f40\u0f7a\u0f7a");
|
||||
ewts2uni_test("ke+e+e", "\u0f40\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ke+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ke+e+e+e+e", "\u0f40\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("ko+e", "\u0f40\u0f7c\u0f7a");
|
||||
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("ku+A", "\u0f40\u0f75");
|
||||
ewts2uni_test("ku+A+i+o+e", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("ku+A+i+o+eHM", "\u0f40\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("ku+A", "\u0f40\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("k", "\u0f40");
|
||||
ewts2uni_test("ka", "\u0f40");
|
||||
|
||||
assert_EWTS_error("ka+r-i"); // TODO(DLC)[EWTS->Tibetan]: right?
|
||||
assert_EWTS_error("ka+r-I");
|
||||
assert_EWTS_error("ka+l-i");
|
||||
assert_EWTS_error("ka+l-I");
|
||||
|
||||
assert_EWTS_error("ko+a");
|
||||
assert_EWTS_error("ka+o");
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
* mostly by testing that the Unicode generated for a single
|
||||
* wowel or set of wowels atop achung (U+0F60) is correct. */
|
||||
public void test__EWTS__wowels_on_achung() {
|
||||
ewts2uni_test("'a", "\u0f60");
|
||||
ewts2uni_test("'A", "\u0f60\u0f71");
|
||||
ewts2uni_test("'i", "\u0f60\u0f72");
|
||||
ewts2uni_test("'I", "\u0f60\u0f73");
|
||||
ewts2uni_test("'I", "\u0f60\u0f71\u0f72");
|
||||
ewts2uni_test("'u", "\u0f60\u0f74");
|
||||
ewts2uni_test("'U", "\u0f60\u0f75");
|
||||
ewts2uni_test("'a+r-i", "\u0f60\u0f76");
|
||||
ewts2uni_test("'a+r-I", "\u0f60\u0f77");
|
||||
ewts2uni_test("'a+l-i", "\u0f60\u0f78");
|
||||
ewts2uni_test("'a+l-I", "\u0f60\u0f79");
|
||||
ewts2uni_test("'U", "\u0f60\u0f71\u0f74");
|
||||
ewts2uni_test("'e", "\u0f60\u0f7a");
|
||||
ewts2uni_test("'ai", "\u0f60\u0f7b");
|
||||
ewts2uni_test("'o", "\u0f60\u0f7c");
|
||||
|
@ -247,75 +366,81 @@ public class EWTSTest extends TestCase {
|
|||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("'A+i", "\u0f60\u0f73");
|
||||
ewts2uni_test("'o+o", "\u0f60\u0f7d");
|
||||
ewts2uni_test("'e+e", "\u0f60\u0f7b");
|
||||
ewts2uni_test("'e+e+e", "\u0f60\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'A+i", "\u0f60\u0f71\u0f72");
|
||||
ewts2uni_test("'o+o", "\u0f60\u0f7c\u0f7c");
|
||||
ewts2uni_test("'e+e", "\u0f60\u0f7a\u0f7a");
|
||||
ewts2uni_test("'e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'e+e+e+e+e", "\u0f60\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("'o+e", "\u0f60\u0f7c\u0f7a");
|
||||
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("'u+A+i+o+e", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("'u+A+i+o+eHM", "\u0f60\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
|
||||
ewts2uni_test("'u+A", "\u0f60\u0f75");
|
||||
ewts2uni_test("'u+A", "\u0f60\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("'", "\u0f60");
|
||||
ewts2uni_test("'a", "\u0f60");
|
||||
|
||||
ewts2uni_test("'+r-i", "\u0f60\u0fb2\u0f80");
|
||||
ewts2uni_test("'+r-I", "\u0f60\u0fb2\u0f81");
|
||||
ewts2uni_test("'+l-i", "\u0f60\u0fb3\u0f80");
|
||||
ewts2uni_test("'+l-I", "\u0f60\u0fb3\u0f81");
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
* mostly by testing that the Unicode generated for a single
|
||||
* wowel or set of wowels atop k+Sh (U+0F69) is correct. */
|
||||
public void test__EWTS__wowels_on_kSh() {
|
||||
ewts2uni_test("k+ShA", "\u0f69\u0f71");
|
||||
ewts2uni_test("k+Shi", "\u0f69\u0f72");
|
||||
ewts2uni_test("k+ShI", "\u0f69\u0f73");
|
||||
ewts2uni_test("k+Shu", "\u0f69\u0f74");
|
||||
ewts2uni_test("k+ShU", "\u0f69\u0f75");
|
||||
ewts2uni_test("k+Sha+r-i", "\u0f69\u0f76");
|
||||
ewts2uni_test("k+Sha+r-I", "\u0f69\u0f77");
|
||||
ewts2uni_test("k+Sha+l-i", "\u0f69\u0f78");
|
||||
ewts2uni_test("k+Sha+l-I", "\u0f69\u0f79");
|
||||
ewts2uni_test("k+She", "\u0f69\u0f7a");
|
||||
ewts2uni_test("k+Shai", "\u0f69\u0f7b");
|
||||
ewts2uni_test("k+Sho", "\u0f69\u0f7c");
|
||||
ewts2uni_test("k+Shau", "\u0f69\u0f7d");
|
||||
ewts2uni_test("k+ShaM", "\u0f69\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaH", "\u0f69\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sh-i", "\u0f69\u0f80");
|
||||
ewts2uni_test("k+Sh-I", "\u0f69\u0f81");
|
||||
ewts2uni_test("k+Sha~M`", "\u0f69\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha~M", "\u0f69\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha?", "\u0f69\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha\\u0f86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\U0f86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\U0F86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\u0F86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000f86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000f86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000F86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000F86", "\u0f69\u0f86");
|
||||
ewts2uni_test("k+Sha\\u0f87", "\u0f69\u0f87");
|
||||
ewts2uni_test("k+ShA", "\u0f40\u0fb5\u0f71");
|
||||
ewts2uni_test("k+Shi", "\u0f40\u0fb5\u0f72");
|
||||
ewts2uni_test("k+ShI", "\u0f40\u0fb5\u0f71\u0f72");
|
||||
ewts2uni_test("k+Shu", "\u0f40\u0fb5\u0f74");
|
||||
ewts2uni_test("k+ShU", "\u0f40\u0fb5\u0f71\u0f74");
|
||||
ewts2uni_test("k+She", "\u0f40\u0fb5\u0f7a");
|
||||
ewts2uni_test("k+Shai", "\u0f40\u0fb5\u0f7b");
|
||||
ewts2uni_test("k+Sho", "\u0f40\u0fb5\u0f7c");
|
||||
ewts2uni_test("k+Shau", "\u0f40\u0fb5\u0f7d");
|
||||
ewts2uni_test("k+ShaM", "\u0f40\u0fb5\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaH", "\u0f40\u0fb5\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sh-i", "\u0f40\u0fb5\u0f80");
|
||||
ewts2uni_test("k+Sh-I", "\u0f40\u0fb5\u0f81");
|
||||
ewts2uni_test("k+Sha~M`", "\u0f40\u0fb5\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha~M", "\u0f40\u0fb5\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha?", "\u0f40\u0fb5\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+Sha\\u0f86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\U0f86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\U0F86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\u0F86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000f86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000f86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000F86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\u00000F86", "\u0f40\u0fb5\u0f86");
|
||||
ewts2uni_test("k+Sha\\u0f87", "\u0f40\u0fb5\u0f87");
|
||||
|
||||
ewts2uni_test("k+ShaMH", "\u0f69\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaHM", "\u0f69\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaMH", "\u0f40\u0fb5\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("k+ShaHM", "\u0f40\u0fb5\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("k+ShA+i", "\u0f69\u0f73");
|
||||
ewts2uni_test("k+Sho+o", "\u0f69\u0f7d");
|
||||
ewts2uni_test("k+She+e", "\u0f69\u0f7b");
|
||||
ewts2uni_test("k+She+e+e", "\u0f69\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+She+e+e+e", "\u0f69\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+She+e+e+e+e", "\u0f69\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+Sho+e", "\u0f69\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+Shu+A+i+o+e", "\u0f69\u0f74\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f69\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("k+Shu+A", "\u0f69\u0f75");
|
||||
ewts2uni_test("k+ShA+i", "\u0f40\u0fb5\u0f71\u0f72");
|
||||
ewts2uni_test("k+Sho+o", "\u0f40\u0fb5\u0f7c\u0f7c");
|
||||
ewts2uni_test("k+She+e", "\u0f40\u0fb5\u0f7a\u0f7a");
|
||||
ewts2uni_test("k+She+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+She+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+She+e+e+e+e", "\u0f40\u0fb5\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+Sho+e", "\u0f40\u0fb5\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+Shu+A+i+o+e", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+Shu+A+i+o+eHM", "\u0f40\u0fb5\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("k+Shu+A", "\u0f40\u0fb5\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("k+Sh", "\u0f69");
|
||||
ewts2uni_test("k+Sha", "\u0f69");
|
||||
ewts2uni_test("k+Sh", "\u0f40\u0fb5");
|
||||
ewts2uni_test("k+Sha", "\u0f40\u0fb5");
|
||||
|
||||
ewts2uni_test("k+Sh+r-i", "\u0f40\u0fb5\u0fb2\u0f80");
|
||||
ewts2uni_test("k+Sh+r-I", "\u0f40\u0fb5\u0fb2\u0f81");
|
||||
ewts2uni_test("k+Sh+l-i", "\u0f40\u0fb5\u0fb3\u0f80");
|
||||
ewts2uni_test("k+Sh+l-I", "\u0f40\u0fb5\u0fb3\u0f81");
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
|
@ -325,25 +450,22 @@ public class EWTSTest extends TestCase {
|
|||
public void test__EWTS__wowels_on_phyw() {
|
||||
ewts2uni_test("phywA", "\u0f55\u0fb1\u0fad\u0f71");
|
||||
ewts2uni_test("phywi", "\u0f55\u0fb1\u0fad\u0f72");
|
||||
ewts2uni_test("phywI", "\u0f55\u0fb1\u0fad\u0f73");
|
||||
ewts2uni_test("phywI", "\u0f55\u0fb1\u0fad\u0f71\u0f72");
|
||||
ewts2uni_test("phywu", "\u0f55\u0fb1\u0fad\u0f74");
|
||||
ewts2uni_test("phywU", "\u0f55\u0fb1\u0fad\u0f75");
|
||||
ewts2uni_test("phywa+r-i", "\u0f55\u0fb1\u0fad\u0f76");
|
||||
ewts2uni_test("phywa+r-I", "\u0f55\u0fb1\u0fad\u0f77");
|
||||
ewts2uni_test("phywa+l-i", "\u0f55\u0fb1\u0fad\u0f78");
|
||||
ewts2uni_test("phywa+l-I", "\u0f55\u0fb1\u0fad\u0f79");
|
||||
ewts2uni_test("phywU", "\u0f55\u0fb1\u0fad\u0f71\u0f74");
|
||||
ewts2uni_test("phywe", "\u0f55\u0fb1\u0fad\u0f7a");
|
||||
ewts2uni_test("phywai", "\u0f55\u0fb1\u0fad\u0f7b");
|
||||
ewts2uni_test("phywo", "\u0f55\u0fb1\u0fad\u0f7c");
|
||||
ewts2uni_test("phywau", "\u0f55\u0fb1\u0fad\u0f7d");
|
||||
ewts2uni_test("phywaM", "\u0f55\u0fb1\u0fad\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaH", "\u0f55\u0fb1\u0fad\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phyw-i", "\u0f55\u0fb1\u0fad\u0f80");
|
||||
ewts2uni_test("phyw-I", "\u0f55\u0fb1\u0fad\u0f81");
|
||||
ewts2uni_test("phywa~M`", "\u0f55\u0fb1\u0fad\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phyw\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
assertEquals(EWTSTraits.instance().getUnicodeForWowel("\u0f86+\u0f84"), "\u0f86\u0f84");
|
||||
|
||||
ewts2uni_test("phyw\\u0f84\\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
|
||||
ewts2uni_test("phyw\\u0f84\u0f86", "\u0f55\u0fb1\u0fad\u0f84\u0f86");
|
||||
ewts2uni_test("phywa\\u0f86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
ewts2uni_test("phywa\\u0f86\u0f84", "\u0f55\u0fb1\u0fad\u0f86\u0f84");
|
||||
ewts2uni_test("phywa\\U0f86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
ewts2uni_test("phywa\\U0F86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
ewts2uni_test("phywa\\u0F86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
|
@ -353,25 +475,34 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("phywa\\u00000F86", "\u0f55\u0fb1\u0fad\u0f86");
|
||||
ewts2uni_test("phywa\\u0f87", "\u0f55\u0fb1\u0fad\u0f87");
|
||||
|
||||
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("phywA+i", "\u0f55\u0fb1\u0fad\u0f73");
|
||||
ewts2uni_test("phywo+o", "\u0f55\u0fb1\u0fad\u0f7d");
|
||||
ewts2uni_test("phywe+e", "\u0f55\u0fb1\u0fad\u0f7b");
|
||||
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywA+i", "\u0f55\u0fb1\u0fad\u0f71\u0f72");
|
||||
ewts2uni_test("phywo+o", "\u0f55\u0fb1\u0fad\u0f7c\u0f7c");
|
||||
ewts2uni_test("phywe+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a");
|
||||
ewts2uni_test("phywe+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywe+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywe+e+e+e+e", "\u0f55\u0fb1\u0fad\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("phywo+e", "\u0f55\u0fb1\u0fad\u0f7c\u0f7a");
|
||||
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f75");
|
||||
ewts2uni_test("phywu+A+i+o+e", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("phywu+A+i+o+eHM", "\u0f55\u0fb1\u0fad\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("phywu+A", "\u0f55\u0fb1\u0fad\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("phyw", "\u0f55\u0fb1\u0fad");
|
||||
ewts2uni_test("phywa", "\u0f55\u0fb1\u0fad");
|
||||
|
||||
ewts2uni_test("phywaM", "\u0f55\u0fb1\u0fad\u0f7e"); /* TODO(DLC)[EWTS->Tibetan]: NOW: aM is not a wowel! */ // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaH", "\u0f55\u0fb1\u0fad\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywa~M`", "\u0f55\u0fb1\u0fad\u0f82"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywa~M", "\u0f55\u0fb1\u0fad\u0f83"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywa?", "\u0f55\u0fb1\u0fad\u0f84"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaMH", "\u0f55\u0fb1\u0fad\u0f7e\u0f7f"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
ewts2uni_test("phywaHM", "\u0f55\u0fb1\u0fad\u0f7f\u0f7e"); // TODO(DLC)[EWTS->Tibetan]: than needs to say
|
||||
|
||||
assert_EWTS_error("phywr-i");
|
||||
assert_EWTS_error("phyw+r-i");
|
||||
assert_EWTS_error("phyw+l-i");
|
||||
}
|
||||
|
||||
/** Tests that our implementation of EWTS's wowels are correct,
|
||||
|
@ -382,13 +513,9 @@ public class EWTSTest extends TestCase {
|
|||
public void test__EWTS__wowels_on_kjjkkj() {
|
||||
ewts2uni_test("k+j+j+k+k+jA", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71");
|
||||
ewts2uni_test("k+j+j+k+k+ji", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f72");
|
||||
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f73");
|
||||
ewts2uni_test("k+j+j+k+k+jI", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
|
||||
ewts2uni_test("k+j+j+k+k+ju", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74");
|
||||
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f75");
|
||||
ewts2uni_test("k+j+j+k+k+ja+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f76");
|
||||
ewts2uni_test("k+j+j+k+k+ja+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f77");
|
||||
ewts2uni_test("k+j+j+k+k+ja+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f78");
|
||||
ewts2uni_test("k+j+j+k+k+ja+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f79");
|
||||
ewts2uni_test("k+j+j+k+k+jU", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f74");
|
||||
ewts2uni_test("k+j+j+k+k+je", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+jai", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
|
||||
ewts2uni_test("k+j+j+k+k+jo", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c");
|
||||
|
@ -416,85 +543,52 @@ public class EWTSTest extends TestCase {
|
|||
|
||||
// Than's e-mails of Aug 10 and Aug 11, 2004 say that A+i is
|
||||
// the same as I and o+o is the same as au.
|
||||
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f73");
|
||||
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7d");
|
||||
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b");
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b\u0f7b"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7b\u0f7b\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+jA+i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f71\u0f72");
|
||||
ewts2uni_test("k+j+j+k+k+jo+o", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7c");
|
||||
ewts2uni_test("k+j+j+k+k+je+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+je+e+e+e+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7a\u0f7a\u0f7a\u0f7a\u0f7a"); // TODO(DLC)[EWTS->Tibetan]:?
|
||||
ewts2uni_test("k+j+j+k+k+jo+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f75");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A+i+o+e", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A+i+o+eHM", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71\u0f72\u0f7c\u0f7a\u0f7f\u0f7e");
|
||||
ewts2uni_test("k+j+j+k+k+ju+A", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0f74\u0f71");
|
||||
|
||||
ewts2uni_test("k+j+j+k+k+j", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
|
||||
ewts2uni_test("k+j+j+k+k+ja", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97");
|
||||
ewts2uni_test("k+j+j+k+k+j+r-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f80");
|
||||
ewts2uni_test("k+j+j+k+k+j+r-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb2\u0f81");
|
||||
ewts2uni_test("k+j+j+k+k+j+l-i", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f80");
|
||||
ewts2uni_test("k+j+j+k+k+j+l-I", "\u0f40\u0f97\u0f97\u0f90\u0f90\u0f97\u0fb3\u0f81");
|
||||
}
|
||||
|
||||
/** Tests that the EWTS that the spec says corresponds to each
|
||||
* codepoint really does. */
|
||||
public void test__EWTS__tags_each_unicode_value() {
|
||||
ewts2uni_test("\\u0ef0", "\u0ef0");
|
||||
for (char i = '\u0ef0'; i < '\u1010'; i++) {
|
||||
// invalid codepoint like U+0F48? No problem! TODO(DLC)[EWTS->Tibetan]: NOTE: use a unicode "spell checker" to find such problems
|
||||
String s = new String(new char[] { i });
|
||||
ewts2uni_test(UnicodeUtils.unicodeStringToPrettyString(s), s);
|
||||
ewts2uni_test("\\" + UnicodeUtils.unicodeStringToPrettyString(s), s);
|
||||
}
|
||||
ewts2uni_test("\\u0000", "\u0000");
|
||||
ewts2uni_test("\\u0eff", "\u0eff");
|
||||
ewts2uni_test("\\u0eff", "\u0eff");
|
||||
ewts2uni_test("\\u0f00", "\u0f00");
|
||||
ewts2uni_test("\\u0f40", "\u0f40");
|
||||
ewts2uni_test("\\u0f70", "\u0f70");
|
||||
ewts2uni_test("\\u0fff", "\u0fff");
|
||||
assert_EWTS_error("\\u0f70"); // reserved codepoint
|
||||
assert_EWTS_error("\\u0fff"); // reserved codepoint
|
||||
ewts2uni_test("\\uf000", "\uf000");
|
||||
ewts2uni_test("\\uf01f", "\uf01f");
|
||||
ewts2uni_test("\\uefff", "\uefff");
|
||||
|
||||
ewts2uni_test("\\ucafe0000", "\ucafe0000");
|
||||
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
|
||||
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
|
||||
ewts2uni_test("\\ucafe0f00", "\ucafe0f00");
|
||||
ewts2uni_test("\\ucafe0f40", "\ucafe0f40");
|
||||
ewts2uni_test("\\ucafe0f70", "\ucafe0f70");
|
||||
ewts2uni_test("\\ucafe0fff", "\ucafe0fff");
|
||||
ewts2uni_test("\\ucafef000", "\ucafef000");
|
||||
ewts2uni_test("\\ucafef01f", "\ucafef01f");
|
||||
ewts2uni_test("\\ucafeefff", "\ucafeefff");
|
||||
|
||||
|
||||
ewts2uni_test("\\u00000000", "\u00000000");
|
||||
ewts2uni_test("\\u00000eff", "\u00000eff");
|
||||
ewts2uni_test("\\u00000eff", "\u00000eff");
|
||||
ewts2uni_test("\\u00000f00", "\u00000f00");
|
||||
ewts2uni_test("\\u00000f40", "\u00000f40");
|
||||
ewts2uni_test("\\u00000f70", "\u00000f70");
|
||||
ewts2uni_test("\\u00000fff", "\u00000fff");
|
||||
ewts2uni_test("\\u0000f000", "\u0000f000");
|
||||
ewts2uni_test("\\u0000f01f", "\u0000f01f");
|
||||
ewts2uni_test("\\u0000efff", "\u0000efff");
|
||||
|
||||
ewts2uni_test("\\u00000000", "\u0000");
|
||||
ewts2uni_test("\\u00000eff", "\u0eff");
|
||||
ewts2uni_test("\\u00000eff", "\u0eff");
|
||||
ewts2uni_test("\\u00000f00", "\u0f00");
|
||||
ewts2uni_test("\\u00000f40", "\u0f40");
|
||||
ewts2uni_test("\\u00000f70", "\u0f70");
|
||||
ewts2uni_test("\\u00000fff", "\u0fff");
|
||||
ewts2uni_test("\\u0000f000", "\uf000");
|
||||
ewts2uni_test("\\u0000f01f", "\uf01f");
|
||||
ewts2uni_test("\\u0000efff", "\uefff");
|
||||
|
||||
ewts2uni_test("\\UcaFe0000", "\ucaFe0000");
|
||||
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
|
||||
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
|
||||
ewts2uni_test("\\UcaFe0f00", "\ucaFe0f00");
|
||||
ewts2uni_test("\\UcaFe0f40", "\ucaFe0f40");
|
||||
ewts2uni_test("\\UcaFe0f70", "\ucaFe0f70");
|
||||
ewts2uni_test("\\UcaFe0fff", "\ucaFe0fff");
|
||||
ewts2uni_test("\\UcaFef000", "\ucaFef000");
|
||||
ewts2uni_test("\\UcaFef01f", "\ucaFef01f");
|
||||
ewts2uni_test("\\UcaFeefff", "\ucaFeefff");
|
||||
|
||||
// Below was semiautomatically generated from the EWTS spec's
|
||||
// 'ewts.xml' representation (early August 2004 edition):
|
||||
ewts2uni_test("v", "\u0F56\u0F39");
|
||||
ewts2uni_test("f", "\u0F55\u0F39");
|
||||
|
||||
ewts2uni_test("\u0f88+ka", "\u0f88\u0f90");
|
||||
ewts2uni_test("\u0f88+kha", "\u0f88\u0f91");
|
||||
ewts2uni_test("oM", "\u0F00");
|
||||
ewts2uni_test("\\u0F01", "\u0F01");
|
||||
ewts2uni_test("\\u0F02", "\u0F02");
|
||||
|
@ -599,13 +693,13 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("s", "\u0F66");
|
||||
ewts2uni_test("h", "\u0F67");
|
||||
ewts2uni_test("a", "\u0F68");
|
||||
ewts2uni_test("k+Sh", "\u0F69");
|
||||
ewts2uni_test("k+Sh", "\u0f40\u0fb5"); // there is no way in EWTS to specify \u0f69 in particular without using \\u0f69
|
||||
ewts2uni_test("R+", "\u0F6A"); // TODO(DLC)[EWTS->Tibetan]: move to illegal test
|
||||
ewts2uni_test("A", "\u0F71");
|
||||
ewts2uni_test("A", "\u0F71"); // TODO(DLC)[EWTS->Tibetan]: no?! see above
|
||||
ewts2uni_test("i", "\u0F72");
|
||||
ewts2uni_test("I", "\u0F73");
|
||||
ewts2uni_test("I", "\u0F71\u0F72");
|
||||
ewts2uni_test("u", "\u0F74");
|
||||
ewts2uni_test("U", "\u0F75");
|
||||
ewts2uni_test("U", "\u0F71\u0F74");
|
||||
ewts2uni_test("r-i", "\u0F76");
|
||||
ewts2uni_test("r-I", "\u0F77");
|
||||
ewts2uni_test("l-i", "\u0F78");
|
||||
|
@ -731,15 +825,75 @@ public class EWTSTest extends TestCase {
|
|||
ewts2uni_test("\\uF041", "\uF041");
|
||||
ewts2uni_test("\\uF042", "\uF042");
|
||||
}
|
||||
|
||||
/** Checks one consonant that carries several wowel tokens in a
 *  row: the EWTS input "k-I~M`~X" is paired with the codepoints
 *  U+0F40 U+0F81 U+0F82 U+0F35.  (Presumably ewts2uni_test asserts
 *  that the first argument converts to the second -- confirm
 *  against its definition.) */
public void test__EWTS__long_wowels() {
|
||||
ewts2uni_test("k-I~M`~X", "\u0f40\u0f81\u0f82\u0f35"); // TODO(DLC)[EWTS->Tibetan]: actually the 0f68 stuff could be true... ask
|
||||
}
|
||||
|
||||
/** Exercises EWTS Unicode escapes written with eight hex digits
 *  rather than four.  Escapes whose value lies above 0xFFFF are
 *  currently expected to be rejected (see the error expectations
 *  and TODOs below); the blocks wrapped in "if (false)" hold the
 *  desired expectations and stay disabled until that support
 *  exists. */
public void test__EWTS__32bit_unicode_escapes() {
|
||||
assert_EWTS_error("\\u00010000"); // TODO(dchandler): make it work
|
||||
assert_EWTS_error("\\uF0010000"); // TODO(dchandler): make it work
|
||||
// For now the converter is expected to emit an inline error
// message in place of the character.
ewts2uni_test("\\ucafe0000",
|
||||
"[#ERROR Sorry, we don't yet support Unicode escape sequences above 0x0000FFFF! File a bug.]");
|
||||
// TODO(dchandler): make it "\ucafe0000");
|
||||
// Disabled: desired behavior once escapes above 0xFFFF work.
// NOTE(review): a Java escape is exactly four hex digits, so each
// expected literal here is U+CAFE (or similar) followed by four
// literal characters, not one 32-bit codepoint -- confirm.
if (false) {
|
||||
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
|
||||
ewts2uni_test("\\ucafe0eff", "\ucafe0eff");
|
||||
ewts2uni_test("\\ucafe0f00", "\ucafe0f00");
|
||||
ewts2uni_test("\\ucafe0f40", "\ucafe0f40");
|
||||
ewts2uni_test("\\ucafe0f70", "\ucafe0f70");
|
||||
ewts2uni_test("\\ucafe0fff", "\ucafe0fff");
|
||||
ewts2uni_test("\\ucafef000", "\ucafef000");
|
||||
ewts2uni_test("\\ucafef01f", "\ucafef01f");
|
||||
ewts2uni_test("\\ucafeefff", "\ucafeefff");
|
||||
|
||||
ewts2uni_test("\\uffffffff", "\uffffffff");
|
||||
ewts2uni_test("\\ueeeeeee2", "\ueeeeeee2");
|
||||
}
|
||||
|
||||
// Eight-digit escapes whose upper four digits are zero.
// NOTE(review): in Java source each expected literal in this group
// is U+0000 followed by four more literal characters, whereas the
// next group expects a single character for the very same inputs
// -- confirm which expectation is intended.
ewts2uni_test("\\u00000000", "\u00000000");
|
||||
ewts2uni_test("\\u00000eff", "\u00000eff");
|
||||
ewts2uni_test("\\u00000eff", "\u00000eff");
|
||||
ewts2uni_test("\\u00000f00", "\u00000f00");
|
||||
ewts2uni_test("\\u00000f40", "\u00000f40");
|
||||
ewts2uni_test("\\u00000f70", "\u00000f70");
|
||||
ewts2uni_test("\\u00000fff", "\u00000fff");
|
||||
ewts2uni_test("\\u0000f000", "\u0000f000");
|
||||
ewts2uni_test("\\u0000f01f", "\u0000f01f");
|
||||
ewts2uni_test("\\u0000efff", "\u0000efff");
|
||||
|
||||
// Same inputs as the group above, each paired with the single
// 16-bit character named by the low four hex digits.
ewts2uni_test("\\u00000000", "\u0000");
|
||||
ewts2uni_test("\\u00000eff", "\u0eff");
|
||||
ewts2uni_test("\\u00000eff", "\u0eff");
|
||||
ewts2uni_test("\\u00000f00", "\u0f00");
|
||||
ewts2uni_test("\\u00000f40", "\u0f40");
|
||||
ewts2uni_test("\\u00000f70", "\u0f70");
|
||||
ewts2uni_test("\\u00000fff", "\u0fff");
|
||||
ewts2uni_test("\\u0000f000", "\uf000");
|
||||
ewts2uni_test("\\u0000f01f", "\uf01f");
|
||||
ewts2uni_test("\\u0000efff", "\uefff");
|
||||
|
||||
// The escape introduced by an uppercase U is expected to be an
// error for now; the disabled block lists the desired results.
assert_EWTS_error("\\UcaFe0000");
|
||||
if (false) { // TODO(dchandler): make these work
|
||||
ewts2uni_test("\\UcaFe0000", "\ucaFe0000");
|
||||
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
|
||||
ewts2uni_test("\\UcaFe0eff", "\ucaFe0eff");
|
||||
ewts2uni_test("\\UcaFe0f00", "\ucaFe0f00");
|
||||
ewts2uni_test("\\UcaFe0f40", "\ucaFe0f40");
|
||||
ewts2uni_test("\\UcaFe0f70", "\ucaFe0f70");
|
||||
ewts2uni_test("\\UcaFe0fff", "\ucaFe0fff");
|
||||
ewts2uni_test("\\UcaFef000", "\ucaFef000");
|
||||
ewts2uni_test("\\UcaFef01f", "\ucaFef01f");
|
||||
ewts2uni_test("\\UcaFeefff", "\ucaFeefff");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: test that "\[JAVA_SOURCE_WILL_NOT_COMPILE_WITHOUT_ME]uxxxx " works out well
|
||||
|
||||
/** Tests that certain strings are not legal EWTS. */
|
||||
public void test__EWTS__illegal_things() {
|
||||
assert_EWTS_error("k\\u0f19"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("k\\u0f18"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("k\\u0f3e"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("k\\u0f3f"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("m+");
|
||||
|
||||
assert_EWTS_error("kSha"); // use "k+Sha" instead
|
||||
|
||||
|
@ -763,7 +917,27 @@ public class EWTSTest extends TestCase {
|
|||
assert_EWTS_error("al-I");
|
||||
|
||||
assert_EWTS_error("g..ya"); // use "g.ya" instead
|
||||
assert_EWTS_error("m..");
|
||||
assert_EWTS_error("g"); // use "ga" instead TODO(DLC)[EWTS->Tibetan]:?
|
||||
|
||||
assert_EWTS_error("k\\u0f19"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("k\\u0f18"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("k\\u0f3e"); // only numbers combine with f19,f18,f3e,f3f
|
||||
assert_EWTS_error("k\\u0f3f"); // only numbers combine with f19,f18,f3e,f3f
|
||||
}
|
||||
|
||||
/** Collects expectations that, judging by the method name and its
 *  TODO tag, were failing when this was written: escapes for
 *  U+0F19 and U+0F18 that should be rejected, and four inputs
 *  beginning with "'a" whose expected output the inline TODOs
 *  call into question. */
public void testDLCFailingNow() { // TODO(DLC)[EWTS->Tibetan]
|
||||
assert_EWTS_error("\\u0f19");
|
||||
assert_EWTS_error("\\u0f18");
|
||||
// Here the first escape is EWTS text while the second is a Java
// escape, i.e. the literal character U+0F20 follows the mark.
assert_EWTS_error("\\u0f19\u0f20"); // wrong order...
|
||||
|
||||
{
|
||||
ewts2uni_test("'a+r-i", "\u0f60\u0fb2\u0f80"); // TODO(DLC)[EWTS->Tibetan]: NOW: prefix rules should make this invalid!
|
||||
ewts2uni_test("'a+r-I", "\u0f60\u0fb2\u0f81");
|
||||
ewts2uni_test("'a+l-i", "\u0f60\u0fb3\u0f80");// TODO(DLC)[EWTS->Tibetan]: NOW error handling is CRAP
|
||||
ewts2uni_test("'a+l-I", "\u0f60\u0fb3\u0f81");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -779,8 +953,6 @@ public class EWTSTest extends TestCase {
|
|||
// \u0f40\u0f7a\u0f74 is illegal (thus \u0f40\u0f74\u0f7a is
|
||||
// what you probably intended), have it find \u0f7a\u0f74.
|
||||
//
|
||||
// TODO(DLC)[EWTS->Tibetan]: and have it find \u0f7a\u0f7a and suggest \u0f7b, etc.
|
||||
//
|
||||
// TODO(DLC)[EWTS->Tibetan]: and \u0f7f\u0f7e is probably illegal and should be switched?
|
||||
|
||||
// TODO(DLC)[EWTS->Tibetan]: flesh out the \[JAVA_SOURCE_WILL_NOT_COMPILE_WITHOUT_ME]u escape rules in lexing. Is it like Java, where escapes in source code are processed in a pre-lexing pass? Presumably not: \u0060 causes \u0060 in the output, and \u0f40a is not like ka. So escapes separate tsheg bars as far as lexing is concerned, yes? But escapes are the only transliteration available for some Tibetan Unicode characters, so ka\[JAVA_SOURCE_WILL_NOT_COMPILE_WITHOUT_ME]u0fXX may need to behave in a Java-ish way, maybe?
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue