Better, but still flawed, handling of EWTS [^] (i.e., U+0F39).

This commit is contained in:
dchandler 2005-07-06 22:26:55 +00:00
parent f5d87ab226
commit b74af71efc
3 changed files with 31 additions and 26 deletions

View file

@ -265,7 +265,7 @@ R~233,5~~10,104~1,109~~1,123~1,125~~~0FBC
// horizontal stroke goes upward on U+0F5F and downward on U+0F5B.
\uF031~146,5~~10,42~~~~~~~none
f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39
v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39
v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,0F39
T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A
Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B
D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C
@ -1201,4 +1201,3 @@ r~176,4~~8,71~~~~~~~0FB2
\tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
// utsama kha:
\tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91

View file

@ -148,11 +148,27 @@ public class EWTSTest extends TestCase {
}
}
/** Exercises EWTS inputs whose expected Unicode output contains
 *  U+0F39, written either via the consonants [f] and [v] or via an
 *  explicit [^].  Each pair is handed to ewts2uni_test in the order
 *  listed (presumably it asserts that the first string converts to
 *  the second -- confirm against the helper's definition). */
public void test0F39() {
    // Each row is { EWTS input, expected Unicode output }.
    final String[][] cases = {
        { "v", "\u0F56\u0F39" },
        { "f", "\u0F55\u0F39" },
        { "a^", "\u0f68\u0f39" },
        { "hUM^", "\u0f67\u0f71\u0f74\u0f7e\u0f39" },
        { "ph^", "\u0f55\u0f39" },
        // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter?
        { "phe^", "\u0f55\u0f7a\u0f39" },
        // TODO(DLC)[EWTS->Tibetan]: This is no good!  We don't even warn, do we!?
        { "ph^e", "\u0f55\u0f39\u0f68\u0f7a" },
        { "f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e" },
        { "faM", "\u0f55\u0f39\u0f7e" },
        { "vaM", "\u0f56\u0f39\u0f7e" },
        { "k+fa", "\u0f40\u0fa5\u0f39" },
        { "f+va", "\u0f55\u0f39\u0fa6\u0f39" },
        { "ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e" },
    };
    for (int i = 0; i < cases.length; i++) {
        ewts2uni_test(cases[i][0], cases[i][1]);
    }
    // Known not to pass yet; only run when failing tests are enabled.
    if (RUN_FAILING_TESTS) {
        ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
    }
}
/** Tests that the EWTS->unicode converter isn't completely
braindead. */
public void testEwtsBasics() {
ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66");
if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW");
ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad");
@ -690,7 +706,7 @@ public class EWTSTest extends TestCase {
ewts2uni_test("\\u0F36", "\u0F36");
if (RUN_FAILING_TESTS) ewts2uni_test("X", "\u0F37"); // TODO(DLC)[EWTS->Tibetan]: error combiner
ewts2uni_test("\\u0F38", "\u0F38");
if (RUN_FAILING_TESTS) ewts2uni_test("^", "\u0F39"); // TODO(DLC)[EWTS->Tibetan]: error combiner
assert_EWTS_error("^"); // If you want \u0f68\u0f39, use [a^]
ewts2uni_test("<", "\u0F3A");
ewts2uni_test(">", "\u0F3B");
ewts2uni_test("(", "\u0F3C");

View file

@ -68,11 +68,10 @@ public final class EWTSTraits implements TTraits {
* three. */
public int maxConsonantLength() { return 3; }
/** {-i~M`}, in a tie for the longest wowel, has 5 characters, so
* this is five. (No, 'l-i' and 'r-i' are not wowels (but '-i'
* is). (TODO(DLC)[EWTS->Tibetan]: this is crap! you can put arbitrary wowels
* together using plus signs or Unicode escapes) */
public int maxWowelLength() { return 3; /* a~M` (TODO(DLC)[EWTS->Tibetan]:! why the 'a'?) */}
/** Wowels can be arbitrarily long via stacking. But each
* component is no longer, in characters, than this. [~M`] is
* the current winner. */
public int maxWowelLength() { return 3; }
public boolean isUnicodeConsonant(char ch) {
return ((ch != '\u0f48' && ch >= '\u0f40' && ch <= '\u0f6a')
@ -86,6 +85,7 @@ public final class EWTSTraits implements TTraits {
/** Returns true if ch lies in the range U+0F71..U+0F84 inclusive,
 *  is U+0F39, or is accepted by isUnicodeWowelThatRequiresAChen;
 *  returns false otherwise. */
public boolean isUnicodeWowel(char ch) {
    // TODO(DLC)[EWTS->Tibetan]: what about combiners that combine only with digits? TEST
    if (ch >= '\u0f71' && ch <= '\u0f84') {
        return true;
    }
    if (ch == '\u0f39') {
        return true;
    }
    return isUnicodeWowelThatRequiresAChen(ch);
}
@ -269,21 +269,15 @@ public final class EWTSTraits implements TTraits {
if ("-I".equals(wowel)) return "\u0f81";
if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged
// TODO(DLC)[EWTS->Tibetan]: fix me!
// DLC say ah if ("aM".equals(wowel)) return "\u0f7e";
// TODO(DLC)[EWTS->Tibetan]: test, test, test.
if ("M".equals(wowel)) return "\u0f7e";
// DLC say ah if ("aH".equals(wowel)) return "\u0f7f";
if ("H".equals(wowel)) return "\u0f7f";
// DLC say ah if ("a?".equals(wowel)) return "\u0f84";
if ("?".equals(wowel)) return "\u0f84";
// DLC say ah if ("a~M".equals(wowel)) return "\u0f83";
if ("~M".equals(wowel)) return "\u0f83";
// DLC say ah if ("a~M`".equals(wowel)) return "\u0f82";
if ("~M`".equals(wowel)) return "\u0f82";
// DLC say ah if ("aX".equals(wowel)) return "\u0f37";
if ("X".equals(wowel)) return "\u0f37";
// DLC say ah if ("a~X".equals(wowel)) return "\u0f35";
if ("~X".equals(wowel)) return "\u0f35";
if ("^".equals(wowel)) return "\u0f39";
return null;
}
@ -362,6 +356,9 @@ public final class EWTSTraits implements TTraits {
if ("h".equals(l)) return "\u0FB7";
if ("a".equals(l)) return "\u0FB8";
if ("k+Sh".equals(l)) return "\u0FB9";
if ("f".equals(l)) return "\u0FA5\u0F39";
if ("v".equals(l)) return "\u0FA6\u0F39";
return null;
} else {
if ("R".equals(l)) return "\u0f6a";
@ -426,17 +423,10 @@ public final class EWTSTraits implements TTraits {
public boolean isWowelThatRequiresAChen(String s) {
// TODO(DLC)[EWTS->Tibetan]: fix me!
return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0))
|| "?MHX".indexOf(s.charAt(0)) >= 0))
// DLC say ah || "aM".equals(s) // DLC funny... (DLC NOW too funny! affects longest wowel length!)
// DLC say ah || "a?".equals(s) // DLC funny...
// DLC say ah || "aH".equals(s) // DLC funny...
// DLC say ah || "aX".equals(s) // DLC funny...
|| "?MHX^".indexOf(s.charAt(0)) >= 0))
|| "~X".equals(s)
// DLC say ah || "a~X".equals(s) // DLC funny...
|| "~M".equals(s)
// DLC say ah || "a~M".equals(s) // DLC funny...
|| "~M`".equals(s)
// DLC say ah || "a~M`".equals(s) // DLC funny...
);
}