Better, but still flawed, handling of EWTS [^] (i.e., U+0F39).

2005-07-06 22:26:55 +00:00 · 2005-07-06 22:26:55 +00:00 · b74af71efc
commit b74af71efc
parent f5d87ab226
3 changed files with 31 additions and 26 deletions
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@@ -265,7 +265,7 @@ R~233,5~~10,104~1,109~~1,123~1,125~~~0FBC
 // horizontal stroke goes upward on U+0F5F and downward on U+0F5B.
 \uF031~146,5~~10,42~~~~~~~none
 f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39
-v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39
+v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,0F39
 T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A
 Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B
 D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C
@@ -1201,4 +1201,3 @@ r~176,4~~8,71~~~~~~~0FB2
 \tmw2059~57,2~~3,59~1,109~4,120~1,125~1,123~4,106~4,113~f88,f90
 // utsama kha:
 \tmw2060~58,2~~3,60~1,109~4,120~1,125~1,123~4,106~4,113~f88,f91
-
--- a/source/org/thdl/tib/text/ttt/EWTSTest.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTest.java
@@ -148,11 +148,27 @@ public class EWTSTest extends TestCase {
        }
    }

+    public void test0F39() {
+        ewts2uni_test("v", "\u0F56\u0F39");
+        ewts2uni_test("f", "\u0F55\u0F39");
+        ewts2uni_test("a^", "\u0f68\u0f39");
+        ewts2uni_test("hUM^", "\u0f67\u0f71\u0f74\u0f7e\u0f39");
+        ewts2uni_test("ph^", "\u0f55\u0f39");
+        ewts2uni_test("phe^", "\u0f55\u0f7a\u0f39");  // TODO(DLC)[EWTS->Tibetan]: does order of U+0F39 matter?
+        ewts2uni_test("ph^e", "\u0f55\u0f39\u0f68\u0f7a");  // TODO(DLC)[EWTS->Tibetan]: This is no good!  We don't even warn, do we!?
+        ewts2uni_test("f+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
+        ewts2uni_test("faM", "\u0f55\u0f39\u0f7e");
+        ewts2uni_test("vaM", "\u0f56\u0f39\u0f7e");
+        ewts2uni_test("k+fa", "\u0f40\u0fa5\u0f39");
+        ewts2uni_test("f+va", "\u0f55\u0f39\u0fa6\u0f39");
+        ewts2uni_test("ph+veM", "\u0f55\u0fa6\u0f39\u0f7a\u0f7e");
+        if (RUN_FAILING_TESTS) ewts2uni_test("ph^+beM", "\u0f55\u0f39\u0fa6\u0f7a\u0f7e");
+    }
+
    /** Tests that the EWTS->unicode converter isn't completely
        braindead. */
    public void testEwtsBasics() {
        ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66");
-        if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW");
        ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
        ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
        ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad");
@@ -690,7 +706,7 @@ public class EWTSTest extends TestCase {
        ewts2uni_test("\\u0F36", "\u0F36");
        if (RUN_FAILING_TESTS) ewts2uni_test("X", "\u0F37"); // TODO(DLC)[EWTS->Tibetan]: error combiner
        ewts2uni_test("\\u0F38", "\u0F38");
-        if (RUN_FAILING_TESTS) ewts2uni_test("^", "\u0F39"); // TODO(DLC)[EWTS->Tibetan]: error combiner
+        assert_EWTS_error("^");  // If you want \u0f68\u0f39, use [a^]
        ewts2uni_test("<", "\u0F3A");
        ewts2uni_test(">", "\u0F3B");
        ewts2uni_test("(", "\u0F3C");
--- a/source/org/thdl/tib/text/ttt/EWTSTraits.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java
@@ -68,11 +68,10 @@ public final class EWTSTraits implements TTraits {
     *  three. */
    public int maxConsonantLength() { return 3; }

-    /** {-i~M`}, in a tie for the longest wowel, has 5 characters, so
-     *  this is five.  (No, 'l-i' and 'r-i' are not wowels (but '-i'
-     *  is).  (TODO(DLC)[EWTS->Tibetan]: this is crap!  you can put arbitrary wowels
-     *  together using plus signs or Unicode escapes) */
-    public int maxWowelLength() { return 3; /* a~M`  (TODO(DLC)[EWTS->Tibetan]:!  why the 'a'?) */}
+    /** Wowels can be arbitrarily long via stacking.  But each
+     *  component is no longer, in characters, than this.  [~M`] is
+     *  the current winner. */
+    public int maxWowelLength() { return 3; }

    public boolean isUnicodeConsonant(char ch) {
        return ((ch != '\u0f48' && ch >= '\u0f40' && ch <= '\u0f6a')
@@ -86,6 +85,7 @@ public final class EWTSTraits implements TTraits {
    public boolean isUnicodeWowel(char ch) {
    	// TODO(DLC)[EWTS->Tibetan]: what about combiners that combine only with digits?  TEST
        return ((ch >= '\u0f71' && ch <= '\u0f84')
+                || '\u0f39' == ch
                || isUnicodeWowelThatRequiresAChen(ch));
    }

@@ -269,21 +269,15 @@ public final class EWTSTraits implements TTraits {
            if ("-I".equals(wowel)) return "\u0f81";
            if ("I".equals(wowel)) return "\u0f71\u0f72"; // \u0f73 is discouraged

-            // TODO(DLC)[EWTS->Tibetan]: fix me!
-                // DLC say ah        if ("aM".equals(wowel)) return "\u0f7e";
+            // TODO(DLC)[EWTS->Tibetan]: test, test, test.
            if ("M".equals(wowel)) return "\u0f7e";
-            // DLC say ah        if ("aH".equals(wowel)) return "\u0f7f";
            if ("H".equals(wowel)) return "\u0f7f";
-            // DLC say ah        if ("a?".equals(wowel)) return "\u0f84";
            if ("?".equals(wowel)) return "\u0f84";
-            // DLC say ah        if ("a~M".equals(wowel)) return "\u0f83";
            if ("~M".equals(wowel)) return "\u0f83";
-            // DLC say ah        if ("a~M`".equals(wowel)) return "\u0f82";
            if ("~M`".equals(wowel)) return "\u0f82";
-            // DLC say ah        if ("aX".equals(wowel)) return "\u0f37";
            if ("X".equals(wowel)) return "\u0f37";
-            // DLC say ah        if ("a~X".equals(wowel)) return "\u0f35";
            if ("~X".equals(wowel)) return "\u0f35";
+            if ("^".equals(wowel)) return "\u0f39";

            return null;
        }
@@ -362,6 +356,9 @@ public final class EWTSTraits implements TTraits {
            if ("h".equals(l)) return "\u0FB7";
            if ("a".equals(l)) return "\u0FB8";
            if ("k+Sh".equals(l)) return "\u0FB9";
+
+            if ("f".equals(l)) return "\u0FA5\u0F39";
+            if ("v".equals(l)) return "\u0FA6\u0F39";
            return null;
        } else {
            if ("R".equals(l)) return "\u0f6a";
@@ -426,17 +423,10 @@ public final class EWTSTraits implements TTraits {
    public boolean isWowelThatRequiresAChen(String s) {
        // TODO(DLC)[EWTS->Tibetan]: fix me!
        return ((s.length() == 1 && (isUnicodeWowelThatRequiresAChen(s.charAt(0))
-                                     || "?MHX".indexOf(s.charAt(0)) >= 0))
-                // DLC say ah                || "aM".equals(s) // DLC funny...  (DLC NOW too funny! affects longest wowel length!)
-                // DLC say ah                || "a?".equals(s) // DLC funny...
-                // DLC say ah                || "aH".equals(s) // DLC funny...
-                // DLC say ah                || "aX".equals(s) // DLC funny...
+                                     || "?MHX^".indexOf(s.charAt(0)) >= 0))
                || "~X".equals(s)
-                // DLC say ah                || "a~X".equals(s) // DLC funny...
                || "~M".equals(s)
-                // DLC say ah                || "a~M".equals(s) // DLC funny...
                || "~M`".equals(s)
-                // DLC say ah                || "a~M`".equals(s) // DLC funny...
                );
    }