Fix for part of bug 998476.

2005-02-05 22:16:39 +00:00 · 2005-02-05 22:16:39 +00:00 · 287fc181a0
commit 287fc181a0
parent be632e1874
3 changed files with 55 additions and 64 deletions
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@@ -1321,6 +1321,36 @@ public class DuffPaneTest extends DuffPaneTestBase {
           keyboard really must use its technology.
        */
    }
+
+    /** Tests this part of bug 998476:
+
+<pre>
+TMW -> EWTS conversion errors
+The following incorrect conversions are happening:
+
+dga'o -> dag'o
+'da'i -> 'ad'i
+
+'da'i should produce 'da'i since the genitive particle 'i can
+only be appended to syllables that end with a vowel or
+with a chung ('). For instance sgra + 'i = sgra'i, nam
+mkha' + 'i = nam mkha'i. The case here is 'da' + 'i = 'da'i.
+The syllable 'ad can't take the genitive 'i, so 'ad'i is invalid.
+
+Of course this is a hypothetical syllable with no meaning,
+but following the rules 'ad would be correct. 'da would be
+mistaken. "In two-lettered words, the first is always the
+root letter." (Losang Thonden's Modern Tibetan Language,
+p. 41).
+</pre>
+
+     */
+    public void testBug998476() {
+        enableEWTSKeyboard();
+        e("dga'o"); // regression: TMW->EWTS used to yield dag'o
+        e("'da'i"); // regression: TMW->EWTS used to yield 'ad'i
+    }
+
 }
 // FIXME: EWTS needs a list of "native" stacks in it.

--- a/source/org/thdl/tib/input/TinyTest.java
+++ b/source/org/thdl/tib/input/TinyTest.java
@@ -41,35 +41,12 @@ public class TinyTest extends DuffPaneTestBase {
        super(a0);
    }

-    /** Tests this part of bug 998476:
-
-<pre>
-TMW -> EWTS conversion errors
-The following incorrect conversion are happening:
-
-dga'o -> dag'o
-bsad -> bas.d
-'da'i -> 'ad'i
-
-'da'i should produce 'da'i since the genitive particle 'i can
-only be appended to syllables that end with vowel or
-with a chung ('). For instance sgra + 'i = sgra'i, nam
-mkha' + 'i = nam mkha'i. The case here is 'da' + 'i = 'da'i.
-syllable 'ad can't take the genitive 'i, so 'ad'i is invalid.
-
-Of course this is a hypothetical syllable with no meaning,
-but following the rules 'ad would be correct. 'da would be
-mistaken. "In two-lettered words, the first is always the
-root letter." (losang thonden's modern tibetan language,
-pag 41).
-</pre>
-
+    /** Tests part of bug 998476.
     */
    public void testBug998476() {
        enableEWTSKeyboard();
-        e("dga'o", "dag'o"); // FIXME: this is a bug
-        e("bsad", "bas.d"); // FIXME: this is a bug
-        e("'da'i", "'ad'i"); // FIXME: this is a bug
+        // DLC FIXME 998476: make e("bsad") pass.
+        e("lM");
    }
 }

--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -945,7 +945,7 @@ public class TibTextUtils implements THDLWylieConstants {
        TranslitList ans
            = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
        if (debug && warnings.length() > 0)
-            System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
+            System.out.println("DEBUG:     warnings in TMW->Wylie: " + warnings);
        return ans;
    }

@@ -1327,8 +1327,6 @@ public class TibTextUtils implements THDLWylieConstants {
        <ul>
          <li>one or more numbers</li>

-          <li>a single, possibly adorned consonant stack</li>
-
          <li>a legal "tyllable" appended with zero or more particles
              from the set { 'i, 'o, 'u, 'e, 'ang, 'am }</li>
        </ul>
@@ -1374,6 +1372,7 @@ public class TibTextUtils implements THDLWylieConstants {
        TGCList gcs
            = breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
        String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
+        if (debug) System.out.println("DEBUG: tsheg bar classification is " + candidateType);
        int sz = gcs.size();
        if (candidateType == "invalid"
            || candidateType == "single-sanskrit-gc") {
@@ -1422,7 +1421,9 @@ public class TibTextUtils implements THDLWylieConstants {
            int leftover = sz + 1;

            // Appendaged vs. not appendaged?  it affects nothing at
-            // this stage except for pa'm vs. pa'am.
+            // this stage except for pa'm vs. pa'am and
+            // appendaged-prefix/root-root/suffix (e.g., 'ad'i
+            // (incorrect) vs. 'da'i (correct)).
            boolean appendaged = (candidateType.startsWith("appendaged-"));
            candidateType = getCandidateTypeModuloAppendage(candidateType);

@@ -1512,7 +1513,8 @@ public class TibTextUtils implements THDLWylieConstants {
                                              fontSize);
                }
            } else if ("root" == candidateType
-                       || "prefix/root-root/suffix" == candidateType
+                       || (!appendaged
+                           && "prefix/root-root/suffix" == candidateType)
                       || "prefix/root" == candidateType
                       || "root-suffix-postsuffix" == candidateType
                       || "root-suffix" == candidateType) {
@@ -1526,7 +1528,7 @@ public class TibTextUtils implements THDLWylieConstants {
                                     == ((TGCPair)gcs.get(0)).classification);
                    translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1),
                                          fontSize);
-                    if (debug) System.out.println("DEBUG: appending vowel");
+                    if (debug) System.out.println("DEBUG: appending vowel 2");
                } else {
                    if (debug) System.out.println("DEBUG: already has vowel 2");
                }
@@ -1545,6 +1547,8 @@ public class TibTextUtils implements THDLWylieConstants {
                }
            } else if ("prefix-root-suffix" == candidateType
                       || "prefix-root" == candidateType
+                       || (appendaged
+                           && "prefix/root-root/suffix" == candidateType)
                       || "prefix-root-suffix-postsuffix" == candidateType) {
                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
@@ -1568,7 +1572,7 @@ public class TibTextUtils implements THDLWylieConstants {
                    != TGCPair.CONSONANTAL_WITH_VOWEL) {
                    ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
                                     == ((TGCPair)gcs.get(1)).classification);
-                    if (debug) System.out.println("DEBUG: appending vowel");
+                    if (debug) System.out.println("DEBUG: appending vowel 1");
                    translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2),
                                          fontSize);
                } else {
@@ -1648,22 +1652,16 @@ public class TibTextUtils implements THDLWylieConstants {
 * @return the Extended Wylie/ACIP corresponding to these glyphs (with
 * font size info), or null */
    private static TranslitList getTranslitImplementation(boolean EWTSNotACIP,
-                                                            SizedDuffCode[] dcs,
-                                                            boolean noSuch[],
-                                                            StringBuffer warnings) {
-        if (dcs.length == 0)
-            return null;
-
+                                                          SizedDuffCode[] dcs,
+                                                          boolean noSuch[],
+                                                          StringBuffer warnings) {
+        // DLC FIXME: "    " should become " " for ACIP
        ArrayList glyphList = new ArrayList();
        TranslitList translitBuffer = new TranslitList();
-
-        // DLC FIXME: "    " should become " " for ACIP
        for (int i = 0; i < dcs.length; i++) {
            char ch = dcs[i].getDuffCode().getCharacter();
-            int k = dcs[i].getDuffCode().getCharNum();
            int fsz = dcs[i].getFontSize();
-
-            if (k < 32) {
+            if ((int)ch < 32) { // 32 is space, ' '
                if (!glyphList.isEmpty()) {
                    getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
                                        warnings, translitBuffer);
@@ -1671,7 +1669,6 @@ public class TibTextUtils implements THDLWylieConstants {
                    if (null != warnings)
                        warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
                }
-
                // In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
                // indicates a real line break.
                if (!EWTSNotACIP && '\n' == ch) {
@@ -1682,7 +1679,7 @@ public class TibTextUtils implements THDLWylieConstants {
                        translitBuffer.append(ch, fsz);
                }
                translitBuffer.append(ch, fsz);
-            } else {
+            } else { // (int)ch >= 32
                String wylie
                    = TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(),
                                                         noSuch);
@@ -1691,9 +1688,7 @@ public class TibTextUtils implements THDLWylieConstants {
                    // U+0F04 and U+0F05 -- these require lookahead to
                    // see if the ACIP is # (two shishes) or * (one
                    // swish)
-                    
                    int howManyConsumed[] = new int[] { -1 /* invalid */ };
-
                    acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(),
                                                             ((i+1<dcs.length)
                                                              ? dcs[i+1].getDuffCode()
@@ -1703,14 +1698,9 @@ public class TibTextUtils implements THDLWylieConstants {
                                                              : null),
                                                             noSuch,
                                                             howManyConsumed);
-                    if (howManyConsumed[0] == 1) {
-                        // nothing to do
-                    } else if (howManyConsumed[0] == 2) {
-                        ++i;
-                    } else {
-                        ThdlDebug.verify(howManyConsumed[0] == 3);
-                        ++i; ++i;
-                    }
+                    ThdlDebug.verify(howManyConsumed[0] <= 3
+                                     && howManyConsumed[0] >= 1);
+                    i += howManyConsumed[0] - 1;
                }
                if (TibetanMachineWeb.isWyliePunc(wylie)
                    && !TibetanMachineWeb.isWylieAdornment(wylie)) {
@@ -1725,10 +1715,9 @@ public class TibTextUtils implements THDLWylieConstants {
                    glyphList.add(dcs[i]);
                }
            }
-        }
+        } // for

        // replace remaining TMW with transliteration
-
        if (!glyphList.isEmpty()) {
            getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
                                warnings, translitBuffer);
@@ -1736,12 +1725,7 @@ public class TibTextUtils implements THDLWylieConstants {
            if (null != warnings)
                warnings.append("The stretch of Tibetan ended without final punctuation.");
        }
-
-        if (translitBuffer.length() > 0) {
-            return translitBuffer;
-        } else {
-            return null;
-        }
+        return ((translitBuffer.length() > 0) ? translitBuffer : null);
    }

    /** Returns "root" instead of "appendaged-root", for example. */