Fixed important EWTS->Tibetan conversion bugs. [g.yogs] (and maybe [hUM^]) are not yet converting correctly.

I have not yet committed the end-to-end test that I am running manually to find these problems; it will be another document for TMW_RTF_TO_THDL_WYLIETest.java. Note that thdl.debug=true is essential to access the GUI for the EWTS->* converters.
2005-07-06 07:46:21 +00:00 · 2005-07-06 07:46:21 +00:00 · 63ff0fb0c9
commit 63ff0fb0c9
parent 0b3a636f63
7 changed files with 101 additions and 40 deletions
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@ -149,7 +149,7 @@ public class TibetanConverter implements FontConverterConstants {
                out.println("TibetanConverter --colors yes|no");
                out.println("                 --warning-level None|Some|Most|All");
-                out.println("                 --acip-to-tibetan-warning-and-error-messages short|long");
+                out.println("                 --acip-to-tibetan-warning-and-error-messages short|long");  // TODO(DLC)[EWTS->Tibetan]: misnomer, ewts and acip both are affected
                out.println("                 --find-all-non-tmw | --find-some-non-tmw");
                out.println("                   | --tmw-to-tmw-for-testing");
                out.println("                   | --to-tibetan-machine | --to-tibetan-machine-web");
@ -166,6 +166,10 @@ public class TibetanConverter implements FontConverterConstants {
                out.println("");
                out.println(" -h | --help for this message");
                out.println("");
                out.println(" --wylie-to-unicode to convert an EWTS text file to a Unicode");
                out.println("");
                out.println(" --wylie-to-tmw to convert an EWTS text file to TibetanMachineWeb");
                out.println("");
                out.println(" --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine");
                out.println("");
                out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
--- a/source/org/thdl/tib/text/ttt/EWTSTest.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTest.java
@ -151,6 +151,12 @@ public class EWTSTest extends TestCase {
    /** Tests that the EWTS->unicode converter isn't completely
        braindead. */
    public void testEwtsBasics() {
        if (RUN_FAILING_TESTS) ewts2uni_test("g.yogs", "\u0f42\u0f61\u0f7c\u0f42\u0f66"); // TODO(DLC)[EWTS->Tibetan]: DLC NOW!
        if (RUN_FAILING_TESTS) ewts2uni_test("hUM^", "TODO(DLC)[EWTS->Tibetan]: DLC NOW");
        ewts2uni_test("brgyad", "\u0f56\u0f62\u0f92\u0fb1\u0f51");
        ewts2uni_test("brjod", "\u0f56\u0f62\u0f97\u0f7c\u0f51");
        ewts2uni_test("drwa", "\u0f51\u0fb2\u0fad");
        ewts2uni_test("rwa", "\u0f62\u0fad");
        ewts2uni_test("ug_pha ", "\u0f68\u0f74\u0f42\u00a0\u0f55\u0f0b");
        ewts2uni_test("a ", "\u0f68\u0f0b");
        ewts2uni_test("g.a ", "\u0f42\u0f68\u0f0b");
--- a/source/org/thdl/tib/text/ttt/EWTSTraits.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java
@ -211,7 +211,9 @@ public final class EWTSTraits implements TTraits {
        if (wowel.indexOf('M') >= 0) {
            DuffCode last = null;
-            if (duff.size() > 0) {
+            if (!context_added[0]) {
                last = preceding;
            } else if (duff.size() > 0) {
                last = (DuffCode)duff.get(duff.size() - 1);
                duff.remove(duff.size() - 1); // getBindu will add it back...
                // TODO(DLC)[EWTS->Tibetan]: is this okay????  when is a bindu okay to be alone???
--- a/source/org/thdl/tib/text/ttt/TConverter.java
+++ b/source/org/thdl/tib/text/ttt/TConverter.java
@ -479,7 +479,7 @@ public class TConverter {
                                TStackList sl1 = ((null == pt1)
                                                  ? null : pt1.getBestParse());
                                if (null == sl0 && null == sl1) {
-                                    // {A-DZU} causes this, for example.
+                                    // ACIP {A-DZU} causes this, for example.
                                    hasErrors = true;
                                    String errorMessage =
                                        "[#ERROR "
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@ -190,17 +190,19 @@ class TPair {
    }
    String getWylie() {
-        return getWylie(false);
+        return getWylie(false, false);
    }
    /** Returns the EWTS Wylie that corresponds to this pair if
     *  justLeft is false, or the EWTS Wylie that corresponds to just
-     *  {@link #getLeft()} if justLeft is true.
+     *  {@link #getLeft()} if justLeft is true.  If dropDisambiguator
     *  is true and the right component is a disambiguator, then the
     *  Wylie will not contain '.'.
     *
     *  <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
     *  even though sometimes the EWTS for those is "w", "R", or "Y".
     *  Handle that in the caller. */
-    String getWylie(boolean justLeft) {
+    String getWylie(boolean justLeft, boolean dropDisambiguator) {
        String leftWylie = null;
        if (getLeft() != null) {
            leftWylie = traits.getEwtsForConsonant(getLeft());
@ -212,7 +214,7 @@ class TPair {
        if (null == leftWylie) leftWylie = "";
        if (justLeft) return leftWylie;
        String rightWylie = null;
-        if (traits.disambiguator().equals(getRight()))
+        if (!dropDisambiguator && traits.disambiguator().equals(getRight()))
            rightWylie = ".";
        else if ("+".equals(getRight()))
            rightWylie = "+";
--- a/source/org/thdl/tib/text/ttt/TPairList.java
+++ b/source/org/thdl/tib/text/ttt/TPairList.java
@ -701,10 +701,10 @@ class TPairList {
        int previousSize = duffsAndErrors.size();
        StringBuffer wylieForConsonant = new StringBuffer();
        for (int x = 0; x + 1 < size(); x++) {
-            wylieForConsonant.append(get(x).getWylie(false));
+            wylieForConsonant.append(get(x).getWylie(false, true));
        }
        TPair lastPair = get(size() - 1);
-        wylieForConsonant.append(lastPair.getWylie(true));
+        wylieForConsonant.append(lastPair.getWylie(true, false));
        String hashKey = wylieForConsonant.toString();
        // Because EWTS has special handling for full-formed
@ -763,6 +763,7 @@ class TPairList {
            }
        }
        if (lastPair.getRight() == null
            || lastPair.getRight().equals(traits.disambiguator())
            || lastPair.equals(traits.disambiguator())) {
            duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
        } else {
@ -771,7 +772,7 @@ class TPairList {
                                   lastPair.getRight());
        }
        if (previousSize == duffsAndErrors.size())
-            throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion.
+            throw new Error("TPairList with no duffs? " + toString() + " has hash key " + hashKey + " and previous size is " + previousSize); // FIXME: change to assertion.
    }
 }
--- a/source/org/thdl/tib/text/ttt/TPairListFactory.java
+++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java
@ -256,7 +256,14 @@ class TPairListFactory {
    /** EWTS helper function that transforms native stacks to include
     *  pluses: [(ph . ) (y . ) (w . *)] -> [(ph . +) (y . +) (w
-     *  . *)], e.g.
+     *  . *)], e.g.  The tricky case is something like [brgyad] or
     *  [brjod] because b+r is a native stack and so is r+g+y (and in
     *  fact r+g+y accepts a bao prefix).  It's not quite safe to
     *  always grab the rightmost native stack from a stretch, as
     *  [drwa] proves.  You must grab the longest, rightmost stack.
     *  In most cases, either way you did it it'd be illegal.  In the
     *  rest, the only way it can be legal is if there's a prefix and
     *  the rightmost stack.
     *  @param traits must mesh with orig */
    private static TPairList transformNativeStacks(TTraits traits,
                                                   TPairList orig) {
@ -277,8 +284,9 @@ class TPairListFactory {
            // we see a native stack of size 2, we transform it.
            boolean found_something = false;
-            TPair p[] = new TPair[maxNativeStackSize];
+            TPair p[]
-            for (int j = 0; j < maxNativeStackSize; j++) {
+                = new TPair[maxNativeStackSize + 1];  // plus one for [brgyad]
            for (int j = 0; j < maxNativeStackSize + 1; j++) {
                if (i + j < orig.size())
                    p[j] = orig.get(i + j);
                else
@ -286,35 +294,32 @@ class TPairListFactory {
            }
            // Now p[0] is current pair, p[1] is the one after that, etc.
-            for (int nss = maxNativeStackSize; nss >= minNativeStackSize;
+            if (null != p[0].getLeft()
-                 nss--) {
+                && null == p[0].getRight()) {
-                String hash_key = "";
+                // TODO(dchandler): The way I do this [drwa] case,
-                int good = 0;
+                // does it rely on the fact that maxNativeStackSize ==
-                for (int k = 0; k < nss - 1; k++) {
+                // 3?  Let's have it not rely on that...
-                    if (null != p[k]
+                int h;
-                        && null != p[k].getLeft()
+                if (0 == (h = helper(traits, 0, maxNativeStackSize, p, result))) {  // [drwa]
-                        && null == p[k].getRight()) {
+                    // [brgyad] makes us go from right to left.
-                        hash_key += p[k].getLeft() + "-";
+                    // (TODO(dchandler): It's a shame we're doing this
-                        ++good;
+                    // stuff when we have the code to figure out, for
                    // ACIP, that [BRGYAD] is what it is.)
                    for (int offset = 1; offset >= 0; offset--) {
                        if (found_something) break;
                        for (int nss = maxNativeStackSize;
                             nss >= minNativeStackSize;
                             nss--) {
                            if (0 != (h = helper(traits, offset, nss, p, result))) {
                                found_something = true;
                                i += h;
                                break;
                            }
                        }
                    }
-                }
+                } else {
-                if (null != p[nss - 1]
+                    i += h;
                    && null != p[nss - 1].getLeft()
                    && !"+".equals(p[nss - 1].getRight())) {
                    hash_key += p[nss - 1].getLeft();
                    ++good;
                }
                if (nss == good
                    && TibetanMachineWeb.isKnownHashKey(hash_key)) {
                    found_something = true;
                    for (int n = 0; n < nss - 1; n++) {
                        ++i;
                        result.append(new TPair(traits,
                                                p[n].getLeft(), "+"));
                    }
                    ++i;
                    result.append(p[nss - 1]);
                    break;  // for ph-y-w etc.
                }
            }
            if (!found_something) {
@ -328,6 +333,47 @@ class TPairListFactory {
        return result;
    }
    /** Tries to recognize a native stack made of the nss pairs
     *  p[offset], p[offset + 1], ..., p[offset + nss - 1].  On
     *  success, appends the transformed pairs to result (mutating
     *  it) and returns the number of TPairs scarfed; otherwise
     *  result is left untouched and 0 is returned.
     *
     *  <p>Every pair but the last must have a left side and no right
     *  side; the last must have a left side and a right side that is
     *  not "+".  The left sides, joined by '-', must form a hash key
     *  known to TibetanMachineWeb.
     *
     *  @param traits handed to each TPair constructed here
     *  @param offset index in p where the candidate stack begins;
     *  when it is 1, p[0] is appended to result unchanged ahead of
     *  the stack and is included in the returned count
     *  @param nss the size of the candidate stack
     *  @param p the window of pairs being examined; null entries
     *  simply fail to match
     *  @param result the list that receives the output pairs
     *  @return the number of pairs appended to result, or 0 if no
     *  native stack of size nss sits at offset */
    private static int helper(TTraits traits, int offset, int nss, TPair p[],
                              TPairList result) {
        final int lastIndex = offset + nss - 1;
        StringBuffer key = new StringBuffer();
        int matched = 0;

        // Leading members of the stack: bare consonants only.
        for (int idx = offset; idx < lastIndex; idx++) {
            TPair candidate = p[idx];
            if (null == candidate
                || null == candidate.getLeft()
                || null != candidate.getRight()) {
                continue;
            }
            key.append(candidate.getLeft()).append("-");
            matched++;
        }

        // Final member: may carry a wowel etc., but must not already
        // be joined to what follows by a plus.
        TPair tail = p[lastIndex];
        if (null != tail
            && null != tail.getLeft()
            && !"+".equals(tail.getRight())) {
            key.append(tail.getLeft());
            matched++;
        }

        // All nss positions must have qualified, and the key must be
        // one that TibetanMachineWeb knows.
        if (matched != nss
            || !TibetanMachineWeb.isKnownHashKey(key.toString())) {
            return 0;
        }

        int scarfed = 0;
        if (1 == offset) {
            // The pair before the stack passes through as-is.
            result.append(p[0]);
            scarfed++;
        }
        for (int idx = offset; idx < lastIndex; idx++) {
            result.append(new TPair(traits, p[idx].getLeft(), "+"));
            scarfed++;
        }
        result.append(tail);
        scarfed++;
        return scarfed;
    }
    // TODO(DLC)[EWTS->Tibetan]: doc
    private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {