Now {Pm} is treated like {PAm}; {Pm:} is like {PAm:}; {P:} is like {PA:}.

2003-11-30 02:06:48 +00:00 · 2003-11-30 02:06:48 +00:00 · ac412c994b
commit ac412c994b
parent e7c4cc1874
5 changed files with 56 additions and 19 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@@ -328,7 +328,7 @@ public class ACIPConverter {
                            hasErrors = true;
                            uni = err;
                        }
-                        if (null != writer) writer.write(uni);
+                        writer.write(uni);
                    }
                    if (null != tdoc) {
                        String wylie
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@@ -83,6 +83,9 @@ public class ACIPRules {
                
                // Keep this code in sync with getWylieForACIPVowel.
            }
+            // {Pm} is treated just like {PAm}; {P:} is treated just
+            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
+            // that happens thanks to TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
        }
        return (acipVowels.contains(s));
    }
@@ -276,6 +279,10 @@ public class ACIPRules {
                putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
                putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
            }
+            // {Pm} is treated just like {PAm}; {P:} is treated just
+            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
+            // that happens thanks to
+            // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
        }
        return (String)acipVowel2wylie.get(acip);
    }
@@ -475,12 +482,13 @@ public class ACIPRules {
            superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
            // :m does not appear, though you'd think it's as valid as m:.

-            // I doubt these will occur alone:
            superACIP2unicode.put("m", "\u0F7E");
            superACIP2unicode.put(":", "\u0F7F");
+            superACIP2unicode.put("m:", "\u0F7E\u0F7F");

            superACIP2unicode.put("Am", "\u0F7E");
            superACIP2unicode.put("A:", "\u0F7F");
+            superACIP2unicode.put("Am:", "\u0F7E\u0F7F");

            superACIP2unicode.put("0", "\u0F20");
            superACIP2unicode.put("1", "\u0F21");
@@ -567,12 +575,11 @@ public class ACIPRules {

        if (vowel.indexOf('m') >= 0) {
            DuffCode last = (DuffCode)duff.get(duff.size() - 1);
-            duff.remove(duff.size() - 1);
+            duff.remove(duff.size() - 1); // getBindu will add it back...
            TibTextUtils.getBindu(duff, last);
        }
        if (vowel.indexOf(':') >= 0)
            duff.add(TibetanMachineWeb.getGlyph("H"));
-
    }

    /** Returns true if and only if l is the ACIP representation of a
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@@ -656,13 +656,13 @@ tstHelper("KA'", "[(K . A), (' . )]",
                      "{S+P+YO}{M+S}",
                      "{S+P+YO}{M}{S}",
                  });
-        tstHelper(":'AO", "[(: . -), (' . ), (A . O)]");
-        tstHelper("m'AO", "[(m . -), (' . ), (A . O)]");
-        tstHelper("m:'AO", "[(m . -), (: . -), (' . ), (A . O)]");
+        tstHelper(":'AO", "[( . A:), (' . ), (A . O)]");
+        tstHelper("m'AO", "[( . Am), (' . ), (A . O)]");
+        tstHelper("m:'AO", "[( . Am:), (' . ), (A . O)]");
        tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" });
        tstHelper("KE:", "[(K . E:)]");
-        tstHelper("K:", "[(K . ), (: . )]",
-                  new String[] { /* No parses exist. "K:" is illegal. */ });
+        tstHelper("K:", "[(K . A:)]",
+                  new String[] { "{KA:}" });
        tstHelper("'AO", "[(' . ), (A . O)]");
        tstHelper("'AOM", "[(' . ), (A . O), (M . )]");

@@ -717,8 +717,8 @@ tstHelper("KA'", "[(K . A), (' . )]",

        tstHelper("TAA", "[(T . ), (A . A)]");
        tstHelper("DAA", "[(D . ), (A . A)]");
-        tstHelper("DAAm", "[(D . ), (A . Am)]");
-        tstHelper("DAAm:", "[(D . ), (A . Am:)]");
+        tstHelper("DAAm", "[(D . A), (A . Am)]");
+        tstHelper("DAAm:", "[(D . A), (A . Am:)]");
        tstHelper("DA'im:", "[(D . A), (' . im:)]");

        tstHelper("NA+YA", "[(N . +), (Y . A)]");
@@ -7196,6 +7196,8 @@ tstHelper("ZUR");

    /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
    public void testScanner() {
+        shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");
+
        shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]");

        shelp("LA...SGRUB",
@@ -7416,6 +7418,7 @@ G+NA
 MNA'
 M+NA
 */
+        uhelp("B+NA", "\u0f56\u0fa3");
        uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
        uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
        uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
@@ -7438,9 +7441,10 @@ M+NA
        uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
        uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
        uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
-        uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP : because : is not an ACIP consonant]");
-        uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP m because m is not an ACIP consonant]");
+        uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
+        uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");

+        uhelp("N+YA", "\u0f53\u0fb1");
        uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
        uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
        uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
@@ -7503,6 +7507,17 @@ M+NA
        uhelp("WWA", "\u0f5d\u0fba");
        uhelp("W+WA", "\u0f5d\u0fba");

+        tstHelper("Km:", "{KAm:}",
+                  new String[] { "{KAm:}" },
+                  new String[] { },
+                  "{KAm:}");
+        uhelp("Km:", "\u0f40\u0f7e\u0f7f");
+        uhelp("KAm:", "\u0f40\u0f7e\u0f7f");
+        uhelp("Km", "\u0f40\u0f7e");
+        uhelp("KAm", "\u0f40\u0f7e");
+        uhelp("K:", "\u0f40\u0f7f");
+        uhelp("KA:", "\u0f40\u0f7f");
+
        uhelp("/NY'EE/", "\u0f3C\u0f49\u0F71\u0F7B\u0f3D");
        uhelp("*#HUm: G+DHOO GRO`;.,",
              "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@@ -213,12 +213,14 @@ class TPair {
    void getUnicode(StringBuffer sb, boolean subscribed) {
        if (null != getLeft()) {
            String x = ACIPRules.getUnicodeFor(getLeft(), subscribed);
-            if (null != x) sb.append(x);
+            if (null == x) throw new Error("TPair: " + getLeft() + " has no Uni");
+            sb.append(x);
        }
        if (null != getRight()
-            && !("-".equals(getRight()) || "A".equals(getRight()))) {
+            && !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) {
            String x = ACIPRules.getUnicodeFor(getRight(), subscribed);
-            if (null != x) sb.append(x);
+            if (null == x) throw new Error("TPair: " + getRight() + " has no Uni");
+            sb.append(x);
        }
    }

--- a/source/org/thdl/tib/text/ttt/TPairListFactory.java
+++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java
@@ -95,7 +95,7 @@ class TPairListFactory {
     *  'ANG" circumstances
     *  @param weHaveSeenVowelAlready true if and only if, in our
     *  recursion, we've already found one vowel (not a disambiguator,
-     *  but a vowel like "A", "E", "Um:", "'U", etc.) */
+     *  but a vowel like "A", "E", "Um:", "m", "'U", etc.) */
    private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) {

        // base case for our recursion:
@@ -212,7 +212,11 @@ class TPairListFactory {
        }
        for (i = Math.min(ACIPRules.MAX_VOWEL_LENGTH, xl - ll); i >= 1; i--) {
            String t = null;
-            if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))) {
+            if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))
+                // Or these, which we massage into "Am", "Am:", and
+                // "A:" because I didn't think {Pm} should be treated
+                // like {PAm} originally:
+                || "m".equals(t) || "m:".equals(t) || ":".equals(t)) {
                r = t;
                break;
            }
@@ -227,6 +231,14 @@ class TPairListFactory {
            return new TPair(l, "+");
        }

+        // Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:.
+        int mod = 0;
+        if ("m".equals(r)) { r = "Am"; mod = -1; }
+        if (":".equals(r)) { r = "A:"; mod = -1; }
+        if ("m:".equals(r)) { r = "Am:"; mod = -1; }
+        if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though...
+
+
        // what if we see a character that's not part of any vowel or
        // consonant?  We return it.
        if (null == l && null == r) {
@@ -236,7 +248,8 @@ class TPairListFactory {
        }

        howMuch[0] = (((l == null) ? 0 : l.length())
-                      + ((r == null) ? 0 : r.length()));
+                      + ((r == null) ? 0 : r.length())
+                      + mod);
        return new TPair(l, r);
    }
 }