From ac412c994b0ddda860c9849169462feef23ee8da Mon Sep 17 00:00:00 2001
From: dchandler <dchandler>
Date: Sun, 30 Nov 2003 02:06:48 +0000
Subject: [PATCH] Now {Pm} is treated like {PAm}; {Pm:} is like {PAm:}; {P:} is
 like {PA:}.

---
 .../org/thdl/tib/text/ttt/ACIPConverter.java  |  2 +-
 source/org/thdl/tib/text/ttt/ACIPRules.java   | 13 ++++++--
 source/org/thdl/tib/text/ttt/PackageTest.java | 33 ++++++++++++++-----
 source/org/thdl/tib/text/ttt/TPair.java       |  8 +++--
 .../thdl/tib/text/ttt/TPairListFactory.java   | 19 +++++++++--
 5 files changed, 56 insertions(+), 19 deletions(-)
diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java
index 5502841..9ebec93 100644
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@@ -328,7 +328,7 @@ public class ACIPConverter {
                             hasErrors = true;
                             uni = err;
                         }
-                        if (null != writer) writer.write(uni);
+                        writer.write(uni);
                     }
                     if (null != tdoc) {
                         String wylie
diff --git a/source/org/thdl/tib/text/ttt/ACIPRules.java b/source/org/thdl/tib/text/ttt/ACIPRules.java
index 126775a..b97093a 100644
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@@ -83,6 +83,9 @@ public class ACIPRules {
                 
                 // Keep this code in sync with getWylieForACIPVowel.
             }
+            // {Pm} is treated just like {PAm}; {P:} just like {PA:}; and
+            // {Pm:} just like {PAm:}.  But that happens thanks to
+            // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
         }
         return (acipVowels.contains(s));
     }
@@ -276,6 +279,10 @@ public class ACIPRules {
                 putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
                 putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
             }
+            // {Pm} is treated just like {PAm}; {P:} is treated just
+            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
+            // that happens thanks to
+            // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
         }
         return (String)acipVowel2wylie.get(acip);
     }
@@ -475,12 +482,13 @@ public class ACIPRules {
             superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
             // :m does not appear, though you'd think it's as valid as m:.
 
-            // I doubt these will occur alone:
             superACIP2unicode.put("m", "\u0F7E");
             superACIP2unicode.put(":", "\u0F7F");
+            superACIP2unicode.put("m:", "\u0F7E\u0F7F");
 
             superACIP2unicode.put("Am", "\u0F7E");
             superACIP2unicode.put("A:", "\u0F7F");
+            superACIP2unicode.put("Am:", "\u0F7E\u0F7F");
 
             superACIP2unicode.put("0", "\u0F20");
             superACIP2unicode.put("1", "\u0F21");
@@ -567,12 +575,11 @@ public class ACIPRules {
 
         if (vowel.indexOf('m') >= 0) {
             DuffCode last = (DuffCode)duff.get(duff.size() - 1);
-            duff.remove(duff.size() - 1);
+            duff.remove(duff.size() - 1); // getBindu will add it back...
             TibTextUtils.getBindu(duff, last);
         }
         if (vowel.indexOf(':') >= 0)
             duff.add(TibetanMachineWeb.getGlyph("H"));
-
     }
 
     /** Returns true if and only if l is the ACIP representation of a
diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java
index 37db9e8..e9d8606 100644
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@@ -656,13 +656,13 @@ tstHelper("KA'", "[(K . A), (' . )]",
                       "{S+P+YO}{M+S}",
                       "{S+P+YO}{M}{S}",
                   });
-        tstHelper(":'AO", "[(: . -), (' . ), (A . O)]");
-        tstHelper("m'AO", "[(m . -), (' . ), (A . O)]");
-        tstHelper("m:'AO", "[(m . -), (: . -), (' . ), (A . O)]");
+        tstHelper(":'AO", "[( . A:), (' . ), (A . O)]");
+        tstHelper("m'AO", "[( . Am), (' . ), (A . O)]");
+        tstHelper("m:'AO", "[( . Am:), (' . ), (A . O)]");
         tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" });
         tstHelper("KE:", "[(K . E:)]");
-        tstHelper("K:", "[(K . ), (: . )]",
-                  new String[] { /* No parses exist. "K:" is illegal. */ });
+        tstHelper("K:", "[(K . A:)]",
+                  new String[] { "{KA:}" });
         tstHelper("'AO", "[(' . ), (A . O)]");
         tstHelper("'AOM", "[(' . ), (A . O), (M . )]");
 
@@ -717,8 +717,8 @@ tstHelper("KA'", "[(K . A), (' . )]",
 
         tstHelper("TAA", "[(T . ), (A . A)]");
         tstHelper("DAA", "[(D . ), (A . A)]");
-        tstHelper("DAAm", "[(D . ), (A . Am)]");
-        tstHelper("DAAm:", "[(D . ), (A . Am:)]");
+        tstHelper("DAAm", "[(D . A), (A . Am)]");
+        tstHelper("DAAm:", "[(D . A), (A . Am:)]");
         tstHelper("DA'im:", "[(D . A), (' . im:)]");
 
         tstHelper("NA+YA", "[(N . +), (Y . A)]");
@@ -7196,6 +7196,8 @@ tstHelper("ZUR");
 
     /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
     public void testScanner() {
+        shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");
+
         shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]");
 
         shelp("LA...SGRUB",
@@ -7416,6 +7418,7 @@ G+NA
 MNA'
 M+NA
 */
+        uhelp("B+NA", "\u0f56\u0fa3");
         uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
         uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
         uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
@@ -7438,9 +7441,10 @@ M+NA
         uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
         uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
         uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
-        uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP : because : is not an ACIP consonant]");
-        uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP m because m is not an ACIP consonant]");
+        uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
+        uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");
 
+        uhelp("N+YA", "\u0f53\u0fb1");
         uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
         uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
         uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
@@ -7503,6 +7507,17 @@ M+NA
         uhelp("WWA", "\u0f5d\u0fba");
         uhelp("W+WA", "\u0f5d\u0fba");
 
+        tstHelper("Km:", "{KAm:}",
+                  new String[] { "{KAm:}" },
+                  new String[] { },
+                  "{KAm:}");
+        uhelp("Km:", "\u0f40\u0f7e\u0f7f");
+        uhelp("KAm:", "\u0f40\u0f7e\u0f7f");
+        uhelp("Km", "\u0f40\u0f7e");
+        uhelp("KAm", "\u0f40\u0f7e");
+        uhelp("K:", "\u0f40\u0f7f");
+        uhelp("KA:", "\u0f40\u0f7f");
+
         uhelp("/NY'EE/", "\u0f3C\u0f49\u0F71\u0F7B\u0f3D");
         uhelp("*#HUm: G+DHOO GRO`;.,",
               "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
diff --git a/source/org/thdl/tib/text/ttt/TPair.java b/source/org/thdl/tib/text/ttt/TPair.java
index 1534f76..4a50fa0 100644
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@@ -213,12 +213,14 @@ class TPair {
     void getUnicode(StringBuffer sb, boolean subscribed) {
         if (null != getLeft()) {
             String x = ACIPRules.getUnicodeFor(getLeft(), subscribed);
-            if (null != x) sb.append(x);
+            if (null == x) throw new Error("TPair: " + getLeft() + " has no Uni");
+            sb.append(x);
         }
         if (null != getRight()
-            && !("-".equals(getRight()) || "A".equals(getRight()))) {
+            && !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) {
             String x = ACIPRules.getUnicodeFor(getRight(), subscribed);
-            if (null != x) sb.append(x);
+            if (null == x) throw new Error("TPair: " + getRight() + " has no Uni");
+            sb.append(x);
         }
     }
 
diff --git a/source/org/thdl/tib/text/ttt/TPairListFactory.java b/source/org/thdl/tib/text/ttt/TPairListFactory.java
index 648762d..e8daad5 100644
--- a/source/org/thdl/tib/text/ttt/TPairListFactory.java
+++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java
@@ -95,7 +95,7 @@ class TPairListFactory {
      *  'ANG" circumstances
      *  @param weHaveSeenVowelAlready true if and only if, in our
      *  recursion, we've already found one vowel (not a disambiguator,
-     *  but a vowel like "A", "E", "Um:", "'U", etc.) */
+     *  but a vowel like "A", "E", "Um:", "m", "'U", etc.) */
     private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) {
 
         // base case for our recursion:
@@ -212,7 +212,11 @@ class TPairListFactory {
         }
         for (i = Math.min(ACIPRules.MAX_VOWEL_LENGTH, xl - ll); i >= 1; i--) {
             String t = null;
-            if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))) {
+            if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))
+                // Or these, which we massage into "Am", "Am:", and
+                // "A:" because I didn't think {Pm} should be treated
+                // like {PAm} originally:
+                || "m".equals(t) || "m:".equals(t) || ":".equals(t)) {
                 r = t;
                 break;
             }
@@ -227,6 +231,14 @@ class TPairListFactory {
             return new TPair(l, "+");
         }
 
+        // Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:.
+        int mod = 0;
+        if ("m".equals(r)) { r = "Am"; mod = -1; }
+        if (":".equals(r)) { r = "A:"; mod = -1; }
+        if ("m:".equals(r)) { r = "Am:"; mod = -1; }
+        if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though...
+
+
         // what if we see a character that's not part of any vowel or
         // consonant?  We return it.
         if (null == l && null == r) {
@@ -236,7 +248,8 @@ class TPairListFactory {
         }
 
         howMuch[0] = (((l == null) ? 0 : l.length())
-                      + ((r == null) ? 0 : r.length()));
+                      + ((r == null) ? 0 : r.length())
+                      + mod);
         return new TPair(l, r);
     }
 }