Two things:

First, TMW->EWTS now gives dbas and dngas instead of dabs and dangs, because Chris Fynn's e-mail from today has dbas and dngas. Second, down with ACIPRules; long live ACIPTraits. EWTS->Tibetan conversion is closer still.
2005-02-22 04:36:54 +00:00 · 2005-02-22 04:36:54 +00:00 · c16f633ecf
commit c16f633ecf
parent 82c6047cc2
18 changed files with 950 additions and 818 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPTraits.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTraits.java
@ -18,11 +18,25 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

+import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.StringTokenizer;
+import java.util.List;
+
+import org.thdl.util.ThdlOptions;
+import org.thdl.tib.text.DuffCode;
+import org.thdl.tib.text.THDLWylieConstants;
+import org.thdl.tib.text.TibetanMachineWeb;
+import org.thdl.tib.text.TibTextUtils;
+
+
 /** A singleton class that should contain (but due to laziness and
 *  ignorance probably does not contain) all the traits that make ACIP
- *  transliteration different from other (say, EWTS)
- *  transliterations. */
-final class ACIPTraits implements TTraits {
+ *  transliteration scheme different from other (say, EWTS)
+ *  transliteration schemes.  This is not safe to use in concurrent
+ *  programs but it would be easy to make it so. */
+public final class ACIPTraits implements TTraits {
    /** sole instance of this class */
    private static ACIPTraits singleton = null;

@ -30,7 +44,7 @@ final class ACIPTraits implements TTraits {
    private ACIPTraits() { }

    /** Returns the singleton instance of this class. */
-    public static ACIPTraits instance() {
+    public static /* synchronized */ ACIPTraits instance() {
        if (null == singleton) {
            singleton = new ACIPTraits();
        }
@ -43,15 +57,536 @@ final class ACIPTraits implements TTraits {
    /** Returns the disambiguator character of this transliteration
     *  scheme, which is '-'. */
    public char disambiguatorChar() {
        return '-';
    }

-    public int maxConsonantLength() { return ACIPRules.MAX_CONSONANT_LENGTH; }
+    public int maxConsonantLength() { return MAX_CONSONANT_LENGTH; } // length, in characters, of the longest ACIP consonant

-    public int maxWowelLength() { return ACIPRules.MAX_WOWEL_LENGTH; }
-
-    public boolean isConsonant(String s) { return ACIPRules.isConsonant(s); }
-
-    public boolean isWowel(String s) { return ACIPRules.isWowel(s); }
+    public int maxWowelLength() { return MAX_WOWEL_LENGTH; } // length, in characters, of the longest ACIP wowel

    /** Returns true if and only if the left part of p is "A" and p
     *  has no right part. */
    public boolean hasSimpleError(TPair p) {
        if (!"A".equals(p.getLeft())) {
            return false;
        }
        return null == p.getRight();
    }
+
+    public String aVowel() { return "A"; }
+
+    /** Returns true if and only if l is "S" or "D".  A null l yields
+     *  false. */
+    public boolean isPostsuffix(String l) {
+        if ("S".equals(l)) {
+            return true;
+        }
+        return "D".equals(l);
+    }
+
+    public boolean isSuffix(String l) {
+        return ("S".equals(l)
+                || "G".equals(l)
+                || "D".equals(l)
+                || "M".equals(l)
+                || "'".equals(l)
+                || "B".equals(l)
+                || "NG".equals(l)
+                || "N".equals(l)
+                || "L".equals(l)
+                || "R".equals(l));
+    }
+
+    /** Returns true if and only if l is one of ', M, B, D, or G.  A
+     *  null l yields false. */
+    public boolean isPrefix(String l) {
+        final String[] prefixes = { "'", "M", "B", "D", "G" };
+        for (int i = 0; i < prefixes.length; i++) {
+            if (prefixes[i].equals(l)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private HashMap superACIP2unicode = null;
+    private HashMap subACIP2unicode = null;
+    public /* synchronized */ String getUnicodeFor(String acip, boolean subscribed) {
+        if (superACIP2unicode == null) {
+            final boolean compactUnicode
+                = ThdlOptions.getBooleanOption("thdl.acip.to.unicode.conversions.use.0F52.et.cetera");
+            superACIP2unicode = new HashMap(144);
+            subACIP2unicode = new HashMap(42);
+
+            // oddball:
+            subACIP2unicode.put("V", "\u0FAD");
+
+            superACIP2unicode.put("DH", (compactUnicode ? "\u0F52" : "\u0F51\u0FB7"));
+            subACIP2unicode.put("DH", (compactUnicode ? "\u0FA2" : "\u0FA1\u0FB7"));
+            superACIP2unicode.put("BH", (compactUnicode ? "\u0F57" : "\u0F56\u0FB7"));
+            subACIP2unicode.put("BH", (compactUnicode ? "\u0FA7" : "\u0FA6\u0FB7"));
+            superACIP2unicode.put("dH", (compactUnicode ? "\u0F4D" : "\u0F4C\u0FB7"));
+            subACIP2unicode.put("dH", (compactUnicode ? "\u0F9D" : "\u0F9C\u0FB7"));
+            superACIP2unicode.put("DZH", (compactUnicode ? "\u0F5C" : "\u0F5B\u0FB7"));
+            subACIP2unicode.put("DZH", (compactUnicode ? "\u0FAC" : "\u0FAB\u0FB7"));
+            superACIP2unicode.put("Ksh", (compactUnicode ? "\u0F69" : "\u0F40\u0FB5"));
+            subACIP2unicode.put("Ksh", (compactUnicode ? "\u0FB9" : "\u0F90\u0FB5"));
+            superACIP2unicode.put("GH", (compactUnicode ? "\u0F43" : "\u0F42\u0FB7"));
+            subACIP2unicode.put("GH", (compactUnicode ? "\u0F93" : "\u0F92\u0FB7"));
+            superACIP2unicode.put("K", "\u0F40");
+            subACIP2unicode.put("K", "\u0F90");
+            superACIP2unicode.put("KH", "\u0F41");
+            subACIP2unicode.put("KH", "\u0F91");
+            superACIP2unicode.put("G", "\u0F42");
+            subACIP2unicode.put("G", "\u0F92");
+            superACIP2unicode.put("NG", "\u0F44");
+            subACIP2unicode.put("NG", "\u0F94");
+            superACIP2unicode.put("C", "\u0F45");
+            subACIP2unicode.put("C", "\u0F95");
+            superACIP2unicode.put("CH", "\u0F46");
+            subACIP2unicode.put("CH", "\u0F96");
+            superACIP2unicode.put("J", "\u0F47");
+            subACIP2unicode.put("J", "\u0F97");
+            superACIP2unicode.put("NY", "\u0F49");
+            subACIP2unicode.put("NY", "\u0F99");
+            superACIP2unicode.put("T", "\u0F4F");
+            subACIP2unicode.put("T", "\u0F9F");
+            superACIP2unicode.put("TH", "\u0F50");
+            subACIP2unicode.put("TH", "\u0FA0");
+            superACIP2unicode.put("D", "\u0F51");
+            subACIP2unicode.put("D", "\u0FA1");
+            superACIP2unicode.put("N", "\u0F53");
+            subACIP2unicode.put("N", "\u0FA3");
+            superACIP2unicode.put("P", "\u0F54");
+            subACIP2unicode.put("P", "\u0FA4");
+            superACIP2unicode.put("PH", "\u0F55");
+            subACIP2unicode.put("PH", "\u0FA5");
+            superACIP2unicode.put("B", "\u0F56");
+            subACIP2unicode.put("B", "\u0FA6");
+            superACIP2unicode.put("M", "\u0F58");
+            subACIP2unicode.put("M", "\u0FA8");
+            superACIP2unicode.put("TZ", "\u0F59");
+            subACIP2unicode.put("TZ", "\u0FA9");
+            superACIP2unicode.put("TS", "\u0F5A");
+            subACIP2unicode.put("TS", "\u0FAA");
+            superACIP2unicode.put("DZ", "\u0F5B");
+            subACIP2unicode.put("DZ", "\u0FAB");
+            superACIP2unicode.put("W", "\u0F5D");
+            subACIP2unicode.put("W", "\u0FBA"); // oddball
+            superACIP2unicode.put("ZH", "\u0F5E");
+            subACIP2unicode.put("ZH", "\u0FAE");
+            superACIP2unicode.put("Z", "\u0F5F");
+            subACIP2unicode.put("Z", "\u0FAF");
+            superACIP2unicode.put("'", "\u0F60");
+            subACIP2unicode.put("'", "\u0FB0");
+            superACIP2unicode.put("Y", "\u0F61");
+            subACIP2unicode.put("Y", "\u0FB1");
+            superACIP2unicode.put("R", "\u0F62");
+            subACIP2unicode.put("R", "\u0FB2");
+            superACIP2unicode.put("L", "\u0F63");
+            subACIP2unicode.put("L", "\u0FB3");
+            superACIP2unicode.put("SH", "\u0F64");
+            subACIP2unicode.put("SH", "\u0FB4");
+            superACIP2unicode.put("S", "\u0F66");
+            subACIP2unicode.put("S", "\u0FB6");
+            superACIP2unicode.put("H", "\u0F67");
+            subACIP2unicode.put("H", "\u0FB7");
+            superACIP2unicode.put("A", "\u0F68");
+            subACIP2unicode.put("A", "\u0FB8");
+            superACIP2unicode.put("t", "\u0F4A");
+            subACIP2unicode.put("t", "\u0F9A");
+            superACIP2unicode.put("th", "\u0F4B");
+            subACIP2unicode.put("th", "\u0F9B");
+            superACIP2unicode.put("d", "\u0F4C");
+            subACIP2unicode.put("d", "\u0F9C");
+            superACIP2unicode.put("n", "\u0F4E");
+            subACIP2unicode.put("n", "\u0F9E");
+            superACIP2unicode.put("sh", "\u0F65");
+            subACIP2unicode.put("sh", "\u0FB5");
+
+            superACIP2unicode.put("I", "\u0F72");
+            superACIP2unicode.put("E", "\u0F7A");
+            superACIP2unicode.put("O", "\u0F7C");
+            superACIP2unicode.put("U", "\u0F74");
+            superACIP2unicode.put("OO", "\u0F7D");
+            superACIP2unicode.put("EE", "\u0F7B");
+            superACIP2unicode.put("i", "\u0F80");
+            superACIP2unicode.put("'A", "\u0F71");
+            superACIP2unicode.put("'I", "\u0F71\u0F72");
+            superACIP2unicode.put("'E", "\u0F71\u0F7A");
+            superACIP2unicode.put("'O", "\u0F71\u0F7C");
+            superACIP2unicode.put("'U", "\u0F71\u0F74");
+            superACIP2unicode.put("'OO", "\u0F71\u0F7D");
+            superACIP2unicode.put("'EE", "\u0F71\u0F7B");
+            superACIP2unicode.put("'i", "\u0F71\u0F80");
+
+            superACIP2unicode.put("Im", "\u0F72\u0F7E");
+            superACIP2unicode.put("Em", "\u0F7A\u0F7E");
+            superACIP2unicode.put("Om", "\u0F7C\u0F7E");
+            superACIP2unicode.put("Um", "\u0F74\u0F7E");
+            superACIP2unicode.put("OOm", "\u0F7D\u0F7E");
+            superACIP2unicode.put("EEm", "\u0F7B\u0F7E");
+            superACIP2unicode.put("im", "\u0F80\u0F7E");
+            superACIP2unicode.put("'Am", "\u0F71\u0F7E");
+            superACIP2unicode.put("'Im", "\u0F71\u0F72\u0F7E");
+            superACIP2unicode.put("'Em", "\u0F71\u0F7A\u0F7E");
+            superACIP2unicode.put("'Om", "\u0F71\u0F7C\u0F7E");
+            superACIP2unicode.put("'Um", "\u0F71\u0F74\u0F7E");
+            superACIP2unicode.put("'OOm", "\u0F71\u0F7D\u0F7E");
+            superACIP2unicode.put("'EEm", "\u0F71\u0F7B\u0F7E");
+            superACIP2unicode.put("'im", "\u0F71\u0F80\u0F7E");
+
+            superACIP2unicode.put("I:", "\u0F72\u0F7F");
+            superACIP2unicode.put("E:", "\u0F7A\u0F7F");
+            superACIP2unicode.put("O:", "\u0F7C\u0F7F");
+            superACIP2unicode.put("U:", "\u0F74\u0F7F");
+            superACIP2unicode.put("OO:", "\u0F7D\u0F7F");
+            superACIP2unicode.put("EE:", "\u0F7B\u0F7F");
+            superACIP2unicode.put("i:", "\u0F80\u0F7F");
+            superACIP2unicode.put("'A:", "\u0F71\u0F7F");
+            superACIP2unicode.put("'I:", "\u0F71\u0F72\u0F7F");
+            superACIP2unicode.put("'E:", "\u0F71\u0F7A\u0F7F");
+            superACIP2unicode.put("'O:", "\u0F71\u0F7C\u0F7F");
+            superACIP2unicode.put("'U:", "\u0F71\u0F74\u0F7F");
+            superACIP2unicode.put("'OO:", "\u0F71\u0F7D\u0F7F");
+            superACIP2unicode.put("'EE:", "\u0F71\u0F7B\u0F7F");
+            superACIP2unicode.put("'i:", "\u0F71\u0F80\u0F7F");
+
+            superACIP2unicode.put("Im:", "\u0F72\u0F7E\u0F7F");
+            superACIP2unicode.put("Em:", "\u0F7A\u0F7E\u0F7F");
+            superACIP2unicode.put("Om:", "\u0F7C\u0F7E\u0F7F");
+            superACIP2unicode.put("Um:", "\u0F74\u0F7E\u0F7F");
+            superACIP2unicode.put("OOm:", "\u0F7D\u0F7E\u0F7F");
+            superACIP2unicode.put("EEm:", "\u0F7B\u0F7E\u0F7F");
+            superACIP2unicode.put("im:", "\u0F80\u0F7E\u0F7F");
+            superACIP2unicode.put("'Am:", "\u0F71\u0F7E\u0F7F");
+            superACIP2unicode.put("'Im:", "\u0F71\u0F72\u0F7E\u0F7F");
+            superACIP2unicode.put("'Em:", "\u0F71\u0F7A\u0F7E\u0F7F");
+            superACIP2unicode.put("'Om:", "\u0F71\u0F7C\u0F7E\u0F7F");
+            superACIP2unicode.put("'Um:", "\u0F71\u0F74\u0F7E\u0F7F");
+            superACIP2unicode.put("'OOm:", "\u0F71\u0F7D\u0F7E\u0F7F");
+            superACIP2unicode.put("'EEm:", "\u0F71\u0F7B\u0F7E\u0F7F");
+            superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
+            // :m does not appear, though you'd think it's as valid as m:.
+
+            superACIP2unicode.put("m", "\u0F7E");
+            superACIP2unicode.put(":", "\u0F7F");
+            superACIP2unicode.put("m:", "\u0F7E\u0F7F");
+
+            superACIP2unicode.put("Am", "\u0F7E");
+            superACIP2unicode.put("A:", "\u0F7F");
+            superACIP2unicode.put("Am:", "\u0F7E\u0F7F");
+
+            superACIP2unicode.put("0", "\u0F20");
+            superACIP2unicode.put("1", "\u0F21");
+            superACIP2unicode.put("2", "\u0F22");
+            superACIP2unicode.put("3", "\u0F23");
+            superACIP2unicode.put("4", "\u0F24");
+            superACIP2unicode.put("5", "\u0F25");
+            superACIP2unicode.put("6", "\u0F26");
+            superACIP2unicode.put("7", "\u0F27");
+            superACIP2unicode.put("8", "\u0F28");
+            superACIP2unicode.put("9", "\u0F29");
+
+            // punctuation
+            superACIP2unicode.put("&", "\u0F85");
+            superACIP2unicode.put(",", "\u0F0D");
+            superACIP2unicode.put(" ", "\u0F0B");
+            superACIP2unicode.put(".", "\u0F0C");
+            superACIP2unicode.put("`", "\u0F08");
+            superACIP2unicode.put("`", "\u0F08");
+            superACIP2unicode.put("*", "\u0F04\u0F05");
+            superACIP2unicode.put("#", "\u0F04\u0F05\u0F05");
+            superACIP2unicode.put("%", "\u0F35"); // but might be U+0F14, so we warn.
+            superACIP2unicode.put("o", "\u0F37");
+            superACIP2unicode.put(";", "\u0F11");
+            superACIP2unicode.put("\r", "\r");
+            superACIP2unicode.put("\t", "\t");
+            superACIP2unicode.put("\r\n", "\r\n");
+            superACIP2unicode.put("\n", "\n");
+            superACIP2unicode.put("\\", "\u0F84");
+            superACIP2unicode.put("^", "\u0F38");
+
+            // DLC FIXME: "^ GONG" is "^GONG", right?
+            // DLC FIXME: what's the Unicode for x? RC said there is none in plain-text Unicode for x.  But what about in RTF Unicode?
+        }
+        if (subscribed) {
+            String u = (String)subACIP2unicode.get(acip);
+            if (null != u) return u;
+        }
+        return (String)superACIP2unicode.get(acip);
+    }
+
+    private HashMap acipOther2wylie = null;
+    public /* synchronized */ String getEwtsForOther(String acip) {
+        if (acipOther2wylie == null) {
+            acipOther2wylie = new HashMap(20);
+
+            // don't use putMapping for this.  We don't want TMW->ACIP
+            // to produce "." for a U+0F0C because ACIP doesn't say
+            // that "." means U+0F0C.  It just seems to in practice
+            // for ACIP Release IV texts.
+            acipOther2wylie.put(".", "*");
+
+            putMapping(acipOther2wylie, "m", "M");
+            putMapping(acipOther2wylie, ":", "H");
+            putMapping(acipOther2wylie, ",", "/");
+            putMapping(acipOther2wylie, " ", " ");
+            putMapping(acipOther2wylie, ";", "|");
+            putMapping(acipOther2wylie, "`", "!");
+            putMapping(acipOther2wylie, "*", "@#");
+            // There is no glyph in TMW with the EWTS @##, so we don't do this: putMapping(acipOther2wylie, "#", "@##");
+            putMapping(acipOther2wylie, "%", "~X");
+            putMapping(acipOther2wylie, "o", "X");
+            putMapping(acipOther2wylie, "&", "&");
+            putMapping(acipOther2wylie, "^", "\\u0F38");
+
+            putMapping(acipOther2wylie, "0", "0");
+            putMapping(acipOther2wylie, "1", "1");
+            putMapping(acipOther2wylie, "2", "2");
+            putMapping(acipOther2wylie, "3", "3");
+            putMapping(acipOther2wylie, "4", "4");
+            putMapping(acipOther2wylie, "5", "5");
+            putMapping(acipOther2wylie, "6", "6");
+            putMapping(acipOther2wylie, "7", "7");
+            putMapping(acipOther2wylie, "8", "8");
+            putMapping(acipOther2wylie, "9", "9");
+        }
+        return (String)acipOther2wylie.get(acip);
+    }
+
+    public TTshegBarScanner scanner() { return ACIPTshegBarScanner.instance(); }
+
+    /** Registers acip->wylie mappings in toWylie; registers
+        wylie->acip mappings in {@link #wylieToACIP}. */
+    private /* synchronized */ void putMapping(HashMap toWylie, String ACIP, String EWTS) {
+        toWylie.put(ACIP, EWTS);
+        if (null == wylieToACIP) {
+            wylieToACIP = new HashMap(75);
+
+            // We don't want to put "/" in toWylie:
+            wylieToACIP.put("(", "/");
+            wylieToACIP.put(")", "/");
+            wylieToACIP.put("?", "\\");
+
+            wylieToACIP.put("_", " "); // oddball.
+            wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
+        }
+        wylieToACIP.put(EWTS, ACIP);
+    }
+
+    /** A map from EWTS to ACIP.  Note that the EWTS "w" maps to both
+        "V" and "W" in reality but this map will only give one or the
+        other. */
+    private HashMap wylieToACIP = null;
+    /** Returns the ACIP transliteration corresponding to the THDL
+        Extended Wylie <em>atom</em> EWTS, or null if EWTS is null or
+        is not recognized.  An EWTS that is not itself a key of
+        {@link #wylieToACIP} is split at '-' and '+' and converted
+        piece by piece; if any piece is unknown, the result is null.
+        A non-null result for an EWTS that starts with "w" starts
+        with "W" rather than "V". */
+    public String getACIPForEWTS(String EWTS) {
+        // Each of these builds its own table on first use and,
+        // through putMapping, fills in wylieToACIP.
+        getEwtsForConsonant(null);
+        getEwtsForOther(null);
+        getEwtsForWowel(null);
+
+        // Without this guard, EWTS.startsWith below would throw a
+        // NullPointerException instead of reporting "not recognized".
+        if (null == EWTS) return null;
+
+        // We want W+NA, not V+NA; we want WA, not VA.
+        final boolean useCapitalW = EWTS.startsWith("w");
+
+        String ans = (String)wylieToACIP.get(EWTS);
+        if (null != ans) {
+            if (useCapitalW)
+                return "W" + ans.substring(1);
+            else
+                return ans;
+        }
+
+        // Not a known atom as a whole; convert it piece by piece,
+        // copying the '-' and '+' delimiters through unchanged.
+        StringBuffer finalAns = new StringBuffer(EWTS.length());
+        StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
+        while (sTok.hasMoreTokens()) {
+            String part, tok = sTok.nextToken();
+            if (tok.equals("-") || tok.equals("+"))
+                part = tok;
+            else {
+                if ("w".equals(tok)) {
+                    // There are only two stacks in TMW that have
+                    // U+0FBA: R+Wa and w+Wa.  TMW->ACIP fails for
+                    // these unless we handle it here.  (FIXME:
+                    // add an automated test for this).
+                    //
+                    // NOTE(review): "R+W" contains no lowercase "w"
+                    // token, so the first test below cannot be true
+                    // in this branch -- confirm what was intended.
+                    if ("R+W".equals(EWTS) || "w+W".equals(EWTS)) {
+                        part = "W";
+                    } else {
+                        part = "V";
+                    }
+                } else {
+                    part = (String)wylieToACIP.get(tok);
+                }
+            }
+            if (null == part) return null;
+            finalAns.append(part);
+        }
+        if (useCapitalW)
+            finalAns.setCharAt(0, 'W');
+        return finalAns.toString();
+    }
+
+    private HashMap acipConsonant2wylie = null;
+    /** Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
+     *  even though sometimes the EWTS for those is "w", "R", or "Y".
+     *  Handle that in the caller. */
+    public /* synchronized */ String getEwtsForConsonant(String acip) {
+        if (acipConsonant2wylie == null) {
+            acipConsonant2wylie = new HashMap(37);
+
+            // oddball:
+            putMapping(acipConsonant2wylie, "V", "w");
+
+            // more oddballs:
+            putMapping(acipConsonant2wylie, "DH", "d+h");
+            putMapping(acipConsonant2wylie, "BH", "b+h");
+            putMapping(acipConsonant2wylie, "dH", "D+h");
+            putMapping(acipConsonant2wylie, "DZH", "dz+h"); // longest, MAX_CONSONANT_LENGTH characters
+            putMapping(acipConsonant2wylie, "Ksh", "k+Sh"); // longest, MAX_CONSONANT_LENGTH characters
+            putMapping(acipConsonant2wylie, "GH", "g+h");
+
+
+            putMapping(acipConsonant2wylie, "K", "k");
+            putMapping(acipConsonant2wylie, "KH", "kh");
+            putMapping(acipConsonant2wylie, "G", "g");
+            putMapping(acipConsonant2wylie, "NG", "ng");
+            putMapping(acipConsonant2wylie, "C", "c");
+            putMapping(acipConsonant2wylie, "CH", "ch");
+            putMapping(acipConsonant2wylie, "J", "j");
+            putMapping(acipConsonant2wylie, "NY", "ny");
+            putMapping(acipConsonant2wylie, "T", "t");
+            putMapping(acipConsonant2wylie, "TH", "th");
+            putMapping(acipConsonant2wylie, "D", "d");
+            putMapping(acipConsonant2wylie, "N", "n");
+            putMapping(acipConsonant2wylie, "P", "p");
+            putMapping(acipConsonant2wylie, "PH", "ph");
+            putMapping(acipConsonant2wylie, "B", "b");
+            putMapping(acipConsonant2wylie, "M", "m");
+            putMapping(acipConsonant2wylie, "TZ", "ts");
+            putMapping(acipConsonant2wylie, "TS", "tsh");
+            putMapping(acipConsonant2wylie, "DZ", "dz");
+            putMapping(acipConsonant2wylie, "W", "W"
+                       /* NOTE WELL: sometimes "w", sometimes "W".
+                          Handle this in the caller.
+                          
+                          Reasoning for "W" instead of "w": r-w and
+                          r+w are both known hash keys.  We sort 'em
+                          out this way.  (They are the only things
+                          like this according to bug report #800166.)  */
+                       );
+            putMapping(acipConsonant2wylie, "ZH", "zh");
+            putMapping(acipConsonant2wylie, "Z", "z");
+            putMapping(acipConsonant2wylie, "'", "'");
+            putMapping(acipConsonant2wylie, "Y", "y");
+            putMapping(acipConsonant2wylie, "R", "r");
+            putMapping(acipConsonant2wylie, "L", "l");
+            putMapping(acipConsonant2wylie, "SH", "sh");
+            putMapping(acipConsonant2wylie, "S", "s");
+            putMapping(acipConsonant2wylie, "H", "h");
+            putMapping(acipConsonant2wylie, "A", "a");
+            putMapping(acipConsonant2wylie, "t", "T");
+            putMapping(acipConsonant2wylie, "th", "Th");
+            putMapping(acipConsonant2wylie, "d", "D");
+            putMapping(acipConsonant2wylie, "n", "N");
+            putMapping(acipConsonant2wylie, "sh", "Sh");
+        }
+        return (String)acipConsonant2wylie.get(acip);
+    }
+
+    /** Lazily built by getEwtsForWowel: maps ACIP wowels to EWTS. */
+    private HashMap acipWowel2wylie = null;
+    /** Returns the EWTS for the ACIP wowel acip, or null if acip is
+     *  not a wowel known here.  The first call builds the table from
+     *  baseVowels: each base vowel and its '-prefixed variant is
+     *  registered alone and followed by "m", ":", and "m:".  Through
+     *  putMapping, the reverse mappings are registered too. */
+    public /* synchronized */ String getEwtsForWowel(String acip) {
+        if (null == acipWowel2wylie) {
+            acipWowel2wylie = new HashMap(baseVowels.length * 4);
+
+            for (int v = 0; v < baseVowels.length; v++) {
+                final String plainAcip = baseVowels[v][0];
+                final String apostropheAcip = "'" + plainAcip;
+                final String plainEwts = baseVowels[v][1];
+                final String apostropheEwts = baseVowels[v][2];
+
+                putMapping(acipWowel2wylie, plainAcip, plainEwts);
+                putMapping(acipWowel2wylie, apostropheAcip, apostropheEwts);
+                putMapping(acipWowel2wylie, plainAcip + "m", plainEwts + "M");
+                putMapping(acipWowel2wylie, apostropheAcip + "m", apostropheEwts + "M");
+                putMapping(acipWowel2wylie, plainAcip + ":", plainEwts + "H");
+                putMapping(acipWowel2wylie, apostropheAcip + ":", apostropheEwts + "H");
+                putMapping(acipWowel2wylie, plainAcip + "m:", plainEwts + "MH");
+                putMapping(acipWowel2wylie, apostropheAcip + "m:", apostropheEwts + "MH");
+            }
+            // {Pm} is treated just like {PAm}; {P:} is treated just
+            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
+            // that happens thanks to
+            // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
+
+            // Keep this code in sync with getUnicodeFor.
+        }
+        return (String)acipWowel2wylie.get(acip);
+    }
+
+    /** {Ksh}, the longest consonant, has 3 characters, so this is
+     *  three. */
+    private static int MAX_CONSONANT_LENGTH = 3;
+
+    /** {'EEm:}, the longest wowel, has 5 characters, so this is
+     *  five. */
+    private static int MAX_WOWEL_LENGTH = 5;
+
+    private static String[][] baseVowels = new String[][] {
+        // { ACIP, EWTS, EWTS for ACIP {'\'' + baseVowels[][0]}, vowel
+        // numbers (see TibetanMachineWeb's VOWEL_A, VOWEL_o, etc.) 
+        // for ACIP, vowel numbers for ACIP {'\'' + baseVowels[][0]}
+        { "A", "a", "A" },
+        { "I", "i", "I" },
+        { "U", "u", "U" },
+        { "E", "e", "Ae" },
+        { "O", "o", "Ao" },
+        { "EE", "ai", "Aai" },
+        { "OO", "au", "Aau" },
+        { "i", "-i", "A-i" }
+    };
+
+    /** Returns true if and only if s is an ACIP wowel, i.e. if and
+     *  only if getEwtsForWowel knows it.  Context matters: A is both
+     *  a consonant and a vowel in ACIP, so only call this where a
+     *  wowel is what you expect. */
+    public boolean isWowel(String s) {
+        // I'm on my own with 'O and 'E and 'OO and 'EE, but GANG'O
+        // appears and I wonder... so here they are.  It's consistent
+        // with 'I and 'A and 'U, at least: all the vowels may appear
+        // as K'vowel.  DLC FIXME: ask.
+        final String ewts = getEwtsForWowel(s);
+        return ewts != null;
+    }
+
+    /** Returns true if and only if s is an ACIP consonant, i.e. if
+     *  and only if getEwtsForConsonant knows it. */
+    public boolean isConsonant(String s) {
+        final String ewts = getEwtsForConsonant(s);
+        return ewts != null;
+    }
+
+    /** Gets the duffcodes for wowel, such that they look good with
+     *  the preceding glyph, and appends them to duff.  Does nothing
+     *  if wowel is null.
+     *  @param duff the list to which the DuffCodes are appended
+     *  @param preceding the glyph that wowel follows
+     *  @param wowel an ACIP wowel for which {@link
+     *  #getEwtsForWowel(String)} returns non-null, or null
+     *  @throws IllegalArgumentException if wowel is non-null but
+     *  getEwtsForWowel does not recognize it */
+    public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
+        if (null == wowel) return;
+        if (null == getEwtsForWowel(wowel)) // FIXME: expensive assertion!  Use assert.
+            throw new IllegalArgumentException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
+
+        // Order matters here.
+        boolean context_added[] = new boolean[] { false }; // handed to every TibTextUtils.getVowel call below
+        if (wowel.startsWith("A")) {
+            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
+        } else if (wowel.indexOf("'U") >= 0) {
+            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
+        } else if (wowel.indexOf("'I") >= 0) {
+            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
+        } else { // everything else: one getVowel call per component found in wowel
+            if (wowel.indexOf('\'') >= 0) { // apostrophe present (e.g. 'E, 'OO): A_VOWEL goes first
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
+            }
+            if (wowel.indexOf("EE") >= 0) { // test EE before E so that EE is not taken for E
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
+            } else if (wowel.indexOf('E') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
+            }
+            if (wowel.indexOf("OO") >= 0) { // likewise OO before O
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
+            } else if (wowel.indexOf('O') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
+            }
+            if (wowel.indexOf('I') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
+            }
+            if (wowel.indexOf('U') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
+            }
+            if (wowel.indexOf('i') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
+            }
+        }
+        // FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
+
+        if (wowel.indexOf('m') >= 0) {
+            DuffCode last = (DuffCode)duff.get(duff.size() - 1); // NOTE(review): throws if duff is empty here -- confirm getVowel always appends
+            duff.remove(duff.size() - 1); // getBindu will add it back...
+            TibTextUtils.getBindu(duff, last);
+        }
+        if (wowel.indexOf(':') >= 0)
+            duff.add(TibetanMachineWeb.getGlyph(getEwtsForOther(":"))); // getEwtsForOther maps ":" to EWTS "H"
+    }
 }
+