Two things:

First, TMW->EWTS now gives dbas and dngas instead of dabs and dangs, because Chris Fynn's e-mail from today has dbas and dngas. Second, down with ACIPRules; long live ACIPTraits. EWTS->Tibetan conversion is closer still.
2005-02-22 04:36:54 +00:00 · 2005-02-22 04:36:54 +00:00 · c16f633ecf
commit c16f633ecf
parent 82c6047cc2
18 changed files with 950 additions and 818 deletions
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@ -969,6 +969,22 @@ public class DuffPaneTest extends DuffPaneTestBase {

        ensureKeysGiveCorrectWylie("'gas");

+        /* Chris Fynn's e-mail on Feb 21 2005 leads to these test
+           cases: */
+        {
+            ensureKeysGiveCorrectWylie("dgas");
+            ensureKeysGiveCorrectWylie("'gas");
+            ensureKeysGiveCorrectWylie("dngas");
+            ensureKeysGiveCorrectWylie("gnad");
+            ensureKeysGiveCorrectWylie("mnad");
+            ensureKeysGiveCorrectWylie("bags");
+            ensureKeysGiveCorrectWylie("dbas");
+            ensureKeysGiveCorrectWylie("'bas");
+            ensureKeysGiveCorrectWylie("mags");
+            ensureKeysGiveCorrectWylie("mangs");
+            ensureKeysGiveCorrectWylie("dmas");
+        }
+
        ensureKeysGiveCorrectWylie("gangs");

        ensureKeysGiveCorrectWylie("gnags");
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@ -27,7 +27,7 @@ import org.thdl.util.*;
 import org.thdl.tib.text.*;

 import org.thdl.tib.text.ttt.TConverter;
-import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
+import org.thdl.tib.text.ttt.ACIPTraits;
 import java.util.ArrayList;

 /** TibetanConverter is a command-line utility for converting to and
@ -297,17 +297,18 @@ public class TibetanConverter implements FontConverterConstants {
        if (ACIP_TO_UNI_TEXT == ct || ACIP_TO_TMW == ct) {
            try {
                ArrayList al
-                    = ACIPTshegBarScanner.instance().scanStream(in, null,
-                                                                ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
-                                                                                             1000 - 1),
-                                                                shortMessages,
-                                                                warningLevel);
+                    = ACIPTraits.instance().scanner().scanStream(in, null,
+                                                                 ThdlOptions.getIntegerOption("thdl.most.errors.a.tibetan.acip.document.can.have",
+                                                                                              1000 - 1),
+                                                                 shortMessages,
+                                                                 warningLevel);
                if (null == al)
                    return 47;
                boolean embeddedWarnings = (warningLevel != "None");
                boolean hasWarnings[] = new boolean[] { false };
                if (ACIP_TO_UNI_TEXT == ct) {
-                    if (!TConverter.convertToUnicodeText(al, out, null,
+                    if (!TConverter.convertToUnicodeText(ACIPTraits.instance(),
+                                                         al, out, null,
                                                         null, hasWarnings,
                                                         embeddedWarnings,
                                                         warningLevel,
@ -315,7 +316,8 @@ public class TibetanConverter implements FontConverterConstants {
                        return 46;
                } else {
                    if (ct != ACIP_TO_TMW) throw new Error("badness");
-                    if (!TConverter.convertToTMW(al, out, null, null,
+                    if (!TConverter.convertToTMW(ACIPTraits.instance(),
+                                                 al, out, null, null,
                                                 hasWarnings,
                                                 embeddedWarnings,
                                                 warningLevel, shortMessages,
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@ -137,7 +137,7 @@ public class TGCPair implements THDLWylieConstants {
                consonantACIP = "V";
            else
                consonantACIP
-                    = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
+                    = org.thdl.tib.text.ttt.ACIPTraits.instance().getACIPForEWTS(consonantWylie);
            if (null == consonantACIP) {
                if (null != consonantWylie && consonantWylie.startsWith("R+"))
                    return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie, " because the ACIP R+... could imply the short superscribed form, but this most likely intends the full form (i.e., Unicode character U+0F6A)");
@ -160,7 +160,7 @@ public class TGCPair implements THDLWylieConstants {
        }
        if (vowelWylie != null) {
            String vowelACIP
-                = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(vowelWylie);
+                = org.thdl.tib.text.ttt.ACIPTraits.instance().getACIPForEWTS(vowelWylie);
            if (null == vowelACIP) {
                return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie, "");
            } else {
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@ -25,7 +25,7 @@ import javax.swing.text.rtf.RTFEditorKit;
 import java.io.*;

 import org.thdl.util.ThdlDebug;
-import org.thdl.tib.text.ttt.ACIPTshegBarScanner;
+import org.thdl.tib.text.ttt.ACIPTraits;
 import org.thdl.tib.text.ttt.TConverter;
 import org.thdl.tib.text.tshegbar.LegalTshegBar;
 import org.thdl.tib.text.tshegbar.UnicodeConstants;
@ -333,8 +333,8 @@ public class TibTextUtils implements THDLWylieConstants {
    {
        StringBuffer errors = new StringBuffer();
        String warningLevel = withWarnings ? "All" : "None";
-        ArrayList al = ACIPTshegBarScanner.instance().scan(acip, errors, 500,
-                                                           false, warningLevel);
+        ArrayList al = ACIPTraits.instance().scanner().scan(acip, errors, 500,
+                                                            false, warningLevel);
        if (null == al || errors.length() > 0) {
            if (errors.length() > 0)
                throw new InvalidACIPException(errors.toString());
@ -348,8 +348,8 @@ public class TibTextUtils implements THDLWylieConstants {
        }
        try {
            int tloc[] = new int[] { loc };
-            TConverter.convertToTMW(al, tdoc, null, null, null,
-                                    putWarningsInOutput, warningLevel,
+            TConverter.convertToTMW(ACIPTraits.instance(), al, tdoc, null, null,
+                                    null, putWarningsInOutput, warningLevel,
                                    false, colors, tloc);
            return tloc[0] - loc;
        } catch (IOException e) {
@ -1430,6 +1430,53 @@ public class TibTextUtils implements THDLWylieConstants {
            candidateType = getCandidateTypeModuloAppendage(candidateType);

            if ("prefix/root-root/suffix-suffix/postsuffix" == candidateType) {
+                /* Update: Chris Fynn wrote this in response to an
+e-mail from David Chapman on Feb 21, 2005:
+
+<quote Chris Fynn feb 21 2005>
+When working out the rules for Tibetan and Dzongkha
+collation in Bhutan we came up with the following sequences
+that could be ambiguous:
+
+0F51 0F42 0F66
+0F60 0F42 0F66
+0F51 0F44 0F66
+0F42 0F53 0F51
+0F58 0F53 0F51
+0F56 0F42 0F66
+0F51 0F56 0F66
+0F60 0F56 0F66
+0F58 0F42 0F66
+0F58 0F44 0F66
+0F51 0F58 0F66
+
+After much consultation with experts in Bhutan it was
+decided these should always be read as follows:
+
+0F51 0F42 0F66  dgas
+0F60 0F42 0F66  'gas
+0F51 0F44 0F66  dngas *
+0F42 0F53 0F51  gnad
+0F58 0F53 0F51  mnad *
+0F56 0F42 0F66  bags
+0F51 0F56 0F66  dbas
+0F60 0F56 0F66  'bas *
+0F58 0F42 0F66  mags
+0F58 0F44 0F66  mangs
+0F51 0F58 0F66  dmas
+
+In most cases it was found that only one of the two possible
+readings actually existed as words. 0F51 0F44 0F66 , 0F58
+0F53 0F51, and 0F60 0F56 0F66 were not found as syllables in
+any known words, but the experts felt that *if* they
+occurred in Tibetan or Dzongkha text then dngas, mnad, and
+'bas would be the most likely reading.
+</quote>
+
+
+
+    Because of this e-mail, dbas and dngas (rather than dabs and
+    dangs) were added to the list of exceptions.  */
                /* Yes, this is ambiguous. How do we handle it?  See
                 * this from Andres (but note that only 4 of the 14 in
                 * the second list are ambiguous because ra na sa and
@ -1480,7 +1527,9 @@ public class TibTextUtils implements THDLWylieConstants {
                                                || wylie2.equals("n")
                                                || wylie2.equals("s")))
                        || (wylie1.equals("d") && (wylie2.equals("g")
-                                                   || wylie2.equals("m")))
+                                                   || wylie2.equals("m")
+                                                   || wylie2.equals("b")
+                                                   || wylie2.equals("ng")))
                        || (wylie1.equals("b") && wylie2.equals("d"))
                        || (wylie1.equals("m") && wylie2.equals("d"))
                        || (wylie1.equals("'") && (wylie2.equals("g")
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -1988,7 +1988,7 @@ private static String acipForGlyph(String hashKey) {
        // ~X is a special case because the EWTS is 2 characters in
        // length
        || "~X".equals(hashKey)) // hard-coded EWTS value
-        return org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(hashKey);
+        return org.thdl.tib.text.ttt.ACIPTraits.instance().getACIPForEWTS(hashKey);
    else
        // else we are not be able to use it because it's not smart
        // about stacks (e.g., W+W)
@ -2116,7 +2116,7 @@ public static String getACIPForGlyph(DuffCode dc1,

    // DLC FIXME: TMW.53 is probably going to come out all wrong (VA
    // vs. WA) from this function, but
-    // ACIPRules.getACIPForEWTS(String) seems to come through... will
+    // ACIPTraits.getACIPForEWTS(String) seems to come through... will
    // it always?

    String hashKey = getHashKeyForGlyph(dc1);
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@ -9,9 +9,9 @@
 //   - blank lines should be ignored
 //   - <?x?> marks a command
 //
-// If you change the Wylie here, it can break the ACIP->TMW and
-// ACIP->Unicode conversion.  So keep ACIPRules in sync with this, and be
-// sure to run 'ant clean check' after your change.
+// If you change the EWTS transliteration here, it can break the
+// ACIP->TMW and ACIP->Unicode conversion.  So keep ACIPTraits in sync
+// with this, and be sure to run 'ant clean check' after your change.
 //
 // Note that some glyphs have EWTS \uF021-\uF0FF inclusive.  These do
 // not have anything in the Unicode column, though, because this is
@ -37,7 +37,7 @@
 // by the way.
 //
 // If EWTS changes, then ACIP->TMW and ACIP->Unicode will break --
-// modify ACIPRules and test test test.
+// modify ACIPTraits and test test test.

 <?Input:Punctuation?>
 //_~32,1~0,32
@ -645,7 +645,7 @@ r+m+m~51,4~~7,59~1,110~8,121~1,123~1,125~8,107~8,114~f62,fa8,fa8
 // Note that TPairList.java's unicodeExceptionsMap must be updated if
 // we change who uses U+0F6A.
 R+Y~52,4~~7,60~1,110~8,120~1,123~1,125~8,106~8,113~f6a,fbb
-// R+W is mentioned in ACIPRules.java:
+// R+W is mentioned in ACIPTraits.java:
 R+W~196,4~~7,61~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fba
 R+sh~53,4~~7,62~1,109~8,120~1,123~1,125~8,106~8,113~f6a,fb4
 R+sh+y~54,4~~7,63~1,109~8,122~1,123~1,125~8,108~8,115~f6a,fb4,fb1
@ -667,7 +667,7 @@ l+h+w~197,4~~7,78~1,109~8,121~1,123~1,125~8,106~8,113~f63,fb7,fad
 w+y~69,4~~7,79~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb1
 w+r~70,4~~7,80~1,109~8,121~1,123~1,125~8,107~8,114~f5d,fb2
 w+n~195,4~~7,81~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fa3
-// w+W is mentioned in ACIPRules.java:
+// w+W is mentioned in ACIPTraits.java:
 w+W~194,4~~7,82~1,109~8,120~1,123~1,125~8,106~8,113~f5d,fba
 sh+ts~71,4~~7,83~1,109~8,120~1,123~1,125~8,106~8,113~f64,fa9
 sh+ts+y~72,4~~7,84~1,109~8,122~1,123~1,125~8,108~8,115~f64,fa9,fb1
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@ -1,658 +0,0 @@
-/*
-The contents of this file are subject to the THDL Open Community License
-Version 1.0 (the "License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License on the THDL web site 
-(http://www.thdl.org/).
-
-Software distributed under the License is distributed on an "AS IS" basis, 
-WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
-License for the specific terms governing rights and limitations under the 
-License. 
-
-The Initial Developer of this software is the Tibetan and Himalayan Digital
-Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
-All Rights Reserved. 
-
-Contributor(s): ______________________________________.
-*/
-
-package org.thdl.tib.text.ttt;
-
-import java.util.HashSet;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.StringTokenizer;
-import java.util.List;
-
-import org.thdl.util.ThdlOptions;
-import org.thdl.tib.text.DuffCode;
-import org.thdl.tib.text.THDLWylieConstants;
-import org.thdl.tib.text.TibetanMachineWeb;
-import org.thdl.tib.text.TibTextUtils;
-
-// TODO(DLC)[EWTS->Tibetan]: this and ACIPTraits -- unify?
-
-/** Canonizes some facts regarding the ACIP transcription system.
- *  @author David Chandler */
-public class ACIPRules {
-    /** {Ksh}, the longest consonant, has 3 characters, so this is
-     *  three. */
-    public static int MAX_CONSONANT_LENGTH = 3;
-
-    /** {'EEm:}, the longest wowel, has 5 characters, so this is
-     *  five. */
-    public static int MAX_WOWEL_LENGTH = 5;
-
-    /** For O(1) {@link #isWowel(String)} calls. */
-    private static HashSet acipVowels = null;
-
-    private static String[][] baseVowels = new String[][] {
-        // { ACIP, EWTS, EWTS for ACIP {'\'' + baseVowels[][0]}, vowel
-        // numbers (see TibetanMachineWeb's VOWEL_A, VOWEL_o, etc.) 
-        // for ACIP, vowel numbers for ACIP {'\'' + baseVowels[][0]}
-        { "A", "a", "A" },
-        { "I", "i", "I" },
-        { "U", "u", "U" },
-        { "E", "e", "Ae" },
-        { "O", "o", "Ao" },
-        { "EE", "ai", "Aai" },
-        { "OO", "au", "Aau" },
-        { "i", "-i", "A-i" }
-    };
-
-    /** Returns true if and only if s is an ACIP wowel.  You can't
-     *  just call this any time -- A is both a consonant and a vowel
-     *  in ACIP, so you have to call this in the right context. */
-    public static boolean isWowel(String s) {
-        if (null == acipVowels) {
-            acipVowels = new HashSet(baseVowels.length * 8);
-            for (int i = 0; i < baseVowels.length; i++) {
-                // I'm on my own with 'O and 'E and 'OO and 'EE, but
-                // GANG'O appears and I wonder... so here they are.
-                // It's consistent with 'I and 'A and 'U, at least:
-                // all the vowels may appear as K'vowel.  DLC FIXME:
-                // ask.
-
-                acipVowels.add(baseVowels[i][0]);
-                acipVowels.add('\'' + baseVowels[i][0]);
-                acipVowels.add(baseVowels[i][0] + 'm');
-                acipVowels.add('\'' + baseVowels[i][0] + 'm');
-                acipVowels.add(baseVowels[i][0] + ':');
-                acipVowels.add('\'' + baseVowels[i][0] + ':');
-                acipVowels.add(baseVowels[i][0] + "m:");
-                acipVowels.add('\'' + baseVowels[i][0] + "m:");
-
-                // Keep this code in sync with getUnicodeFor.
-                
-                // Keep this code in sync with getWylieForACIPVowel.
-            }
-            // {Pm} is treated just like {PAm}; {P:} is treated just
-            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
-            // that happens thanks to
-        }
-        return (acipVowels.contains(s));
-    }
-
-    /** For O(1) {@link #isConsonant(String)} calls. */
-    private static HashSet consonants = null;
-
-    /** Returns true if and only if acip is an ACIP consonant (without
-     *  a vowel). For example, returns true for "K", but not for
-     *  "KA" or "X". */
-    public static boolean isConsonant(String acip) {
-        if (consonants == null) {
-            consonants = new HashSet();
-            consonants.add("V");
-            consonants.add("K");
-            consonants.add("KH");
-            consonants.add("G");
-            consonants.add("NG");
-            consonants.add("C");
-            consonants.add("CH");
-            consonants.add("J");
-            consonants.add("NY");
-            consonants.add("T");
-            consonants.add("TH");
-            consonants.add("D");
-            consonants.add("N");
-            consonants.add("P");
-            consonants.add("PH");
-            consonants.add("B");
-            consonants.add("M");
-            consonants.add("TZ");
-            consonants.add("TS");
-            consonants.add("DZ");
-            consonants.add("W");
-            consonants.add("ZH");
-            consonants.add("Z");
-            consonants.add("Y");
-            consonants.add("R");
-            consonants.add("L");
-            consonants.add("SH");
-            consonants.add("S");
-            consonants.add("H");
-            consonants.add("t");
-            consonants.add("th");
-            consonants.add("d");
-            consonants.add("n");
-            consonants.add("sh");
-            consonants.add("dH");
-            consonants.add("DH");
-            consonants.add("BH");
-            consonants.add("DZH"); // longest, MAX_CONSONANT_LENGTH characters
-            consonants.add("Ksh"); // longest, MAX_CONSONANT_LENGTH characters
-            consonants.add("GH");
-            consonants.add("'");
-            consonants.add("A");
-        }
-        return consonants.contains(acip);
-    }
-
-    /** A map from wylie to ACIP.  Note that the Wylie "w" maps to
-        both "V" and "W". */
-    private static HashMap wylieToACIP = null;
-    /** Returns the ACIP transliteration corresponding to the THDL
-        Extended Wylie <em>atom</em> EWTS, or null if EWTS is not
-        recognized. */
-    public static String getACIPForEWTS(String EWTS) {
-        getWylieForACIPConsonant(null);
-        getWylieForACIPOther(null);
-        getWylieForACIPVowel(null);
-        String ans = (String)wylieToACIP.get(EWTS);
-        boolean useCapitalW = false;
-        if (EWTS.startsWith("w"))
-            useCapitalW = true; // We want W+NA, not V+NA; we want WA, not VA.
-        if (null == ans) {
-            StringBuffer finalAns = new StringBuffer(EWTS.length());
-            StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
-            while (sTok.hasMoreTokens()) {
-                String part, tok = sTok.nextToken();
-                if (tok.equals("-") || tok.equals("+"))
-                    part = tok;
-                else {
-                    if ("w".equals(tok)) {
-                        // There are only two stacks in TMW that have
-                        // U+0FBA: R+Wa and w+Wa.  TMW->ACIP fails for
-                        // these unless we handle it here.  (FIXME:
-                        // add an automated test for this).
-                        if ("R+W".equals(EWTS) || "w+W".equals(EWTS)) {
-                            part = "W";
-                        } else {
-                            part = "V";
-                        }
-                    } else {
-                        part = (String)wylieToACIP.get(tok);
-                    }
-                }
-                if (null == part) return null;
-                finalAns.append(part);
-            }
-            if (useCapitalW)
-                finalAns.setCharAt(0, 'W');
-            return finalAns.toString();
-        }
-        if (useCapitalW)
-            return "W" + ans.substring(1);
-        else
-            return ans;
-    }
-
-    /** Registers acip->wylie mappings in toWylie; registers
-        wylie->acip mappings in {@link #wylieToACIP}. */
-    private static void putMapping(HashMap toWylie, String ACIP, String EWTS) {
-        toWylie.put(ACIP, EWTS);
-        if (null == wylieToACIP) {
-            wylieToACIP = new HashMap(75);
-
-            // We don't want to put "/" in toWylie:
-            wylieToACIP.put("(", "/");
-            wylieToACIP.put(")", "/");
-            wylieToACIP.put("?", "\\");
-
-            wylieToACIP.put("_", " "); // oddball.
-            wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
-        }
-        wylieToACIP.put(EWTS, ACIP);
-    }
-
-    /** Returns true if and only if s is an ACIP consonant. */
-    static final boolean isACIPConsonant(String s) {
-        return (null != ACIPRules.getWylieForACIPConsonant(s));
-    }
-
-    private static HashMap acipConsonant2wylie = null;
-    /** Returns the EWTS corresponding to the given ACIP consonant
-     *  (without the "A" vowel).  Returns null if there is no such
-     *  EWTS.
-     *
-     *  <p>Returns "W" for ACIP "W", "r" for ACIP "R", y for ACIP "Y",
-     *  even though sometimes the EWTS for those is "w", "R", or "Y".
-     *  Handle that in the caller. */
-    static final String getWylieForACIPConsonant(String acip) {
-        if (acipConsonant2wylie == null) {
-            acipConsonant2wylie = new HashMap(37);
-
-            // oddball:
-            putMapping(acipConsonant2wylie, "V", "w");
-
-            // more oddballs:
-            putMapping(acipConsonant2wylie, "DH", "d+h");
-            putMapping(acipConsonant2wylie, "BH", "b+h");
-            putMapping(acipConsonant2wylie, "dH", "D+h");
-            putMapping(acipConsonant2wylie, "DZH", "dz+h");
-            putMapping(acipConsonant2wylie, "Ksh", "k+Sh");
-            putMapping(acipConsonant2wylie, "GH", "g+h");
-
-
-            putMapping(acipConsonant2wylie, "K", "k");
-            putMapping(acipConsonant2wylie, "KH", "kh");
-            putMapping(acipConsonant2wylie, "G", "g");
-            putMapping(acipConsonant2wylie, "NG", "ng");
-            putMapping(acipConsonant2wylie, "C", "c");
-            putMapping(acipConsonant2wylie, "CH", "ch");
-            putMapping(acipConsonant2wylie, "J", "j");
-            putMapping(acipConsonant2wylie, "NY", "ny");
-            putMapping(acipConsonant2wylie, "T", "t");
-            putMapping(acipConsonant2wylie, "TH", "th");
-            putMapping(acipConsonant2wylie, "D", "d");
-            putMapping(acipConsonant2wylie, "N", "n");
-            putMapping(acipConsonant2wylie, "P", "p");
-            putMapping(acipConsonant2wylie, "PH", "ph");
-            putMapping(acipConsonant2wylie, "B", "b");
-            putMapping(acipConsonant2wylie, "M", "m");
-            putMapping(acipConsonant2wylie, "TZ", "ts");
-            putMapping(acipConsonant2wylie, "TS", "tsh");
-            putMapping(acipConsonant2wylie, "DZ", "dz");
-            putMapping(acipConsonant2wylie, "W", "W"
-                       /* NOTE WELL: sometimes "w", sometimes "W".
-                          Handle this in the caller.
-                          
-                          Reasoning for "W" instead of "w": r-w and
-                          r+w are both known hash keys.  We sort 'em
-                          out this way.  (They are the only things
-                          like this according to bug report #800166.)  */
-                       );
-            putMapping(acipConsonant2wylie, "ZH", "zh");
-            putMapping(acipConsonant2wylie, "Z", "z");
-            putMapping(acipConsonant2wylie, "'", "'");
-            putMapping(acipConsonant2wylie, "Y", "y");
-            putMapping(acipConsonant2wylie, "R", "r");
-            putMapping(acipConsonant2wylie, "L", "l");
-            putMapping(acipConsonant2wylie, "SH", "sh");
-            putMapping(acipConsonant2wylie, "S", "s");
-            putMapping(acipConsonant2wylie, "H", "h");
-            putMapping(acipConsonant2wylie, "A", "a");
-            putMapping(acipConsonant2wylie, "t", "T");
-            putMapping(acipConsonant2wylie, "th", "Th");
-            putMapping(acipConsonant2wylie, "d", "D");
-            putMapping(acipConsonant2wylie, "n", "N");
-            putMapping(acipConsonant2wylie, "sh", "Sh");
-        }
-        return (String)acipConsonant2wylie.get(acip);
-    }
-
-    private static HashMap acipVowel2wylie = null;
-    /** Returns the EWTS corresponding to the given ACIP "vowel".
-     *  Returns null if there is no such EWTS. */
-    static final String getWylieForACIPVowel(String acip) {
-        if (acipVowel2wylie == null) {
-            acipVowel2wylie = new HashMap(baseVowels.length * 4);
-
-            for (int i = 0; i < baseVowels.length; i++) {
-                putMapping(acipVowel2wylie, baseVowels[i][0], baseVowels[i][1]);
-                putMapping(acipVowel2wylie, '\'' + baseVowels[i][0], baseVowels[i][2]);
-                putMapping(acipVowel2wylie, baseVowels[i][0] + 'm', baseVowels[i][1] + 'M');
-                putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + 'm', baseVowels[i][2] + 'M');
-                putMapping(acipVowel2wylie, baseVowels[i][0] + ':', baseVowels[i][1] + 'H');
-                putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + ':', baseVowels[i][2] + 'H');
-                putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
-                putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
-            }
-            // {Pm} is treated just like {PAm}; {P:} is treated just
-            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
-            // that happens thanks to
-            // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
-        }
-        return (String)acipVowel2wylie.get(acip);
-    }
-
-    private static HashMap acipOther2wylie = null;
-    /** Returns the EWTS corresponding to the given ACIP puncuation or
-     *  mark.  Returns null if there is no such EWTS. */
-    static final String getWylieForACIPOther(String acip) {
-        if (acipOther2wylie == null) {
-            acipOther2wylie = new HashMap(20);
-
-            // don't use putMapping for this.  We don't want TMW->ACIP
-            // to produce "." for a U+0F0C because ACIP doesn't say
-            // that "." means U+0F0C.  It just seems to in practice
-            // for ACIP Release IV texts.
-            acipOther2wylie.put(".", "*");
-
-            putMapping(acipOther2wylie, "m", "M");
-            putMapping(acipOther2wylie, ":", "H");
-            putMapping(acipOther2wylie, ",", "/");
-            putMapping(acipOther2wylie, " ", " ");
-            putMapping(acipOther2wylie, ";", "|");
-            putMapping(acipOther2wylie, "`", "!");
-            putMapping(acipOther2wylie, "*", "@#");
-            // There is no glyph in TMW with the EWTS @##, so we don't do this: putMapping(acipOther2wylie, "#", "@##");
-            putMapping(acipOther2wylie, "%", "~X");
-            putMapping(acipOther2wylie, "o", "X");
-            putMapping(acipOther2wylie, "&", "&");
-            putMapping(acipOther2wylie, "^", "\\u0F38");
-
-            putMapping(acipOther2wylie, "0", "0");
-            putMapping(acipOther2wylie, "1", "1");
-            putMapping(acipOther2wylie, "2", "2");
-            putMapping(acipOther2wylie, "3", "3");
-            putMapping(acipOther2wylie, "4", "4");
-            putMapping(acipOther2wylie, "5", "5");
-            putMapping(acipOther2wylie, "6", "6");
-            putMapping(acipOther2wylie, "7", "7");
-            putMapping(acipOther2wylie, "8", "8");
-            putMapping(acipOther2wylie, "9", "9");
-        }
-        return (String)acipOther2wylie.get(acip);
-    }
-
-    private static HashMap superACIP2unicode = null;
-    private static HashMap subACIP2unicode = null;
-    /** If acip is an ACIP consonant or vowel or punctuation mark,
-     *  then this returns the Unicode for it.  The Unicode for the
-     *  subscribed form of the glyph is returned if subscribed is
-     *  true.  Returns null if acip is unknown. */
-    static String getUnicodeFor(String acip, boolean subscribed) {
-        if (superACIP2unicode == null) {
-            final boolean compactUnicode
-                = ThdlOptions.getBooleanOption("thdl.acip.to.unicode.conversions.use.0F52.et.cetera");
-            superACIP2unicode = new HashMap(144);
-            subACIP2unicode = new HashMap(42);
-
-            // oddball:
-            subACIP2unicode.put("V", "\u0FAD");
-
-            superACIP2unicode.put("DH", (compactUnicode ? "\u0F52" : "\u0F51\u0FB7"));
-            subACIP2unicode.put("DH", (compactUnicode ? "\u0FA2" : "\u0FA1\u0FB7"));
-            superACIP2unicode.put("BH", (compactUnicode ? "\u0F57" : "\u0F56\u0FB7"));
-            subACIP2unicode.put("BH", (compactUnicode ? "\u0FA7" : "\u0FA6\u0FB7"));
-            superACIP2unicode.put("dH", (compactUnicode ? "\u0F4D" : "\u0F4C\u0FB7"));
-            subACIP2unicode.put("dH", (compactUnicode ? "\u0F9D" : "\u0F9C\u0FB7"));
-            superACIP2unicode.put("DZH", (compactUnicode ? "\u0F5C" : "\u0F5B\u0FB7"));
-            subACIP2unicode.put("DZH", (compactUnicode ? "\u0FAC" : "\u0FAB\u0FB7"));
-            superACIP2unicode.put("Ksh", (compactUnicode ? "\u0F69" : "\u0F40\u0FB5"));
-            subACIP2unicode.put("Ksh", (compactUnicode ? "\u0FB9" : "\u0F90\u0FB5"));
-            superACIP2unicode.put("GH", (compactUnicode ? "\u0F43" : "\u0F42\u0FB7"));
-            subACIP2unicode.put("GH", (compactUnicode ? "\u0F93" : "\u0F92\u0FB7"));
-            superACIP2unicode.put("K", "\u0F40");
-            subACIP2unicode.put("K", "\u0F90");
-            superACIP2unicode.put("KH", "\u0F41");
-            subACIP2unicode.put("KH", "\u0F91");
-            superACIP2unicode.put("G", "\u0F42");
-            subACIP2unicode.put("G", "\u0F92");
-            superACIP2unicode.put("NG", "\u0F44");
-            subACIP2unicode.put("NG", "\u0F94");
-            superACIP2unicode.put("C", "\u0F45");
-            subACIP2unicode.put("C", "\u0F95");
-            superACIP2unicode.put("CH", "\u0F46");
-            subACIP2unicode.put("CH", "\u0F96");
-            superACIP2unicode.put("J", "\u0F47");
-            subACIP2unicode.put("J", "\u0F97");
-            superACIP2unicode.put("NY", "\u0F49");
-            subACIP2unicode.put("NY", "\u0F99");
-            superACIP2unicode.put("T", "\u0F4F");
-            subACIP2unicode.put("T", "\u0F9F");
-            superACIP2unicode.put("TH", "\u0F50");
-            subACIP2unicode.put("TH", "\u0FA0");
-            superACIP2unicode.put("D", "\u0F51");
-            subACIP2unicode.put("D", "\u0FA1");
-            superACIP2unicode.put("N", "\u0F53");
-            subACIP2unicode.put("N", "\u0FA3");
-            superACIP2unicode.put("P", "\u0F54");
-            subACIP2unicode.put("P", "\u0FA4");
-            superACIP2unicode.put("PH", "\u0F55");
-            subACIP2unicode.put("PH", "\u0FA5");
-            superACIP2unicode.put("B", "\u0F56");
-            subACIP2unicode.put("B", "\u0FA6");
-            superACIP2unicode.put("M", "\u0F58");
-            subACIP2unicode.put("M", "\u0FA8");
-            superACIP2unicode.put("TZ", "\u0F59");
-            subACIP2unicode.put("TZ", "\u0FA9");
-            superACIP2unicode.put("TS", "\u0F5A");
-            subACIP2unicode.put("TS", "\u0FAA");
-            superACIP2unicode.put("DZ", "\u0F5B");
-            subACIP2unicode.put("DZ", "\u0FAB");
-            superACIP2unicode.put("W", "\u0F5D");
-            subACIP2unicode.put("W", "\u0FBA"); // oddball
-            superACIP2unicode.put("ZH", "\u0F5E");
-            subACIP2unicode.put("ZH", "\u0FAE");
-            superACIP2unicode.put("Z", "\u0F5F");
-            subACIP2unicode.put("Z", "\u0FAF");
-            superACIP2unicode.put("'", "\u0F60");
-            subACIP2unicode.put("'", "\u0FB0");
-            superACIP2unicode.put("Y", "\u0F61");
-            subACIP2unicode.put("Y", "\u0FB1");
-            superACIP2unicode.put("R", "\u0F62");
-            subACIP2unicode.put("R", "\u0FB2");
-            superACIP2unicode.put("L", "\u0F63");
-            subACIP2unicode.put("L", "\u0FB3");
-            superACIP2unicode.put("SH", "\u0F64");
-            subACIP2unicode.put("SH", "\u0FB4");
-            superACIP2unicode.put("S", "\u0F66");
-            subACIP2unicode.put("S", "\u0FB6");
-            superACIP2unicode.put("H", "\u0F67");
-            subACIP2unicode.put("H", "\u0FB7");
-            superACIP2unicode.put("A", "\u0F68");
-            subACIP2unicode.put("A", "\u0FB8");
-            superACIP2unicode.put("t", "\u0F4A");
-            subACIP2unicode.put("t", "\u0F9A");
-            superACIP2unicode.put("th", "\u0F4B");
-            subACIP2unicode.put("th", "\u0F9B");
-            superACIP2unicode.put("d", "\u0F4C");
-            subACIP2unicode.put("d", "\u0F9C");
-            superACIP2unicode.put("n", "\u0F4E");
-            subACIP2unicode.put("n", "\u0F9E");
-            superACIP2unicode.put("sh", "\u0F65");
-            subACIP2unicode.put("sh", "\u0FB5");
-
-            superACIP2unicode.put("I", "\u0F72");
-            superACIP2unicode.put("E", "\u0F7A");
-            superACIP2unicode.put("O", "\u0F7C");
-            superACIP2unicode.put("U", "\u0F74");
-            superACIP2unicode.put("OO", "\u0F7D");
-            superACIP2unicode.put("EE", "\u0F7B");
-            superACIP2unicode.put("i", "\u0F80");
-            superACIP2unicode.put("'A", "\u0F71");
-            superACIP2unicode.put("'I", "\u0F71\u0F72");
-            superACIP2unicode.put("'E", "\u0F71\u0F7A");
-            superACIP2unicode.put("'O", "\u0F71\u0F7C");
-            superACIP2unicode.put("'U", "\u0F71\u0F74");
-            superACIP2unicode.put("'OO", "\u0F71\u0F7D");
-            superACIP2unicode.put("'EE", "\u0F71\u0F7B");
-            superACIP2unicode.put("'i", "\u0F71\u0F80");
-
-            superACIP2unicode.put("Im", "\u0F72\u0F7E");
-            superACIP2unicode.put("Em", "\u0F7A\u0F7E");
-            superACIP2unicode.put("Om", "\u0F7C\u0F7E");
-            superACIP2unicode.put("Um", "\u0F74\u0F7E");
-            superACIP2unicode.put("OOm", "\u0F7D\u0F7E");
-            superACIP2unicode.put("EEm", "\u0F7B\u0F7E");
-            superACIP2unicode.put("im", "\u0F80\u0F7E");
-            superACIP2unicode.put("'Am", "\u0F71\u0F7E");
-            superACIP2unicode.put("'Im", "\u0F71\u0F72\u0F7E");
-            superACIP2unicode.put("'Em", "\u0F71\u0F7A\u0F7E");
-            superACIP2unicode.put("'Om", "\u0F71\u0F7C\u0F7E");
-            superACIP2unicode.put("'Um", "\u0F71\u0F74\u0F7E");
-            superACIP2unicode.put("'OOm", "\u0F71\u0F7D\u0F7E");
-            superACIP2unicode.put("'EEm", "\u0F71\u0F7B\u0F7E");
-            superACIP2unicode.put("'im", "\u0F71\u0F80\u0F7E");
-
-            superACIP2unicode.put("I:", "\u0F72\u0F7F");
-            superACIP2unicode.put("E:", "\u0F7A\u0F7F");
-            superACIP2unicode.put("O:", "\u0F7C\u0F7F");
-            superACIP2unicode.put("U:", "\u0F74\u0F7F");
-            superACIP2unicode.put("OO:", "\u0F7D\u0F7F");
-            superACIP2unicode.put("EE:", "\u0F7B\u0F7F");
-            superACIP2unicode.put("i:", "\u0F80\u0F7F");
-            superACIP2unicode.put("'A:", "\u0F71\u0F7F");
-            superACIP2unicode.put("'I:", "\u0F71\u0F72\u0F7F");
-            superACIP2unicode.put("'E:", "\u0F71\u0F7A\u0F7F");
-            superACIP2unicode.put("'O:", "\u0F71\u0F7C\u0F7F");
-            superACIP2unicode.put("'U:", "\u0F71\u0F74\u0F7F");
-            superACIP2unicode.put("'OO:", "\u0F71\u0F7D\u0F7F");
-            superACIP2unicode.put("'EE:", "\u0F71\u0F7B\u0F7F");
-            superACIP2unicode.put("'i:", "\u0F71\u0F80\u0F7F");
-
-            superACIP2unicode.put("Im:", "\u0F72\u0F7E\u0F7F");
-            superACIP2unicode.put("Em:", "\u0F7A\u0F7E\u0F7F");
-            superACIP2unicode.put("Om:", "\u0F7C\u0F7E\u0F7F");
-            superACIP2unicode.put("Um:", "\u0F74\u0F7E\u0F7F");
-            superACIP2unicode.put("OOm:", "\u0F7D\u0F7E\u0F7F");
-            superACIP2unicode.put("EEm:", "\u0F7B\u0F7E\u0F7F");
-            superACIP2unicode.put("im:", "\u0F80\u0F7E\u0F7F");
-            superACIP2unicode.put("'Am:", "\u0F71\u0F7E\u0F7F");
-            superACIP2unicode.put("'Im:", "\u0F71\u0F72\u0F7E\u0F7F");
-            superACIP2unicode.put("'Em:", "\u0F71\u0F7A\u0F7E\u0F7F");
-            superACIP2unicode.put("'Om:", "\u0F71\u0F7C\u0F7E\u0F7F");
-            superACIP2unicode.put("'Um:", "\u0F71\u0F74\u0F7E\u0F7F");
-            superACIP2unicode.put("'OOm:", "\u0F71\u0F7D\u0F7E\u0F7F");
-            superACIP2unicode.put("'EEm:", "\u0F71\u0F7B\u0F7E\u0F7F");
-            superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
-            // :m does not appear, though you'd think it's as valid as m:.
-
-            superACIP2unicode.put("m", "\u0F7E");
-            superACIP2unicode.put(":", "\u0F7F");
-            superACIP2unicode.put("m:", "\u0F7E\u0F7F");
-
-            superACIP2unicode.put("Am", "\u0F7E");
-            superACIP2unicode.put("A:", "\u0F7F");
-            superACIP2unicode.put("Am:", "\u0F7E\u0F7F");
-
-            superACIP2unicode.put("0", "\u0F20");
-            superACIP2unicode.put("1", "\u0F21");
-            superACIP2unicode.put("2", "\u0F22");
-            superACIP2unicode.put("3", "\u0F23");
-            superACIP2unicode.put("4", "\u0F24");
-            superACIP2unicode.put("5", "\u0F25");
-            superACIP2unicode.put("6", "\u0F26");
-            superACIP2unicode.put("7", "\u0F27");
-            superACIP2unicode.put("8", "\u0F28");
-            superACIP2unicode.put("9", "\u0F29");
-
-            // punctuation
-            superACIP2unicode.put("&", "\u0F85");
-            superACIP2unicode.put(",", "\u0F0D");
-            superACIP2unicode.put(" ", "\u0F0B");
-            superACIP2unicode.put(".", "\u0F0C");
-            superACIP2unicode.put("`", "\u0F08");
-            superACIP2unicode.put("`", "\u0F08");
-            superACIP2unicode.put("*", "\u0F04\u0F05");
-            superACIP2unicode.put("#", "\u0F04\u0F05\u0F05");
-            superACIP2unicode.put("%", "\u0F35"); // but might be U+0F14, so we warn.
-            superACIP2unicode.put("o", "\u0F37");
-            superACIP2unicode.put(";", "\u0F11");
-            superACIP2unicode.put("\r", "\r");
-            superACIP2unicode.put("\t", "\t");
-            superACIP2unicode.put("\r\n", "\r\n");
-            superACIP2unicode.put("\n", "\n");
-            superACIP2unicode.put("\\", "\u0F84");
-            superACIP2unicode.put("^", "\u0F38");
-
-            // DLC FIXME: "^ GONG" is "^GONG", right?
-            // DLC FIXME: what's the Unicode for x? RC said there is none in plain-text Unicode for x.  But what about in RTF Unicode?
-        }
-        if (subscribed) {
-            String u = (String)subACIP2unicode.get(acip);
-            if (null != u) return u;
-        }
-        return (String)superACIP2unicode.get(acip);
-    }
-
-
-
-    /** Gets the duffcodes for vowel, such that they look good with
-     *  the stack with hash key hashKey, and appends them to r. */
-    static void getDuffForACIPVowel(ArrayList duff, DuffCode preceding, String vowel) {
-        if (null == vowel) return;
-        if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion!  Use assert.
-            throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
-
-        // Order matters here.
-        boolean context_added[] = new boolean[] { false };
-        if (vowel.startsWith("A")) {
-            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
-        } else if (vowel.indexOf("'U") >= 0) {
-            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
-        } else if (vowel.indexOf("'I") >= 0) {
-            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
-        } else {
-            if (vowel.indexOf('\'') >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
-            }
-            if (vowel.indexOf("EE") >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
-            } else if (vowel.indexOf('E') >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
-            }
-            if (vowel.indexOf("OO") >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
-            } else if (vowel.indexOf('O') >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
-            }
-            if (vowel.indexOf('I') >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
-            }
-            if (vowel.indexOf('U') >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
-            }
-            if (vowel.indexOf('i') >= 0) {
-                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
-            }
-        }
-        // FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
-
-        if (vowel.indexOf('m') >= 0) {
-            DuffCode last = (DuffCode)duff.get(duff.size() - 1);
-            duff.remove(duff.size() - 1); // getBindu will add it back...
-            TibTextUtils.getBindu(duff, last);
-        }
-        if (vowel.indexOf(':') >= 0)
-            duff.add(TibetanMachineWeb.getGlyph("H"));
-    }
-
-    /** Returns true if and only if l is the ACIP representation of a
-        letter that can be a suffix.  Note that all postsuffixes are
-        also suffixes.  l must not have an "A" -- use "S", not "SA",
-        that is. */
-    public static boolean isACIPSuffix(String l) {
-        return ("S".equals(l)
-                || "G".equals(l)
-                || "D".equals(l)
-                || "M".equals(l)
-                || "'".equals(l)
-                || "B".equals(l)
-                || "NG".equals(l)
-                || "N".equals(l)
-                || "L".equals(l)
-                || "R".equals(l));
-    }
-
-    /** Returns true if and only if l is the ACIP representation of a
-        letter that can be a prefix.  l must not have an "A" -- use
-        "D", not "DA", that is. */
-    public static boolean isACIPPrefix(String l) {
-        return ("'".equals(l)
-                || "M".equals(l)
-                || "B".equals(l)
-                || "D".equals(l)
-                || "G".equals(l));
-    }
-
-    /** Returns true if and only if l is the ACIP representation of a
-        letter that can be a postsuffix.  l must not have an "A" --
-        use "D", not "DA", that is. */
-    public static boolean isACIPPostsuffix(String l) {
-        return ("S".equals(l)
-                || "D".equals(l));
-    }
-}
--- a/source/org/thdl/tib/text/ttt/ACIPTraits.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTraits.java
@ -18,11 +18,25 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

+import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.StringTokenizer;
+import java.util.List;
+
+import org.thdl.util.ThdlOptions;
+import org.thdl.tib.text.DuffCode;
+import org.thdl.tib.text.THDLWylieConstants;
+import org.thdl.tib.text.TibetanMachineWeb;
+import org.thdl.tib.text.TibTextUtils;
+
+
 /** A singleton class that should contain (but due to laziness and
 *  ignorance probably does not contain) all the traits that make ACIP
- *  transliteration different from other (say, EWTS)
- *  transliterations. */
-final class ACIPTraits implements TTraits {
+ *  transliteration scheme different from other (say, EWTS)
+ *  transliteration schemes.  This is not safe to use in concurrent
+ *  programs but it would be easy to make it so. */
+public final class ACIPTraits implements TTraits {
    /** sole instance of this class */
    private static ACIPTraits singleton = null;

@ -30,7 +44,7 @@ final class ACIPTraits implements TTraits {
    private ACIPTraits() { }

    /** Returns the singleton instance of this class. */
-    public static ACIPTraits instance() {
+    public static /* synchronized */ ACIPTraits instance() {
        if (null == singleton) {
            singleton = new ACIPTraits();
        }
@ -43,15 +57,536 @@ final class ACIPTraits implements TTraits {
    /** Returns '-'. */
    public char disambiguatorChar() { return '-'; }

-    public int maxConsonantLength() { return ACIPRules.MAX_CONSONANT_LENGTH; }
+    /** Returns MAX_CONSONANT_LENGTH, the length in characters of the
+     *  longest ACIP consonant (such as "DZH" or "Ksh"). */
+    public int maxConsonantLength() {
+        return MAX_CONSONANT_LENGTH;
+    }

-    public int maxWowelLength() { return ACIPRules.MAX_WOWEL_LENGTH; }
-
-    public boolean isConsonant(String s) { return ACIPRules.isConsonant(s); }
-
-    public boolean isWowel(String s) { return ACIPRules.isWowel(s); }
+    /** Returns MAX_WOWEL_LENGTH.  NOTE(review): presumably the length
+     *  in characters of the longest ACIP wowel; the constant is
+     *  declared outside the code shown here -- confirm. */
+    public int maxWowelLength() {
+        return MAX_WOWEL_LENGTH;
+    }

    public boolean hasSimpleError(TPair p) {
        return ("A".equals(p.getLeft()) && null == p.getRight());
    }
+
+    /** Returns "A", the string this transliteration scheme uses for
+     *  the a vowel. */
+    public String aVowel() {
+        return "A";
+    }
+
+    /** Returns true if and only if l is the ACIP representation of a
+     *  letter that can be a postsuffix, i.e. "S" or "D".  l must not
+     *  carry an "A" -- pass "D", not "DA".  A null l yields false. */
+    public boolean isPostsuffix(String l) {
+        if ("S".equals(l)) {
+            return true;
+        }
+        return "D".equals(l);
+    }
+
+    /** Returns true if and only if l is the ACIP representation of a
+     *  letter that can be a suffix: one of "S", "G", "D", "M", "'",
+     *  "B", "NG", "N", "L", or "R".  Every postsuffix is also a
+     *  suffix.  l must not carry an "A" -- pass "S", not "SA".  A
+     *  null l yields false. */
+    public boolean isSuffix(String l) {
+        if (null == l) {
+            return false;
+        }
+        if (l.length() == 1) {
+            // Each one-character suffix is a character of this string.
+            return "SGDM'BNLR".indexOf(l.charAt(0)) >= 0;
+        }
+        // "NG" is the only suffix longer than one character.
+        return "NG".equals(l);
+    }
+
+    /** Returns true if and only if l is the ACIP representation of a
+     *  letter that can be a prefix: one of "'", "M", "B", "D", or
+     *  "G".  l must not carry an "A" -- pass "D", not "DA".  A null
+     *  l yields false. */
+    public boolean isPrefix(String l) {
+        if (null == l || l.length() != 1) {
+            // All prefixes are exactly one character long.
+            return false;
+        }
+        return "'MBDG".indexOf(l.charAt(0)) >= 0;
+    }
+
+    /** ACIP token -> Unicode string for the ordinary (non-subscribed)
+        form; also holds wowels, digits, and punctuation.  Built
+        lazily by {@link #getUnicodeFor(String,boolean)}. */
+    private HashMap superACIP2unicode = null;
+    /** ACIP consonant -> Unicode string for the subscribed form.
+        Built lazily together with {@link #superACIP2unicode}. */
+    private HashMap subACIP2unicode = null;
+    /** Returns the Unicode string for the ACIP token acip, or null if
+        acip has no entry.  When subscribed is true the entry in
+        {@link #subACIP2unicode} is preferred; if there is none, or if
+        subscribed is false, the entry in {@link #superACIP2unicode}
+        is used.
+
+        <p>Both tables are built on the first call.  The boolean
+        option thdl.acip.to.unicode.conversions.use.0F52.et.cetera is
+        read once, at that time: when true, "DH", "BH", "dH", "DZH",
+        "Ksh", and "GH" map to single code points (U+0F52 et cetera);
+        otherwise they map to two-code-point sequences (e.g., U+0F51
+        U+0FB7).
+
+        @param acip an ACIP consonant, wowel, digit, or punctuation
+        token
+        @param subscribed true to prefer the subscribed form
+        @return the Unicode string, or null if acip is unknown */
+    public /* synchronized */ String getUnicodeFor(String acip, boolean subscribed) {
+        if (superACIP2unicode == null) {
+            final boolean compactUnicode
+                = ThdlOptions.getBooleanOption("thdl.acip.to.unicode.conversions.use.0F52.et.cetera");
+            superACIP2unicode = new HashMap(144);
+            subACIP2unicode = new HashMap(42);
+
+            // oddball:
+            subACIP2unicode.put("V", "\u0FAD");
+
+            superACIP2unicode.put("DH", (compactUnicode ? "\u0F52" : "\u0F51\u0FB7"));
+            subACIP2unicode.put("DH", (compactUnicode ? "\u0FA2" : "\u0FA1\u0FB7"));
+            superACIP2unicode.put("BH", (compactUnicode ? "\u0F57" : "\u0F56\u0FB7"));
+            subACIP2unicode.put("BH", (compactUnicode ? "\u0FA7" : "\u0FA6\u0FB7"));
+            superACIP2unicode.put("dH", (compactUnicode ? "\u0F4D" : "\u0F4C\u0FB7"));
+            subACIP2unicode.put("dH", (compactUnicode ? "\u0F9D" : "\u0F9C\u0FB7"));
+            superACIP2unicode.put("DZH", (compactUnicode ? "\u0F5C" : "\u0F5B\u0FB7"));
+            subACIP2unicode.put("DZH", (compactUnicode ? "\u0FAC" : "\u0FAB\u0FB7"));
+            superACIP2unicode.put("Ksh", (compactUnicode ? "\u0F69" : "\u0F40\u0FB5"));
+            subACIP2unicode.put("Ksh", (compactUnicode ? "\u0FB9" : "\u0F90\u0FB5"));
+            superACIP2unicode.put("GH", (compactUnicode ? "\u0F43" : "\u0F42\u0FB7"));
+            subACIP2unicode.put("GH", (compactUnicode ? "\u0F93" : "\u0F92\u0FB7"));
+            superACIP2unicode.put("K", "\u0F40");
+            subACIP2unicode.put("K", "\u0F90");
+            superACIP2unicode.put("KH", "\u0F41");
+            subACIP2unicode.put("KH", "\u0F91");
+            superACIP2unicode.put("G", "\u0F42");
+            subACIP2unicode.put("G", "\u0F92");
+            superACIP2unicode.put("NG", "\u0F44");
+            subACIP2unicode.put("NG", "\u0F94");
+            superACIP2unicode.put("C", "\u0F45");
+            subACIP2unicode.put("C", "\u0F95");
+            superACIP2unicode.put("CH", "\u0F46");
+            subACIP2unicode.put("CH", "\u0F96");
+            superACIP2unicode.put("J", "\u0F47");
+            subACIP2unicode.put("J", "\u0F97");
+            superACIP2unicode.put("NY", "\u0F49");
+            subACIP2unicode.put("NY", "\u0F99");
+            superACIP2unicode.put("T", "\u0F4F");
+            subACIP2unicode.put("T", "\u0F9F");
+            superACIP2unicode.put("TH", "\u0F50");
+            subACIP2unicode.put("TH", "\u0FA0");
+            superACIP2unicode.put("D", "\u0F51");
+            subACIP2unicode.put("D", "\u0FA1");
+            superACIP2unicode.put("N", "\u0F53");
+            subACIP2unicode.put("N", "\u0FA3");
+            superACIP2unicode.put("P", "\u0F54");
+            subACIP2unicode.put("P", "\u0FA4");
+            superACIP2unicode.put("PH", "\u0F55");
+            subACIP2unicode.put("PH", "\u0FA5");
+            superACIP2unicode.put("B", "\u0F56");
+            subACIP2unicode.put("B", "\u0FA6");
+            superACIP2unicode.put("M", "\u0F58");
+            subACIP2unicode.put("M", "\u0FA8");
+            superACIP2unicode.put("TZ", "\u0F59");
+            subACIP2unicode.put("TZ", "\u0FA9");
+            superACIP2unicode.put("TS", "\u0F5A");
+            subACIP2unicode.put("TS", "\u0FAA");
+            superACIP2unicode.put("DZ", "\u0F5B");
+            subACIP2unicode.put("DZ", "\u0FAB");
+            superACIP2unicode.put("W", "\u0F5D");
+            subACIP2unicode.put("W", "\u0FBA"); // oddball
+            superACIP2unicode.put("ZH", "\u0F5E");
+            subACIP2unicode.put("ZH", "\u0FAE");
+            superACIP2unicode.put("Z", "\u0F5F");
+            subACIP2unicode.put("Z", "\u0FAF");
+            superACIP2unicode.put("'", "\u0F60");
+            subACIP2unicode.put("'", "\u0FB0");
+            superACIP2unicode.put("Y", "\u0F61");
+            subACIP2unicode.put("Y", "\u0FB1");
+            superACIP2unicode.put("R", "\u0F62");
+            subACIP2unicode.put("R", "\u0FB2");
+            superACIP2unicode.put("L", "\u0F63");
+            subACIP2unicode.put("L", "\u0FB3");
+            superACIP2unicode.put("SH", "\u0F64");
+            subACIP2unicode.put("SH", "\u0FB4");
+            superACIP2unicode.put("S", "\u0F66");
+            subACIP2unicode.put("S", "\u0FB6");
+            superACIP2unicode.put("H", "\u0F67");
+            subACIP2unicode.put("H", "\u0FB7");
+            superACIP2unicode.put("A", "\u0F68");
+            subACIP2unicode.put("A", "\u0FB8");
+            superACIP2unicode.put("t", "\u0F4A");
+            subACIP2unicode.put("t", "\u0F9A");
+            superACIP2unicode.put("th", "\u0F4B");
+            subACIP2unicode.put("th", "\u0F9B");
+            superACIP2unicode.put("d", "\u0F4C");
+            subACIP2unicode.put("d", "\u0F9C");
+            superACIP2unicode.put("n", "\u0F4E");
+            subACIP2unicode.put("n", "\u0F9E");
+            superACIP2unicode.put("sh", "\u0F65");
+            subACIP2unicode.put("sh", "\u0FB5");
+
+            superACIP2unicode.put("I", "\u0F72");
+            superACIP2unicode.put("E", "\u0F7A");
+            superACIP2unicode.put("O", "\u0F7C");
+            superACIP2unicode.put("U", "\u0F74");
+            superACIP2unicode.put("OO", "\u0F7D");
+            superACIP2unicode.put("EE", "\u0F7B");
+            superACIP2unicode.put("i", "\u0F80");
+            superACIP2unicode.put("'A", "\u0F71");
+            superACIP2unicode.put("'I", "\u0F71\u0F72");
+            superACIP2unicode.put("'E", "\u0F71\u0F7A");
+            superACIP2unicode.put("'O", "\u0F71\u0F7C");
+            superACIP2unicode.put("'U", "\u0F71\u0F74");
+            superACIP2unicode.put("'OO", "\u0F71\u0F7D");
+            superACIP2unicode.put("'EE", "\u0F71\u0F7B");
+            superACIP2unicode.put("'i", "\u0F71\u0F80");
+
+            superACIP2unicode.put("Im", "\u0F72\u0F7E");
+            superACIP2unicode.put("Em", "\u0F7A\u0F7E");
+            superACIP2unicode.put("Om", "\u0F7C\u0F7E");
+            superACIP2unicode.put("Um", "\u0F74\u0F7E");
+            superACIP2unicode.put("OOm", "\u0F7D\u0F7E");
+            superACIP2unicode.put("EEm", "\u0F7B\u0F7E");
+            superACIP2unicode.put("im", "\u0F80\u0F7E");
+            superACIP2unicode.put("'Am", "\u0F71\u0F7E");
+            superACIP2unicode.put("'Im", "\u0F71\u0F72\u0F7E");
+            superACIP2unicode.put("'Em", "\u0F71\u0F7A\u0F7E");
+            superACIP2unicode.put("'Om", "\u0F71\u0F7C\u0F7E");
+            superACIP2unicode.put("'Um", "\u0F71\u0F74\u0F7E");
+            superACIP2unicode.put("'OOm", "\u0F71\u0F7D\u0F7E");
+            superACIP2unicode.put("'EEm", "\u0F71\u0F7B\u0F7E");
+            superACIP2unicode.put("'im", "\u0F71\u0F80\u0F7E");
+
+            superACIP2unicode.put("I:", "\u0F72\u0F7F");
+            superACIP2unicode.put("E:", "\u0F7A\u0F7F");
+            superACIP2unicode.put("O:", "\u0F7C\u0F7F");
+            superACIP2unicode.put("U:", "\u0F74\u0F7F");
+            superACIP2unicode.put("OO:", "\u0F7D\u0F7F");
+            superACIP2unicode.put("EE:", "\u0F7B\u0F7F");
+            superACIP2unicode.put("i:", "\u0F80\u0F7F");
+            superACIP2unicode.put("'A:", "\u0F71\u0F7F");
+            superACIP2unicode.put("'I:", "\u0F71\u0F72\u0F7F");
+            superACIP2unicode.put("'E:", "\u0F71\u0F7A\u0F7F");
+            superACIP2unicode.put("'O:", "\u0F71\u0F7C\u0F7F");
+            superACIP2unicode.put("'U:", "\u0F71\u0F74\u0F7F");
+            superACIP2unicode.put("'OO:", "\u0F71\u0F7D\u0F7F");
+            superACIP2unicode.put("'EE:", "\u0F71\u0F7B\u0F7F");
+            superACIP2unicode.put("'i:", "\u0F71\u0F80\u0F7F");
+
+            superACIP2unicode.put("Im:", "\u0F72\u0F7E\u0F7F");
+            superACIP2unicode.put("Em:", "\u0F7A\u0F7E\u0F7F");
+            superACIP2unicode.put("Om:", "\u0F7C\u0F7E\u0F7F");
+            superACIP2unicode.put("Um:", "\u0F74\u0F7E\u0F7F");
+            superACIP2unicode.put("OOm:", "\u0F7D\u0F7E\u0F7F");
+            superACIP2unicode.put("EEm:", "\u0F7B\u0F7E\u0F7F");
+            superACIP2unicode.put("im:", "\u0F80\u0F7E\u0F7F");
+            superACIP2unicode.put("'Am:", "\u0F71\u0F7E\u0F7F");
+            superACIP2unicode.put("'Im:", "\u0F71\u0F72\u0F7E\u0F7F");
+            superACIP2unicode.put("'Em:", "\u0F71\u0F7A\u0F7E\u0F7F");
+            superACIP2unicode.put("'Om:", "\u0F71\u0F7C\u0F7E\u0F7F");
+            superACIP2unicode.put("'Um:", "\u0F71\u0F74\u0F7E\u0F7F");
+            superACIP2unicode.put("'OOm:", "\u0F71\u0F7D\u0F7E\u0F7F");
+            superACIP2unicode.put("'EEm:", "\u0F71\u0F7B\u0F7E\u0F7F");
+            superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
+            // :m does not appear, though you'd think it's as valid as m:.
+
+            superACIP2unicode.put("m", "\u0F7E");
+            superACIP2unicode.put(":", "\u0F7F");
+            superACIP2unicode.put("m:", "\u0F7E\u0F7F");
+
+            superACIP2unicode.put("Am", "\u0F7E");
+            superACIP2unicode.put("A:", "\u0F7F");
+            superACIP2unicode.put("Am:", "\u0F7E\u0F7F");
+
+            superACIP2unicode.put("0", "\u0F20");
+            superACIP2unicode.put("1", "\u0F21");
+            superACIP2unicode.put("2", "\u0F22");
+            superACIP2unicode.put("3", "\u0F23");
+            superACIP2unicode.put("4", "\u0F24");
+            superACIP2unicode.put("5", "\u0F25");
+            superACIP2unicode.put("6", "\u0F26");
+            superACIP2unicode.put("7", "\u0F27");
+            superACIP2unicode.put("8", "\u0F28");
+            superACIP2unicode.put("9", "\u0F29");
+
+            // punctuation
+            superACIP2unicode.put("&", "\u0F85");
+            superACIP2unicode.put(",", "\u0F0D");
+            superACIP2unicode.put(" ", "\u0F0B");
+            superACIP2unicode.put(".", "\u0F0C");
+            superACIP2unicode.put("`", "\u0F08");
+            superACIP2unicode.put("*", "\u0F04\u0F05");
+            superACIP2unicode.put("#", "\u0F04\u0F05\u0F05");
+            superACIP2unicode.put("%", "\u0F35"); // but might be U+0F14, so we warn.
+            superACIP2unicode.put("o", "\u0F37");
+            superACIP2unicode.put(";", "\u0F11");
+            superACIP2unicode.put("\r", "\r");
+            superACIP2unicode.put("\t", "\t");
+            superACIP2unicode.put("\r\n", "\r\n");
+            superACIP2unicode.put("\n", "\n");
+            superACIP2unicode.put("\\", "\u0F84");
+            superACIP2unicode.put("^", "\u0F38");
+
+            // DLC FIXME: "^ GONG" is "^GONG", right?
+            // DLC FIXME: what's the Unicode for x? RC said there is none in plain-text Unicode for x.  But what about in RTF Unicode?
+        }
+        if (subscribed) {
+            // Prefer the subscribed form; fall through to the
+            // ordinary table when there is none (e.g., for wowels).
+            String u = (String)subACIP2unicode.get(acip);
+            if (null != u) return u;
+        }
+        return (String)superACIP2unicode.get(acip);
+    }
+
+    /** ACIP -> EWTS for the tokens handled by {@link
+        #getEwtsForOther(String)}; built lazily there. */
+    private HashMap acipOther2wylie = null;
+    /** Returns the EWTS for the ACIP token acip when acip is one of
+        the marks "m" and ":", a punctuation character, or a digit
+        registered below; returns null for anything else, including
+        null.  The first call builds the table and, through
+        putMapping, also registers the reverse EWTS->ACIP entries. */
+    public /* synchronized */ String getEwtsForOther(String acip) {
+        if (null == acipOther2wylie) {
+            final HashMap table = new HashMap(20);
+            acipOther2wylie = table;
+
+            // Deliberately a one-way entry, not putMapping: TMW->ACIP
+            // must not produce "." for a U+0F0C because ACIP doesn't
+            // say that "." means U+0F0C.  It just seems to in
+            // practice for ACIP Release IV texts.
+            table.put(".", "*");
+
+            putMapping(table, "m", "M");
+            putMapping(table, ":", "H");
+            putMapping(table, ",", "/");
+            putMapping(table, " ", " ");
+            putMapping(table, ";", "|");
+            putMapping(table, "`", "!");
+            putMapping(table, "*", "@#");
+            // ACIP "#" would be EWTS "@##", but there is no glyph in
+            // TMW with the EWTS @##, so it is not registered.
+            putMapping(table, "%", "~X");
+            putMapping(table, "o", "X");
+            putMapping(table, "&", "&");
+            putMapping(table, "^", "\\u0F38");
+
+            // Digits transliterate to themselves.
+            final String[] digits
+                = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };
+            for (int i = 0; i < digits.length; i++) {
+                putMapping(table, digits[i], digits[i]);
+            }
+        }
+        return (String)acipOther2wylie.get(acip);
+    }
+
+    /** Returns the ACIP tsheg bar scanner, i.e. {@link
+        ACIPTshegBarScanner#instance()}. */
+    public TTshegBarScanner scanner() {
+        return ACIPTshegBarScanner.instance();
+    }
+
+    /** Records one transliteration pair in both directions:
+        ACIP->EWTS in toWylie and EWTS->ACIP in {@link #wylieToACIP}.
+        The reverse map is created on the first call and seeded with
+        a few EWTS->ACIP entries that have no forward counterpart.  A
+        later call with an EWTS already present replaces the earlier
+        reverse entry. */
+    private /* synchronized */ void putMapping(HashMap toWylie, String ACIP, String EWTS) {
+        toWylie.put(ACIP, EWTS);
+
+        HashMap reverse = wylieToACIP;
+        if (null == reverse) {
+            reverse = new HashMap(75);
+            wylieToACIP = reverse;
+
+            // Reverse-only entries; we don't want "/" in toWylie:
+            reverse.put("(", "/");
+            reverse.put(")", "/");
+            reverse.put("?", "\\");
+
+            reverse.put("_", " "); // oddball.
+            reverse.put("o'i", "O'I"); // oddball for TMW9.61.
+        }
+        reverse.put(EWTS, ACIP);
+    }
+
+    /** A map from EWTS to ACIP.  Note that the EWTS "w" maps to both
+        "V" and "W" in reality but this map will only give one or the
+        other. */
+    private HashMap wylieToACIP = null;
+    /** Returns the ACIP transliteration corresponding to the THDL
+        Extended Wylie <em>atom</em> EWTS, or null if EWTS is null or
+        is not recognized.
+
+        <p>EWTS is first looked up whole.  Failing that, it is split
+        at "-" and "+" (which are copied through unchanged) and each
+        remaining piece is looked up on its own; if any piece is
+        unknown the result is null.  When EWTS starts with "w", the
+        first character of the result is forced to 'W'.
+
+        @param EWTS the EWTS atom, possibly a stack such as "w+W"
+        @return the ACIP transliteration, or null */
+    public String getACIPForEWTS(String EWTS) {
+        getEwtsForConsonant(null); // inits wylieToACIP
+        getEwtsForOther(null); // inits wylieToACIP
+        getEwtsForWowel(null); // inits wylieToACIP
+
+        // Without this guard a null EWTS misses the lookup below and
+        // then fails with a NullPointerException in startsWith.
+        if (null == EWTS) return null;
+
+        String ans = (String)wylieToACIP.get(EWTS);
+        boolean useCapitalW = false;
+        if (EWTS.startsWith("w"))
+            useCapitalW = true; // We want W+NA, not V+NA; we want WA, not VA.
+        if (null == ans) {
+            // No whole-atom entry: translate piece by piece.
+            StringBuffer finalAns = new StringBuffer(EWTS.length());
+            StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
+            while (sTok.hasMoreTokens()) {
+                String part, tok = sTok.nextToken();
+                if (tok.equals("-") || tok.equals("+"))
+                    part = tok;
+                else {
+                    if ("w".equals(tok)) {
+                        // There are only two stacks in TMW that have
+                        // U+0FBA: R+Wa and w+Wa.  TMW->ACIP fails for
+                        // these unless we handle it here.  (FIXME:
+                        // add an automated test for this).
+                        //
+                        // NOTE(review): "R+W" tokenizes to "R", "+",
+                        // "W" and so never yields the token "w"; the
+                        // first test below looks unreachable --
+                        // confirm the intent.
+                        if ("R+W".equals(EWTS) || "w+W".equals(EWTS)) {
+                            part = "W";
+                        } else {
+                            part = "V";
+                        }
+                    } else {
+                        part = (String)wylieToACIP.get(tok);
+                    }
+                }
+                if (null == part) return null;
+                finalAns.append(part);
+            }
+            if (useCapitalW)
+                finalAns.setCharAt(0, 'W');
+            return finalAns.toString();
+        }
+        if (useCapitalW)
+            return "W" + ans.substring(1);
+        else
+            return ans;
+    }
+
+    private HashMap acipConsonant2wylie = null;
+    /** Looks up the EWTS for the ACIP consonant acip, returning null
+     *  if acip is not a known ACIP consonant.  The lookup table is
+     *  built the first time this is called.  Beware: this gives "W"
+     *  for ACIP "W", "r" for ACIP "R", and "y" for ACIP "Y", even
+     *  though the right EWTS is sometimes "w", "R", or "Y".  The
+     *  caller must handle that. */
+    public /* synchronized */ String getEwtsForConsonant(String acip) {
+        if (null != acipConsonant2wylie)
+            return (String)acipConsonant2wylie.get(acip);
+
+        // Each row is { ACIP consonant, EWTS }.  Rows are registered
+        // in the order listed.
+        final String[][] pairs = {
+            // oddball:
+            { "V", "w" },
+
+            // more oddballs:
+            { "DH", "d+h" },
+            { "BH", "b+h" },
+            { "dH", "D+h" },
+            { "DZH", "dz+h" }, // longest, MAX_CONSONANT_LENGTH characters
+            { "Ksh", "k+Sh" }, // longest, MAX_CONSONANT_LENGTH characters
+            { "GH", "g+h" },
+
+            { "K", "k" },
+            { "KH", "kh" },
+            { "G", "g" },
+            { "NG", "ng" },
+            { "C", "c" },
+            { "CH", "ch" },
+            { "J", "j" },
+            { "NY", "ny" },
+            { "T", "t" },
+            { "TH", "th" },
+            { "D", "d" },
+            { "N", "n" },
+            { "P", "p" },
+            { "PH", "ph" },
+            { "B", "b" },
+            { "M", "m" },
+            { "TZ", "ts" },
+            { "TS", "tsh" },
+            { "DZ", "dz" },
+            /* NOTE WELL: ACIP "W" is sometimes EWTS "w", sometimes
+               "W".  Handle this in the caller.
+
+               Reasoning for "W" instead of "w": r-w and r+w are
+               both known hash keys.  We sort 'em out this way.
+               (They are the only things like this according to
+               bug report #800166.)  */
+            { "W", "W" },
+            { "ZH", "zh" },
+            { "Z", "z" },
+            { "'", "'" },
+            { "Y", "y" },
+            { "R", "r" },
+            { "L", "l" },
+            { "SH", "sh" },
+            { "S", "s" },
+            { "H", "h" },
+            { "A", "a" },
+            { "t", "T" },
+            { "th", "Th" },
+            { "d", "D" },
+            { "n", "N" },
+            { "sh", "Sh" }
+        };
+
+        acipConsonant2wylie = new HashMap(37);
+        for (int row = 0; row < pairs.length; row++)
+            putMapping(acipConsonant2wylie, pairs[row][0], pairs[row][1]);
+        return (String)acipConsonant2wylie.get(acip);
+    }
+
+    private HashMap acipWowel2wylie = null;
+    /** Looks up the EWTS for the ACIP wowel acip, returning null if
+     *  acip is not one of the wowels generated from baseVowels.  For
+     *  each base vowel, the table holds the vowel itself and the
+     *  vowel preceded by an apostrophe, each of them bare, followed
+     *  by 'm', followed by ':', and followed by "m:".  The table is
+     *  built the first time this is called. */
+    public /* synchronized */ String getEwtsForWowel(String acip) {
+        if (null == acipWowel2wylie) {
+            acipWowel2wylie = new HashMap(baseVowels.length * 4);
+
+            for (int row = 0; row < baseVowels.length; row++) {
+                final String plain = baseVowels[row][0];
+                final String withApostrophe = "'" + plain;
+                final String ewtsPlain = baseVowels[row][1];
+                final String ewtsWithApostrophe = baseVowels[row][2];
+
+                putMapping(acipWowel2wylie, plain, ewtsPlain);
+                putMapping(acipWowel2wylie, withApostrophe, ewtsWithApostrophe);
+                putMapping(acipWowel2wylie, plain + "m", ewtsPlain + "M");
+                putMapping(acipWowel2wylie, withApostrophe + "m", ewtsWithApostrophe + "M");
+                putMapping(acipWowel2wylie, plain + ":", ewtsPlain + "H");
+                putMapping(acipWowel2wylie, withApostrophe + ":", ewtsWithApostrophe + "H");
+                putMapping(acipWowel2wylie, plain + "m:", ewtsPlain + "MH");
+                putMapping(acipWowel2wylie, withApostrophe + "m:", ewtsWithApostrophe + "MH");
+            }
+            // {Pm} is treated just like {PAm}; {P:} is treated just
+            // like {PA:}; {Pm:} is treated just like {PAm:}.  But
+            // that happens thanks to
+            // TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
+
+            // Keep this code in sync with getUnicodeFor.
+        }
+        return (String)acipWowel2wylie.get(acip);
+    }
+
+    /** {Ksh} and {DZH}, the longest consonants, have 3 characters
+     *  each, so this is three.  A constant; never reassigned. */
+    private static final int MAX_CONSONANT_LENGTH = 3;
+
+    /** {'EEm:}, the longest wowel, has 5 characters, so this is
+     *  five.  A constant; never reassigned. */
+    private static final int MAX_WOWEL_LENGTH = 5;
+
+    /** The base vowels from which getEwtsForWowel builds its table.
+     *  Each row has exactly three columns:
+     *  <ol>
+     *  <li>the ACIP vowel;</li>
+     *  <li>the EWTS for that ACIP vowel;</li>
+     *  <li>the EWTS for the ACIP vowel preceded by an apostrophe,
+     *      i.e. for ACIP "'" + baseVowels[i][0].</li>
+     *  </ol>
+     *  The reference is final; the contents are read-only by
+     *  convention. */
+    private static final String[][] baseVowels = new String[][] {
+        { "A", "a", "A" },
+        { "I", "i", "I" },
+        { "U", "u", "U" },
+        { "E", "e", "Ae" },
+        { "O", "o", "Ao" },
+        { "EE", "ai", "Aai" },
+        { "OO", "au", "Aau" },
+        { "i", "-i", "A-i" }
+    };
+
+    /** Tells whether s is an ACIP wowel, i.e. whether
+     *  getEwtsForWowel knows it.  Context matters: A is both a
+     *  consonant and a vowel in ACIP, so only call this where a
+     *  wowel is what you expect. */
+    public boolean isWowel(String s) {
+        // I'm on my own with 'O and 'E and 'OO and 'EE, but GANG'O
+        // appears and I wonder... so here they are.  It's consistent
+        // with 'I and 'A and 'U, at least: all the vowels may appear
+        // as K'vowel.  DLC FIXME: ask.
+        final String ewts = getEwtsForWowel(s);
+        return ewts != null;
+    }
+
+    /** Tells whether s is an ACIP consonant, i.e. whether
+     *  getEwtsForConsonant knows it. */
+    public boolean isConsonant(String s) {
+        final String ewts = getEwtsForConsonant(s);
+        return ewts != null;
+    }
+
+    /** Gets the duffcodes for wowel, such that they look good with
+     *  the preceding glyph, and appends them to duff.  Does nothing
+     *  if wowel is null.
+     *  @param duff the list to which the glyphs are appended
+     *  @param preceding the glyph that the wowel follows
+     *  @param wowel an ACIP wowel known to getEwtsForWowel, e.g.
+     *  "A", "'I", or "Em:"
+     *  @throws IllegalArgumentException if wowel is non-null but
+     *  getEwtsForWowel does not know it */
+    public void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel) {
+        if (null == wowel) return;
+        if (null == getEwtsForWowel(wowel)) // FIXME: expensive assertion!  Use assert.
+            throw new IllegalArgumentException("Wowel " + wowel + " isn't in the small set of wowels we handle correctly.");
+
+        // Order matters here.
+        // context_added is a one-element array handed to every
+        // getVowel call below; presumably getVowel uses it as an
+        // in/out flag -- TODO confirm against TibTextUtils.getVowel.
+        boolean context_added[] = new boolean[] { false };
+        if (wowel.startsWith("A")) {
+            // "A", "Am", "A:", "Am:": only the a vowel is requested.
+            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.WYLIE_aVOWEL, context_added);
+        } else if (wowel.indexOf("'U") >= 0) {
+            // apostrophe + U gets a single dedicated vowel constant
+            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.U_VOWEL, context_added);
+        } else if (wowel.indexOf("'I") >= 0) {
+            // apostrophe + I gets a single dedicated vowel constant
+            TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.I_VOWEL, context_added);
+        } else {
+            // Any other apostrophized wowel: request A_VOWEL first,
+            // then whichever vowel(s) the remaining letters name.
+            if (wowel.indexOf('\'') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.A_VOWEL, context_added);
+            }
+            // Test for "EE" before 'E' (and "OO" before 'O') so that
+            // the doubled vowel is not mistaken for the single one.
+            if (wowel.indexOf("EE") >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.ai_VOWEL, context_added);
+            } else if (wowel.indexOf('E') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.e_VOWEL, context_added);
+            }
+            if (wowel.indexOf("OO") >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.au_VOWEL, context_added);
+            } else if (wowel.indexOf('O') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.o_VOWEL, context_added);
+            }
+            if (wowel.indexOf('I') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.i_VOWEL, context_added);
+            }
+            if (wowel.indexOf('U') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.u_VOWEL, context_added);
+            }
+            // lowercase 'i' is ACIP's reversed i
+            if (wowel.indexOf('i') >= 0) {
+                TibTextUtils.getVowel(duff, preceding, THDLWylieConstants.reverse_i_VOWEL, context_added);
+            }
+        }
+        // FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
+
+        // 'm' anywhere in the wowel: take the last glyph back off
+        // duff and let getBindu re-add it.
+        // NOTE(review): this assumes duff is non-empty here; if
+        // nothing has been appended and the caller passed an empty
+        // list, duff.get(-1) will throw -- confirm callers.
+        if (wowel.indexOf('m') >= 0) {
+            DuffCode last = (DuffCode)duff.get(duff.size() - 1);
+            duff.remove(duff.size() - 1); // getBindu will add it back...
+            TibTextUtils.getBindu(duff, last);
+        }
+        // ':' anywhere in the wowel: append the glyph for the EWTS
+        // that getEwtsForOther gives for ACIP ":".
+        if (wowel.indexOf(':') >= 0)
+            duff.add(TibetanMachineWeb.getGlyph(getEwtsForOther(":")));
+    }
 }
+
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@ -18,11 +18,10 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

-import java.io.*;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Stack;

-import org.thdl.util.ThdlDebug;
 import org.thdl.util.ThdlOptions;

 /**
@ -36,8 +35,10 @@ import org.thdl.util.ThdlOptions;
 * the parser, not here in the lexical analyzer.  That'd be cleaner,
 * and more like how you'd do things if you used lex and yacc.
 *
+* This is not public because you should use {@link ACIPTraits#scanner()}.
+*
 * @author David Chandler */
-public class ACIPTshegBarScanner extends TTshegBarScanner {
+class ACIPTshegBarScanner extends TTshegBarScanner {
    /** True if those ACIP snippets inside square brackets (e.g.,
        "[THIS]") are to be passed through into the output unmodified
        while retaining the brackets and if those ACIP snippets inside
--- a/source/org/thdl/tib/text/ttt/EWTSTraits.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTraits.java
@ -18,11 +18,14 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

+import java.util.ArrayList;
+import org.thdl.tib.text.DuffCode;
+
 /** A singleton class that should contain (but due to laziness and
 *  ignorance probably does not contain) all the traits that make EWTS
 *  transliteration different from other (say, ACIP) transliteration
 *  schemes. */
-final class EWTSTraits implements TTraits {
+public final class EWTSTraits implements TTraits {
    /** sole instance of this class */
    private static EWTSTraits singleton = null;

@ -30,7 +33,7 @@ final class EWTSTraits implements TTraits {
    private EWTSTraits() { }

    /** */
-    public static EWTSTraits instance() {
+    public static synchronized EWTSTraits instance() {
        if (null == singleton) {
            singleton = new EWTSTraits();
        }
@ -79,4 +82,48 @@ final class EWTSTraits implements TTraits {
                || "H".equals(s)
                || "M".equals(s)); // TODO(DLC)[EWTS->Tibetan]:???
    }
+
+    /** Returns "a", the a vowel as EWTS writes it. */
+    public String aVowel() {
+        return "a";
+    }
+
+    /** Returns true if and only if s is "s" or "d".  Returns false
+     *  for null. */
+    public boolean isPostsuffix(String s) {
+        if ("s".equals(s))
+            return true;
+        return "d".equals(s);
+    }
+
+    /** Returns true if and only if l is one of the five EWTS
+     *  letters accepted as a prefix: ', m, b, d, or g.  Returns false
+     *  for null. */
+    public boolean isPrefix(String l) {
+        final String[] prefixes = { "'", "m", "b", "d", "g" };
+        for (int i = 0; i < prefixes.length; i++) {
+            if (prefixes[i].equals(l))
+                return true;
+        }
+        return false;
+    }
+
+    /** Returns true if and only if l is one of the ten EWTS letters
+     *  accepted as a suffix: s, g, d, m, ', b, ng, n, l, or r.
+     *  Returns false for null. */
+    public boolean isSuffix(String l) {
+        final String[] suffixes = {
+            "s", "g", "d", "m", "'", "b", "ng", "n", "l", "r"
+        };
+        for (int i = 0; i < suffixes.length; i++) {
+            if (suffixes[i].equals(l))
+                return true;
+        }
+        return false;
+    }
+
+    /** Identity mapping: the consonant l is already EWTS, since this
+     *  is EWTS's traits class, so l itself is returned. */
+    public String getEwtsForConsonant(String l) {
+        return l;
+    }
+
+    /** Identity mapping: l is already EWTS, since this is EWTS's
+     *  traits class, so l itself is returned. */
+    public String getEwtsForOther(String l) {
+        return l;
+    }
+
+    /** Identity mapping: the wowel l is already EWTS, since this is
+     *  EWTS's traits class, so l itself is returned. */
+    public String getEwtsForWowel(String l) {
+        return l;
+    }
+
+    /** Returns the scanner for EWTS text, namely the sole instance
+     *  of EWTSTshegBarScanner. */
+    public TTshegBarScanner scanner() {
+        return EWTSTshegBarScanner.instance();
+    }
+
+    /** Not yet implemented for EWTS.
+     *  @throws Error always */
+    public void getDuffForWowel(ArrayList duff,
+                                DuffCode preceding,
+                                String wowel) {
+        throw new Error("TODO(DLC)[EWTS->Tibetan]");
+    }
+
+    /** Not yet implemented for EWTS.
+     *  @throws Error always */
+    public String getUnicodeFor(String l, boolean subscribed) {
+        throw new Error("TODO(DLC)[EWTS->Tibetan]");
+    }
 }
--- a/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/EWTSTshegBarScanner.java
@ -0,0 +1,56 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import java.util.ArrayList;
+
+/**
+* This singleton class is meant to break up Strings of EWTS text (for
+* example, an entire sutra file) into tsheg bars, comments, etc.
+* Non-Tibetan parts are to be segregated (so that consumers can ensure
+* that they remain non-Tibetan), and Tibetan passages are to be broken
+* up into tsheg bars.  The scanning itself is not implemented yet:
+* {@link #scan(String, StringBuffer, int, boolean, String)} always
+* throws.
+*
+* This is not public because you should use {@link EWTSTraits#scanner()}.
+*
+* @author David Chandler */
+class EWTSTshegBarScanner extends TTshegBarScanner {
+    /** See the comment in TTshegBarScanner.  Not yet implemented, so
+        every call fails.  Once implemented, this is not expected to
+        find the errors and warnings that you'd think of a parser
+        finding (DLC: confirm that).
+        @throws Error always, with the message "DLC unimplemented" */
+    public ArrayList scan(String s, StringBuffer errors, int maxErrors,
+                          boolean shortMessages, String warningLevel) {
+        // No result list is allocated yet because nothing would use
+        // it.  When this is implemented, size the list as an
+        // underestimate of the token count, e.g. s.length() / 10; the
+        // right size depends on whether the text is mostly Tibetan
+        // or mostly Latin and a number of other factors.
+        throw new Error("DLC unimplemented");
+    }
+
+    /** non-public because this is a singleton */
+    protected EWTSTshegBarScanner() { }
+
+    /** the sole instance, created lazily by instance() */
+    private static EWTSTshegBarScanner singleton = null;
+
+    /** Returns the sole instance of this class, creating it on the
+        first call.  Synchronized so that concurrent first calls
+        cannot create two instances. */
+    public synchronized static EWTSTshegBarScanner instance() {
+        if (null == singleton) {
+            singleton = new EWTSTshegBarScanner();
+        }
+        return singleton;
+    }
+}
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@ -202,15 +202,16 @@ public class PackageTest extends TestCase {
        message. */
    static String ACIP2TMW2Translit(boolean EWTSNotACIP, String ACIP) {
        StringBuffer errors = new StringBuffer();
-        ArrayList al = ACIPTshegBarScanner.instance().scan(ACIP, errors, -1,
-                                                           false, "None");
+        ArrayList al = ACIPTraits.instance().scanner().scan(ACIP, errors, -1,
+                                                            false, "None");
        if (null == al || errors.length() > 0)
            return null;
        org.thdl.tib.text.TibetanDocument tdoc
            = new org.thdl.tib.text.TibetanDocument();
        int loc[] = new int[] { 0 };
        try {
-            if (!TConverter.convertToTMW(al,
+            if (!TConverter.convertToTMW(ACIPTraits.instance(),
+                                         al,
                                         tdoc,
                                         null,
                                         null,
@ -7358,8 +7359,8 @@ tstHelper("ZUR");

    private static void shelp(String s, String expectedErrors, String expectedScan, String warningLevel) {
        StringBuffer errors = new StringBuffer();
-        ArrayList al = ACIPTshegBarScanner.instance().scan(s, errors, -1, false,
-                                                           warningLevel);
+        ArrayList al = ACIPTraits.instance().scanner().scan(s, errors, -1, false,
+                                                            warningLevel);
        if (null != expectedScan) {
            if (!al.toString().equals(expectedScan)) {
                System.out.println("Scanning " + s + " into tsheg bars was expected to cause the following scan:");
@ -7392,7 +7393,7 @@ tstHelper("ZUR");

    /** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer,
        int, boolean)}. */
-    public void testScanner() {
+    public void testAcipScanner() {
        shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");

        shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]");
@ -7682,7 +7683,8 @@ tstHelper("ZUR");
    private static void uhelp(String acip, String expectedUnicode,
                              String warningLevel, boolean shortMessages) {
        StringBuffer errors = new StringBuffer();
-        String unicode = TConverter.convertToUnicodeText("ACIP", acip, errors,
+        String unicode = TConverter.convertToUnicodeText(ACIPTraits.instance(),
+                                                         acip, errors,
                                                         null, true,
                                                         warningLevel,
                                                         shortMessages);
--- a/source/org/thdl/tib/text/ttt/TConverter.java
+++ b/source/org/thdl/tib/text/ttt/TConverter.java
@ -69,10 +69,10 @@ public class TConverter {
        boolean shortMessages = false;
        String warningLevel = "Most";
        ArrayList al
-            = ACIPTshegBarScanner.instance().scanFile(args[0], errors,
-                                                      maxErrors - 1,
-                                                      shortMessages,
-                                                      warningLevel);
+            = ACIPTraits.instance().scanner().scanFile(args[0], errors,
+                                                       maxErrors - 1,
+                                                       shortMessages,
+                                                       warningLevel);

        if (null == al) {
            System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
@ -103,8 +103,9 @@ public class TConverter {
            warnings = new StringBuffer();
            putWarningsInOutput = true;
        }
-        convertToTMW(al, System.out, errors, warnings, null,
-                     putWarningsInOutput, warningLevel, shortMessages, colors);
+        convertToTMW(ACIPTraits.instance(), al, System.out, errors, warnings,
+                     null, putWarningsInOutput, warningLevel, shortMessages,
+                     colors);
        int retCode = 0;
        if (errors.length() > 0) {
            System.err.println("Errors converting ACIP input file: ");
@ -139,7 +140,8 @@ public class TConverter {
     *  prefix rules in another
     *  @throws IOException if we cannot write to out
     */
-    public static boolean convertToTMW(ArrayList scan,
+    public static boolean convertToTMW(TTraits ttraits,
+                                       ArrayList scan,
                                       OutputStream out,
                                       StringBuffer errors,
                                       StringBuffer warnings,
@ -152,7 +154,8 @@ public class TConverter {
    {
        TibetanDocument tdoc = new TibetanDocument();
        boolean rv
-            = convertToTMW(scan, tdoc, errors, warnings, hasWarnings,
+            = convertToTMW(ttraits,
+                           scan, tdoc, errors, warnings, hasWarnings,
                           writeWarningsToResult, warningLevel,
                           shortMessages, colors,
                           new int[] { tdoc.getLength() });
@ -169,7 +172,8 @@ public class TConverter {
        offset from zero inside tdoc at which conversion results will
        be placed.  On output, loc[0] is one past the offset of the
        last of the conversion results. */
-    public static boolean convertToTMW(ArrayList scan,
+    public static boolean convertToTMW(TTraits ttraits,
+                                       ArrayList scan,
                                       TibetanDocument tdoc,
                                       StringBuffer errors,
                                       StringBuffer warnings,
@ -181,7 +185,8 @@ public class TConverter {
                                       int[] loc)
        throws IOException
    {
-        return convertTo(false, true, scan, null, tdoc, errors, warnings,
+        return convertTo(false, true,
+                         ttraits, scan, null, tdoc, errors, warnings,
                         hasWarnings, writeWarningsToResult, warningLevel,
                         shortMessages, colors, loc,
                         loc[0] == tdoc.getLength());
@ -189,33 +194,30 @@ public class TConverter {

    /** Returns UTF-8 encoded Unicode.  A bit indirect, so use this
     *  for testing only if performance is a concern.  If errors occur
-     *  in scanning the ACIP or in converting a tsheg bar, then they
-     *  are appended to errors if errors is non-null, as well as
-     *  written to the result.  If warnings occur in scanning the ACIP
-     *  or in converting a tsheg bar, then they are appended to
-     *  warnings if warnings is non-null, and they are written to the
-     *  result if writeWarningsToResult is true.  Error and warning
-     *  messages are long and self-contained unless shortMessages is
-     *  true.  Returns the conversion upon perfect success or if there
-     *  were merely warnings, null if errors occurred.  */
-    public static String convertToUnicodeText(String transliteration,
-                                              String acip,
+     *  in scanning the transliteration or in converting a tsheg bar,
+     *  then they are appended to errors if errors is non-null, as
+     *  well as written to the result.  If warnings occur in scanning
+     *  the transliteration or in converting a tsheg bar, then they
+     *  are appended to warnings if warnings is non-null, and they are
+     *  written to the result if writeWarningsToResult is true.  Error
+     *  and warning messages are long and self-contained unless
+     *  shortMessages is true.  Returns the conversion upon perfect
+     *  success or if there were merely warnings, null if errors
+     *  occurred.  */
+    public static String convertToUnicodeText(TTraits ttraits,
+                                              String translit,
                                              StringBuffer errors,
                                              StringBuffer warnings,
                                              boolean writeWarningsToResult,
                                              String warningLevel,
                                              boolean shortMessages) {
-        if (transliteration != "ACIP") {
-            ThdlDebug.noteIffyCode();
-            throw new IllegalArgumentException("Unsupported transliteration");
-        }
        ByteArrayOutputStream sw = new ByteArrayOutputStream();
        ArrayList al
-            = ACIPTshegBarScanner.instance().scan(acip, errors, -1,
-                                                  shortMessages, warningLevel);
+            = ttraits.scanner().scan(translit, errors, -1, shortMessages,
+                                     warningLevel);
        try {
            if (null != al) {
-                convertToUnicodeText(al, sw, errors,
+                convertToUnicodeText(ttraits, al, sw, errors,
                                     warnings, null, writeWarningsToResult,
                                     warningLevel, shortMessages);
                return sw.toString("UTF-8");
@ -236,7 +238,8 @@ public class TConverter {
     *  writeWarningsToOut is true, then warnings also will be written
     *  to out.
     *  @return true upon perfect success, false if errors occurred.
-     *  @param scan result of ACIPTshegBarScanner.scan(..)
+     *  @param scan result of using ttraits.scanner() to break up the
+     *  original string of transliteration
     *  @param out stream to which to write converted text
     *  @param errors if non-null, all error messages are appended
     *  @param warnings if non-null, all warning messages appropriate
@ -246,9 +249,9 @@ public class TConverter {
     *  false otherwise
     *  @param writeWarningsToOut if true, then all warning messages
     *  are written to out in the appropriate places
-     *  @throws IOException if we cannot write to out
-     */
-    public static boolean convertToUnicodeText(ArrayList scan,
+     *  @throws IOException if we cannot write to out */
+    public static boolean convertToUnicodeText(TTraits ttraits,
+                                               ArrayList scan,
                                               OutputStream out,
                                               StringBuffer errors,
                                               StringBuffer warnings,
@ -258,7 +261,8 @@ public class TConverter {
                                               boolean shortMessages)
        throws IOException
    {
-        return convertTo(true, false, scan, out, null, errors, warnings,
+        return convertTo(true, false,
+                         ttraits, scan, out, null, errors, warnings,
                         hasWarnings, writeWarningsToOut, warningLevel,
                         shortMessages, false, new int[] { -1 } , true);
    }
@ -283,6 +287,7 @@ public class TConverter {

    private static boolean convertTo(boolean toUnicode, // else to TMW
                                     boolean toRTF, // else to UTF-8-encoded text
+                                     TTraits ttraits,
                                     ArrayList scan,
                                     OutputStream out, // for (toUnicode && !toRTF) mode
                                     TibetanDocument tdoc, // for !toUnicode mode or (toUnicode && toRTF) mode
@ -368,7 +373,7 @@ public class TConverter {
                if (lastGuyWasNonPunct) {
                    String err = "[#ERROR " + ErrorsAndWarnings.getMessage(133, shortMessages, s.getText()) + "]";
                    if (null != writer) {
-                        String uni = ACIPRules.getUnicodeFor(s.getText(), false);
+                        String uni = ttraits.getUnicodeFor(s.getText(), false);
                        if (null == uni) {
                            hasErrors = true;
                            uni = err;
@ -377,7 +382,7 @@ public class TConverter {
                    }
                    if (null != tdoc) {
                        String wylie
-                            = ACIPRules.getWylieForACIPOther(s.getText());
+                            = ttraits.getEwtsForOther(s.getText());
                        if (null == wylie) {
                            hasErrors = true;
                            tdoc.appendRoman(tdocLocation[0], err, Color.RED);
@ -658,7 +663,7 @@ public class TConverter {
                            }

                            if (!done) {
-                                if (null != writer) unicode = ACIPRules.getUnicodeFor(s.getText(), false);
+                                if (null != writer) unicode = ttraits.getUnicodeFor(s.getText(), false);
                                if (null != tdoc) {
                                    if (s.getText().equals("\r")
                                        || s.getText().equals("\t")
@ -675,7 +680,7 @@ public class TConverter {
                                                TibetanMachineWeb.getGlyph("#")
                                            }; // hard-coded EWTS values
                                        } else {
-                                            String wy = ACIPRules.getWylieForACIPOther(s.getText());
+                                            String wy = ttraits.getEwtsForOther(s.getText());
                                            if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
                                            duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
                                        }
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@ -26,22 +26,27 @@ import java.util.ArrayList;

 /** An ordered pair used in ACIP/EWTS-to-TMW/Unicode conversion.  The
 *  left side is the consonant or empty; the right side is either the
- *  vowel or '+' (indicating stacking) or a disambiguator (i.e., '-'
- *  in ACIP or '.' in EWTS).
+ *  vowel or '+' (indicating stacking in both ACIP and EWTS) or a
+ *  disambiguator (e.g., '-' in ACIP or '.' in EWTS).
 *  @author David Chandler */
 /* BIG FIXME: make this package work for EWTS, not just ACIP.  (TODO(DLC)[EWTS->Tibetan]: does it?) */
 class TPair {
-    /** The left side, or null if there is no left side.  That is, the
-     *  non-vowel, non-'m', non-':', non-'-', non-'+' guy. */
+    /** the part that knows ACIP from EWTS */
+    private TTraits traits;
+
+    /** Returns the part that knows ACIP from EWTS. */
+    public TTraits getTraits() { return traits; }
+
+    /** The left side, or null if there is no left side.  I.e., the
+     *  non-wowel, non-disambiguator, non-'+' guy. */
    private String l;
    String getLeft() {
        ThdlDebug.verify(!"".equals(l));
        return l;
    }

-    /** The right side. That is, the vowel, with 'm' or ':' "vowel"
-     *  after it if appropriate, or "-" (disambiguator), or "+"
-     *  (stacking), or null otherwise. */
+    /** The right side. That is, the wowel or disambiguator or "+"
+     *  (for stacking) or null otherwise. */
    private String r;
    String getRight() {
        ThdlDebug.verify(!"".equals(r));
@ -50,13 +55,14 @@ class TPair {

    /** Constructs a new TPair with left side l and right side r.
     *  Use null or the empty string to represent an absence. */
-    TPair(String l, String r) {
+    TPair(TTraits traits, String l, String r) {
        // Normalize:
        if (null != l && l.equals("")) l = null;
        if (null != r && r.equals("")) r = null;

        this.l = l;
        this.r = r;
+        this.traits = traits;
    }

    /** Returns a nice String representation.  Returns "(D . E)" for
@ -67,8 +73,8 @@ class TPair {
            + ((null == r) ? "" : r) + ")";
    }

-    /** Returns the number of ACIP characters that make up this
-     *  TPair. */
+    /** Returns the number of transliteration characters that make up
+     *  this TPair. */
    int size() {
        return (((l == null) ? 0 : l.length())
                + ((r == null) ? 0 : r.length()));
@ -98,18 +104,18 @@ class TPair {
            sz = l.length();
            newL = l.substring(0, sz - N);
        }
-        return new TPair(newL, newR);
+        return new TPair(traits, newL, newR);
    }

-    /** Returns true if and only if this is nonempty and is l, if
-     *  present, is a legal ACIP consonant, and is r, if present, is a
-     *  legal ACIP vowel. */
+    /** Returns true if and only if this is nonempty and if l, if
+     *  present, is a legal consonant, and if r, if present, is a
+     *  legal wowel. */
    boolean isLegal() {
        if (size() < 1)
            return false;
-        if (null != l && !ACIPRules.isConsonant(l))
+        if (null != l && !traits.isConsonant(l))
            return false;
-        if (null != r && !ACIPRules.isWowel(r))
+        if (null != r && !traits.isWowel(r))
            return false;
        return true;
    }
@ -119,9 +125,9 @@ class TPair {
    boolean isPrefix() {
        return (null != l
                && ((null == r || "".equals(r))
-                    || "-".equals(r) // TODO(DLC)[EWTS->Tibetan]
-                    || "A".equals(r)) // FIXME: though check for BASKYABS and warn because BSKYABS is more common
-                && ACIPRules.isACIPPrefix(l));
+                    || traits.disambiguator().equals(r)
+                    || traits.aVowel().equals(r)) // FIXME: though check for BASKYABS and warn because BSKYABS is more common
+                && traits.isPrefix(l));
    }

    /** Returns true if and only if this pair could be a Tibetan
@ -129,25 +135,25 @@ class TPair {
    boolean isPostSuffix() {
        return (null != l
                && ((null == r || "".equals(r))
-                    || "-".equals(r)
-                    || "A".equals(r)) // FIXME: though warn about GAMASA vs. GAMS
-                && ACIPRules.isACIPPostsuffix(l));
+                    || traits.disambiguator().equals(r)
+                    || traits.aVowel().equals(r)) // FIXME: though warn about GAMASA vs. GAMS
+                && traits.isPostsuffix(l));
    }

    /** Returns true if and only if this pair could be a Tibetan
-     *  suffix. FIXME: ACIP specific, just like isPostSuffix() and isPrefix() */
+     *  suffix. */
    boolean isSuffix() {
        return (null != l
                && ((null == r || "".equals(r))
-                    || "-".equals(r)
-                    || "A".equals(r))
-                && ACIPRules.isACIPSuffix(l));
+                    || traits.disambiguator().equals(r)
+                    || traits.aVowel().equals(r))
+                && traits.isSuffix(l));
    }

    /** Returns true if and only if this pair is merely a
     *  disambiguator. */
    boolean isDisambiguator() {
-        return ("-".equals(r) && getLeft() == null);
+        return (traits.disambiguator().equals(r) && getLeft() == null);
    }

    /** Yep, this works for TPairs. */
@ -160,16 +166,16 @@ class TPair {
        return false;
    }

-    /** Returns a TPair that is like this pair except that it has
-     *  a "+" on the right if this pair is empty on the right and is
-     *  empty on the right if this pair has a disambiguator (i.e., a
-     *  '-') on the right.  May return itself (but never mutates this
+    /** Returns a TPair that is like this pair except that it has a
+     *  "+" on the right if this pair is empty on the right and is
+     *  empty on the right if this pair has a disambiguator on the
+     *  right.  May return itself (but never mutates this
     *  instance). */
    TPair insideStack() {
        if (null == getRight())
-            return new TPair(getLeft(), "+");
-        else if ("-".equals(getRight()))
-            return new TPair(getLeft(), null);
+            return new TPair(traits, getLeft(), "+");
+        else if (traits.disambiguator().equals(getRight()))
+            return new TPair(traits, getLeft(), null);
        else
            return this;
    }
@ -194,7 +200,7 @@ class TPair {
    String getWylie(boolean justLeft) {
        String leftWylie = null;
        if (getLeft() != null) {
-            leftWylie = ACIPRules.getWylieForACIPConsonant(getLeft());
+            leftWylie = traits.getEwtsForConsonant(getLeft());
            if (leftWylie == null) {
                if (isNumeric())
                    leftWylie = getLeft();
@ -208,7 +214,7 @@ class TPair {
        else if ("+".equals(getRight()))
            rightWylie = "+";
        else if (getRight() != null)
-            rightWylie = ACIPRules.getWylieForACIPVowel(getRight());
+            rightWylie = traits.getEwtsForWowel(getRight());
        if (null == rightWylie) rightWylie = "";
        return leftWylie + rightWylie;
    }
@ -227,18 +233,19 @@ class TPair {
    void getUnicode(StringBuffer consonantSB, StringBuffer vowelSB,
                    boolean subscribed) {
        if (null != getLeft()) {
-            String x = ACIPRules.getUnicodeFor(getLeft(), subscribed);
+            String x = traits.getUnicodeFor(getLeft(), subscribed);
            if (null == x) throw new Error("TPair: " + getLeft() + " has no Uni");
            consonantSB.append(x);
        }
        if (null != getRight()
            && !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) {
-            String x = ACIPRules.getUnicodeFor(getRight(), subscribed);
+            String x = traits.getUnicodeFor(getRight(), subscribed);
            if (null == x) throw new Error("TPair: " + getRight() + " has no Uni");
            vowelSB.append(x);
        }
    }

+    // TODO(DLC)[EWTS->Tibetan]
    /** Returns true if this pair is surely the last pair in an ACIP
     *  stack. Stacking continues through (* . ) and (* . +), but
     *  stops anywhere else. */
--- a/source/org/thdl/tib/text/ttt/TPairList.java
+++ b/source/org/thdl/tib/text/ttt/TPairList.java
@ -33,6 +33,9 @@ import java.util.ArrayList;
 *
 *  @author David Chandler */
 class TPairList {
+    /** the part that knows ACIP from EWTS */
+    private TTraits traits;
+
    /** FIXME: change me and see if performance improves. */
    private static final int INITIAL_SIZE = 1;

@ -41,17 +44,20 @@ class TPairList {

    /** Creates a new list containing just p. */
    public TPairList(TPair p) {
+        this.traits = p.getTraits();
        al = new ArrayList(1);
        add(p);
    }

    /** Creates an empty list. */
-    public TPairList() {
+    public TPairList(TTraits traits) {
+        this.traits = traits;
        al = new ArrayList(INITIAL_SIZE);
    }

    /** Creates an empty list with the capacity to hold N items. */
-    public TPairList(int N) {
+    public TPairList(TTraits traits, int N) {
+        this.traits = traits;
        al = new ArrayList(N);
    }

@ -181,7 +187,7 @@ class TPairList {
                return ErrorsAndWarnings.getMessage(125, shortMessages, translit);
            } else if ((null == p.getLeft() && !"-".equals(p.getRight()))
                       || (null != p.getLeft()
-                           && !ACIPRules.isConsonant(p.getLeft())
+                           && !traits.isConsonant(p.getLeft())
                           && !p.isNumeric())) {
                // FIXME: stop handling this outside of ErrorsAndWarnings:
                if (null == p.getLeft()) {
@ -406,12 +412,12 @@ class TPairList {
                // and only if b1 is one, etc.
                for (int counter = 0; counter < (1<<numBreaks); counter++) {
                    TStackList sl = new TStackList();
-                    TPairList currentStack = new TPairList();
+                    TPairList currentStack = new TPairList(traits);
                    for (int k = startLoc; k <= i; k++) {
                        if (!get(k).isDisambiguator()) {
                            if (get(k).isNumeric()
                                || (get(k).getLeft() != null
-                                    && ACIPRules.isConsonant(get(k).getLeft())))
+                                    && traits.isConsonant(get(k).getLeft())))
                                currentStack.add(get(k).insideStack());
                            else
                                return null; // sA, for example, is illegal.
@ -419,7 +425,7 @@ class TPairList {
                        if (k == i || get(k).endsACIPStack()) {
                            if (!currentStack.isEmpty())
                                sl.add(currentStack.asStack());
-                            currentStack = new TPairList();
+                            currentStack = new TPairList(traits);
                        } else {
                            if (numBreaks > 0) {
                                for (int j = 0; breakStart+j < 3; j++) {
@ -427,7 +433,7 @@ class TPairList {
                                        && 1 == ((counter >> j) & 1)) {
                                        if (!currentStack.isEmpty())
                                            sl.add(currentStack.asStack());
-                                        currentStack = new TPairList();
+                                        currentStack = new TPairList(traits);
                                        break; // shouldn't matter, but you never know
                                    }
                                }
@ -460,9 +466,9 @@ class TPairList {
        if (!isEmpty()) {
            TPair lastPair = get(size() - 1);
            if ("+".equals(lastPair.getRight()))
-                al.set(size() - 1, new TPair(lastPair.getLeft(), null));
+                al.set(size() - 1, new TPair(traits, lastPair.getLeft(), null));
            else if ("-".equals(lastPair.getRight()))
-                al.set(size() - 1, new TPair(lastPair.getLeft(), null));
+                al.set(size() - 1, new TPair(traits, lastPair.getLeft(), null));
        }
        return this;
    }
@ -506,10 +512,10 @@ class TPairList {
                add_U0F7F = true;
                StringBuffer rr = new StringBuffer(p.getRight());
                rr.deleteCharAt(where);
-                p = new TPair(p.getLeft(), rr.toString());
+                p = new TPair(traits, p.getLeft(), rr.toString());
            }
            boolean hasNonAVowel = (!"A".equals(p.getRight()) && null != p.getRight());
-            String thislWylie = ACIPRules.getWylieForACIPConsonant(p.getLeft());
+            String thislWylie = traits.getEwtsForConsonant(p.getLeft());
            if (thislWylie == null) {
                char ch;
                if (p.isNumeric()) {
@ -528,21 +534,21 @@ class TPairList {
            boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(ll.toString());
            boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(lWylie.toString());
            if (ddebug && !isTibetan && !isSanskrit && !isNumeric) {
-                System.out.println("OTHER for " + lWylie + " with vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
+                System.out.println("OTHER for " + lWylie + " with vowel " + traits.getEwtsForWowel(p.getRight()) + " and p.getRight()=" + p.getRight());
            }
            if (isTibetan && isSanskrit) {
                 // RVA, e.g.  It must be Tibetan because RWA is what
                 // you'd use for RA over fixed-form WA.
                isSanskrit = false;
            }
-            if (ddebug && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) {
-                System.out.println("vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
+            if (ddebug && hasNonAVowel && traits.getEwtsForWowel(p.getRight()) == null) {
+                System.out.println("vowel " + traits.getEwtsForWowel(p.getRight()) + " and p.getRight()=" + p.getRight());
            }
            TGCPair tp;
            indexList.add(new Integer(index));
            tp = new TGCPair(lWylie.toString(),
                             (hasNonAVowel
-                              ? ACIPRules.getWylieForACIPVowel(p.getRight())
+                              ? traits.getEwtsForWowel(p.getRight())
                              : ""),
                             (isNumeric
                              ? TGCPair.TYPE_OTHER
@ -697,9 +703,9 @@ class TPairList {
        if (lastPair.getRight() == null || lastPair.equals("-")) {
            duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
        } else {
-            ACIPRules.getDuffForACIPVowel(duffsAndErrors,
-                                          TibetanMachineWeb.getGlyph(hashKey),
-                                          lastPair.getRight());
+            traits.getDuffForWowel(duffsAndErrors,
+                                   TibetanMachineWeb.getGlyph(hashKey),
+                                   lastPair.getRight());
        }
        if (previousSize == duffsAndErrors.size())
            throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion.
--- a/source/org/thdl/tib/text/ttt/TPairListFactory.java
+++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java
@ -121,7 +121,7 @@ class TPairListFactory {

        // base case for our recursion:
        if ("".equals(acip))
-            return new TPairList();
+            return new TPairList(ttraits);

        StringBuffer acipBuf = new StringBuffer(acip);
        int howMuchBuf[] = new int[1];
@ -131,9 +131,9 @@ class TPairListFactory {
            && null != head.getLeft()
            && null != head.getRight()
            && weHaveSeenVowelAlready
-            && ACIPRules.isACIPSuffix(head.getLeft()) // DKY'O should be two horizontal units, not three. -- {D}{KY'O}, not {D}{KY}{'O}.
+            && ttraits.isSuffix(head.getLeft()) // DKY'O should be two horizontal units, not three. -- {D}{KY'O}, not {D}{KY}{'O}.
            && head.getRight().startsWith("'")) {
-            head = new TPair(head.getLeft(),
+            head = new TPair(ttraits, head.getLeft(),
                             // Without this disambiguator, we are
                             // less efficient (8 parses, not 4) and
                             // we can't handle PA'AM'ANG etc.
@ -177,11 +177,11 @@ class TPairListFactory {
    }

    // TODO(DLC)[EWTS->Tibetan]: doc
-    private static TPairList breakHelperEWTS(String ewts, TTraits ttraits /* TODO(DLC)[EWTS->Tibetan]: use */) {
+    private static TPairList breakHelperEWTS(String ewts, TTraits ttraits) {

        // base case for our recursion:
        if ("".equals(ewts))
-            return new TPairList();
+            return new TPairList(ttraits);

        StringBuffer ewtsBuf = new StringBuffer(ewts);
        int howMuchBuf[] = new int[1];
@ -238,11 +238,11 @@ class TPairListFactory {
        int i, xl = acip.length();
        if (0 == xl) {
            howMuch[0] = 0;
-            return new TPair(null, null);
+            return new TPair(ttraits, null, null);
        }
        if (acip.charAt(0) == ttraits.disambiguatorChar()) {
            howMuch[0] = 1;
-            return new TPair(null, ttraits.disambiguator());
+            return new TPair(ttraits, null, ttraits.disambiguator());
        }
        char ch = acip.charAt(0);

@ -250,7 +250,7 @@ class TPairListFactory {
        // like seeing 1-2-3-4.
        if (ch >= '0' && ch <= '9') {
            howMuch[0] = 1; // not 2...
-            return new TPair(acip.substring(0, 1), (xl == 1) ? null : ttraits.disambiguator());
+            return new TPair(ttraits, acip.substring(0, 1), (xl == 1) ? null : ttraits.disambiguator());
        }

        String l = null, r = null;
@ -264,11 +264,11 @@ class TPairListFactory {
        int ll = (null == l) ? 0 : l.length();
        if (null != l && xl > ll && acip.charAt(ll) == ttraits.disambiguatorChar()) {
            howMuch[0] = l.length() + 1;
-            return new TPair(l, ttraits.disambiguator());
+            return new TPair(ttraits, l, ttraits.disambiguator());
        }
        if (null != l && xl > ll && acip.charAt(ll) == '+') {
            howMuch[0] = l.length() + 1;
-            return new TPair(l, "+");
+            return new TPair(ttraits, l, "+");
        }
        for (i = Math.min(ttraits.maxWowelLength(), xl - ll); i >= 1; i--) {
            String t = null;
@ -289,7 +289,7 @@ class TPairListFactory {
            && acip.charAt(z) == '+') {
            acip.deleteCharAt(z-1);
            howMuch[0] = l.length() + 1;
-            return new TPair(l, "+");
+            return new TPair(ttraits, l, "+");
        }

        // Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:. /* TODO(DLC)[EWTS->Tibetan]: */
@ -305,14 +305,14 @@ class TPairListFactory {
        if (null == l && null == r) {
            howMuch[0] = 1; // not 2...
            // add a disambiguator to avoid exponential running time:
-            return new TPair(acip.substring(0, 1),
+            return new TPair(ttraits, acip.substring(0, 1),
                             (xl == 1) ? null : ttraits.disambiguator());
        }

        howMuch[0] = (((l == null) ? 0 : l.length())
                      + ((r == null) ? 0 : r.length())
                      + mod);
-        return new TPair(l, r);
+        return new TPair(ttraits, l, r);
    } // TODO(DLC)[EWTS->Tibetan]:
 }

--- a/source/org/thdl/tib/text/ttt/TTraits.java
+++ b/source/org/thdl/tib/text/ttt/TTraits.java
@ -18,12 +18,18 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

+import java.util.ArrayList;
+import org.thdl.tib.text.DuffCode;
+
 /** A TTraits object encapsulates all the things that make a
- *  particular Roman transliteration scheme unique.  If both EWTS and
- *  ACIP transliterations have a property in common, then it's likely
- *  encoded in a manner that's hard to modify.  But if they differ in
- *  some respect, then that difference should be encoded in a TTraits
- *  object.
+ *  particular Roman transliteration scheme unique.  For the most
+ *  part, this difference is expressed at the finest granularity
+ *  possible -- often single characters of Roman transliteration.
+ *
+ *  <p>If both EWTS and ACIP transliterations have a property in
+ *  common, then it's likely encoded in a manner that's hard to
+ *  modify.  But if they differ in some respect, then that difference
+ *  should be encoded in a TTraits object.
 *
 *  <p>It is very likely that classes that implement this interface
 *  will choose to use the design pattern 'singleton'. */
@ -62,9 +68,63 @@ interface TTraits {
    /** Returns true if and only if <em>s</em> is a stretch of
     *  transliteration corresponding to a Tibetan wowel (without any
     *  [achen or other] consonant) */
-    boolean isWowel(String s);
+    boolean isWowel(String s); // TODO(DLC)[EWTS->Tibetan]: what about "m:" as opposed to "m" or ":"

    /** Returns true if and only if the pair given has a simple error
     *  other than being a mere disambiguator. */
    boolean hasSimpleError(TPair p);
+
+    /** The implicit 'ahhh' vowel, the one you see when you write the
+        human-friendly transliteration for "\u0f40\u0f0b". */
+    String aVowel();
+
+    /** Returns true if s is a valid postsuffix.  s must not have a
+        wowel on it. */
+    boolean isPostsuffix(String s);
+
+    /** Returns true if and only if l is the representation of a
+        letter that can be a suffix.  Note that all postsuffixes are
+        also suffixes.  l should not have a wowel. */
+    boolean isSuffix(String l);
+
+    /** Returns true if and only if l is the representation of a
+        letter that can be a prefix.  l should not have a wowel. */
+    boolean isPrefix(String l);
+
+    /** Returns the EWTS transliteration corresponding to the
+     *  consonant l, which should not have a vowel.  Returns null if
+     *  there is no such EWTS.
+     *
+     *  <p>May return "W" instead of "w", "r" instead of "R", and "y"
+     *  instead of "Y" because we sometimes don't have enough context
+     *  to decide.
+     *
+     *  <p>The reasoning for "W" instead of "w" is that r-w and r+w
+     *  are both known hash keys (as {@link
+     *  org.thdl.tib.text.TibetanMachineWeb} would call them).  We
+     *  sort 'em out this way.  (They are the only things like this
+     *  according to bug report #800166.) */
+    String getEwtsForConsonant(String l);
+
+    /** Returns the EWTS corresponding to the given punctuation or
+     *  mark.  Returns null if there is no such EWTS. */
+    String getEwtsForOther(String l);
+
+    /** Returns the EWTS corresponding to the given "wowel".  Returns
+     *  null if there is no such EWTS. */
+    String getEwtsForWowel(String l);
+
+    /** If l is a consonant or vowel or punctuation mark, then this
+     *  returns the Unicode for it.  The Unicode for the subscribed
+     *  form of the glyph is returned if subscribed is true.  Returns
+     *  null if l is unknown. */
+    String getUnicodeFor(String l, boolean subscribed);
+
+    /** Returns a scanner that can break up a string of
+        transliteration. */
+    TTshegBarScanner scanner();
+
+    /** Gets the duffcodes for wowel, such that they look good with
+     *  the preceding glyph, and appends them to duff. */
+    void getDuffForWowel(ArrayList duff, DuffCode preceding, String wowel);
 }
--- a/source/org/thdl/tib/text/ttt/TTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/TTshegBarScanner.java
@ -18,7 +18,11 @@ Contributor(s): ______________________________________.

 package org.thdl.tib.text.ttt;

-import java.io.*;
+import java.io.IOException;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.InputStream;
+import java.io.BufferedReader;
 import java.util.ArrayList;
 import java.util.Stack;

@ -40,7 +44,7 @@ public abstract class TTshegBarScanner {
     *  If errors is non-null, error messages will be appended to it.
     *  Returns a list of TStrings that is the scan.  Warning and
     *  error messages in the result will be long and self-contained
-     *  unless shortMessagse is true.
+     *  unless shortMessages is true.
     *
     *  <p>This is not so efficient; copies the whole file into memory
     *  first.