A preliminary TMW->ACIP converter is here. There are known bugs, mostly with rare punctuation.

2003-09-02 06:39:33 +00:00 · 2003-09-02 06:39:33 +00:00 · 316f59107b
commit 316f59107b
parent cc9ab06864
9 changed files with 278 additions and 88 deletions
--- a/source/org/thdl/tib/input/ConvertDialog.java
+++ b/source/org/thdl/tib/input/ConvertDialog.java
@ -417,6 +417,8 @@ class ConvertDialog extends JDialog
        } else { // conversion {to Wylie or TM} mode
            if (TMW_TO_WYLIE == ct) {
                newFileNamePrefix = suggested_WYLIE_prefix;
+            } else if (TMW_TO_ACIP == ct) {
+                newFileNamePrefix = suggested_ACIP_prefix;
            } else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
                newFileNamePrefix = suggested_TO_UNI_prefix;
            } else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {
--- a/source/org/thdl/tib/input/ConverterGUI.java
+++ b/source/org/thdl/tib/input/ConverterGUI.java
@ -78,6 +78,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
                                          "Attention required",
                                          JOptionPane.ERROR_MESSAGE);
            return false;
+        } else if (49 == returnCode) {
+            JOptionPane.showMessageDialog(cd,
+                                          "Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP:\n    Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct ACIP transliteration.",
+                                          "Attention required",
+                                          JOptionPane.ERROR_MESSAGE);
+            return false;
        } else if (43 == returnCode) {
            JOptionPane.showMessageDialog(cd,
                                          "Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
--- a/source/org/thdl/tib/input/FontConverterConstants.java
+++ b/source/org/thdl/tib/input/FontConverterConstants.java
@ -31,6 +31,7 @@ interface FontConverterConstants
    final String TM_TO_TMW = "TM to TMW";
    final String TMW_TO_UNI = "TMW to Unicode";
    final String TMW_TO_WYLIE = "TMW to Wylie";
+    final String TMW_TO_ACIP = "TMW to ACIP";
    final String TMW_TO_TM = "TMW to TM";
    final String FIND_SOME_NON_TMW = "Find some non-TMW";
    final String FIND_SOME_NON_TM = "Find some non-TM";
@ -43,6 +44,7 @@ interface FontConverterConstants
        TM_TO_TMW,
        TMW_TO_UNI,
        TMW_TO_WYLIE,
+        TMW_TO_ACIP,
        TMW_TO_TM,
        FIND_SOME_NON_TMW,
        FIND_SOME_NON_TM,
@ -51,6 +53,7 @@ interface FontConverterConstants
    };

    final String suggested_WYLIE_prefix = "THDL_Wylie_";
+    final String suggested_ACIP_prefix = "ACIP_";
    final String suggested_TO_TMW_prefix = "TMW_";
    final String suggested_TO_UNI_prefix = "Uni_";
    final String suggested_TO_TM_prefix = "TM_";
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@ -74,6 +74,7 @@ public class TibetanConverter implements FontConverterConstants {
            boolean convertACIPToTMWMode = false;
            boolean convertToTMWMode = false;
            boolean convertToWylieMode = false;
+            boolean convertToACIPMode = false;
            boolean findSomeNonTMWMode = false;
            boolean findAllNonTMWMode = false;
            boolean findSomeNonTMMode = false;
@ -98,6 +99,8 @@ public class TibetanConverter implements FontConverterConstants {
                             = args[0].equals("--to-unicode"))
                         || (convertToWylieMode
                             = args[0].equals("--to-wylie"))
+                         || (convertToACIPMode
+                             = args[0].equals("--to-acip"))
                         || (findSomeNonTMWMode
                             = args[0].equals("--find-some-non-tmw"))
                         || (findSomeNonTMMode
@ -107,7 +110,7 @@ public class TibetanConverter implements FontConverterConstants {
                ))) {
                out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
                out.println("                  | --to-tibetan-machine | --to-tibetan-machine-web");
-                out.println("                  | --to-unicode | --to-wylie] RTF_file");
+                out.println("                  | --to-unicode | --to-wylie | --to-acip] RTF_file");
                out.println(" | TibetanConverter --acip-to-unicode TXT_file");
                out.println(" | TibetanConverter [--version | -v | --help | -h]");
                out.println("");
@ -120,6 +123,7 @@ public class TibetanConverter implements FontConverterConstants {
                out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
                out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
                out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
+                out.println(" --to-acip to convert TibetanMachineWeb to ACIP");
                out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
                out.println(" --find-all-non-tmw to locate all characters in the input document that are");
                out.println("   not in Tibetan Machine Web fonts, exit zero if and only if none found");
@ -177,6 +181,8 @@ public class TibetanConverter implements FontConverterConstants {
            } else { // conversion {to Wylie or TM} mode
                if (convertToWylieMode) {
                    conversionTag = TMW_TO_WYLIE;
+                } else if (convertToACIPMode) {
+                    conversionTag = TMW_TO_ACIP;
                } else if (convertToUnicodeMode) {
                    conversionTag = TMW_TO_UNI;
                } else if (convertToTMWMode) {
@ -311,6 +317,7 @@ public class TibetanConverter implements FontConverterConstants {
                ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
                                 + ((TMW_TO_UNI == ct) ? 1 : 0)
                                 + ((TM_TO_TMW == ct) ? 1 : 0)
+                                 + ((TMW_TO_ACIP == ct) ? 1 : 0)
                                 + ((TMW_TO_WYLIE == ct) ? 1 : 0)
                                 == 1);
                long numAttemptedReplacements[] = new long[] { 0 };
@ -321,6 +328,13 @@ public class TibetanConverter implements FontConverterConstants {
                                      numAttemptedReplacements)) {
                        exitCode = 44;
                    }
+                } else if (TMW_TO_ACIP == ct) {
+                    // Convert to ACIP:
+                    if (!tdoc.toACIP(0,
+                                     tdoc.getLength(),
+                                     numAttemptedReplacements)) {
+                        exitCode = 49;
+                    }
                } else if (TMW_TO_UNI == ct) {
                    StringBuffer errors = new StringBuffer();
                    // Convert to Unicode:
--- a/source/org/thdl/tib/scanner/Manipulate.java
+++ b/source/org/thdl/tib/scanner/Manipulate.java
@ -44,8 +44,13 @@ public class Manipulate
 	    return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
 	}
 	
+    /** Returns null on error. */
 	public static String wylieToAcip(String palabra)
 	{
+		// DLC FIXME: for unknown things, return null.
+		if (palabra.equals("@#")) return "*";
+		if (palabra.startsWith("@") || palabra.startsWith("#"))
+			return null; // we can't convert this in isolation!  We need context.
 		char []caract;
 		int i, j, len;
 		String nuevaPalabra;
@ -83,6 +88,12 @@ public class Manipulate
 		nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
 		nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
 		nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
+		nuevaPalabra = replace(nuevaPalabra, "a", "'A");
+		nuevaPalabra = replace(nuevaPalabra, "i", "'I");
+		nuevaPalabra = replace(nuevaPalabra, "u", "'U");
+		nuevaPalabra = replace(nuevaPalabra, "-I", "i");
+		nuevaPalabra = replace(nuevaPalabra, "/", ",");
+		nuevaPalabra = replace(nuevaPalabra, "_", "    ");
 		nuevaPalabra = fixWazur(nuevaPalabra);
 		return nuevaPalabra;
 	}
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@ -86,6 +86,30 @@ public class TGCPair {
            b.append(vowelWylie);
        return b.toString();
    }
+    /** Returns the ACIP transliteration of this grapheme cluster,
+     *  built from the consonant part followed by the vowel part
+     *  (either may be absent), each converted from THDL Extended
+     *  Wylie via Manipulate.wylieToAcip.  Hyphens in the converted
+     *  consonant are dropped, so {P-Y} comes out as {PY}.
+     *  @return the ACIP for this pair; empty if both parts are null
+     *  @throws Error if Manipulate.wylieToAcip returns null for the
+     *  consonant or for the vowel */
+    public String getACIP() {
+        // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
+        StringBuffer b = new StringBuffer();
+        if (consonantWylie != null) {
+            String consonantACIP // DLC FIXME can KAsh occur?
+                = org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
+            if (null == consonantACIP)
+                throw new Error("how? no ACIP for consonant Wylie '"
+                                + consonantWylie + "'");
+            // we may have {P-Y}, but the user wants to see {PY}.
+            for (int i = 0; i < consonantACIP.length(); i++) {
+                char ch = consonantACIP.charAt(i);
+                if ('-' != ch)
+                    b.append(ch);
+            }
+        }
+        if (vowelWylie != null) {
+            String vowelACIP // DLC FIXME look for exceptions
+                = org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
+            if (null == vowelACIP)
+                throw new Error("how? no ACIP for vowel Wylie '"
+                                + vowelWylie + "'");
+            b.append(vowelACIP);
+        }
+        return b.toString();
+    }
    public int classification;
    /** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
     *  consonantWylie and vowel vowelWylie.  Use
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@ -786,37 +786,50 @@ public class TibTextUtils implements THDLWylieConstants {
    private static final boolean makeIllegalTibetanGoEndToEnd = true;


-    /** Returns "a", unless wylie is already "a". */
-    private static String aVowelToUseAfter(String wylie) {
+    /** Returns the "a" vowel to append after wylie: "" when wylie
+        (which really is EWTS, not ACIP) is already "a" (ACHEN);
+        otherwise WYLIE_aVOWEL if EWTSNotACIP is true, or "A" if it
+        is false. */
+    private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
        if (wylie.equals(ACHEN))
            return "";
        else
-            return WYLIE_aVOWEL;
+            return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
    }

-    private static String unambiguousPostAVowelWylie(String wylie1,
-                                                     String wylie2) {
+    /** Joins a pair of stacks, inserting a disambiguator between them
+        when wylie1 is a Wylie "top" (per TibetanMachineWeb.isWylieTop)
+        and wylie2 is "d".  The test is always made on the EWTS
+        strings; EWTSNotACIP only selects the output: wylie1/wylie2
+        with WYLIE_DISAMBIGUATING_KEY_STRING when true, acip1/acip2
+        with "-" when false. */
+    private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP,
+                                                        String wylie1,
+                                                        String wylie2,
+                                                        String acip1,
+                                                        String acip2) {
        String disambiguator = "";
        // type "lard" vs. "lar.d", and you'll see the need for this
        // disambiguation of suffix and postsuffix.  sa doesn't take
        // any head letters, so only da needs to be considered.
        if (TibetanMachineWeb.isWylieTop(wylie1)
            && wylie2.equals(/* FIXME: hard-coded */ "d"))
-            disambiguator = WYLIE_DISAMBIGUATING_KEY_STRING;
-        return wylie1 + disambiguator + wylie2;
+            disambiguator = (EWTSNotACIP) ? WYLIE_DISAMBIGUATING_KEY_STRING : "-";
+        if (EWTSNotACIP)
+            return wylie1 + disambiguator + wylie2;
+        else
+            return acip1 + disambiguator + acip2;
    }

 /**
-* Gets the Extended Wylie for a sequence of glyphs.
+* Gets the Extended Wylie for the given sequence of glyphs if
+* EWTSNotACIP is true, or the ACIP otherwise.
+* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
+* you want ACIP
 * @param dcs an array of glyphs
-* @param noSuchWylie an array which will not be touched if this is
-* successful; however, if there is no THDL Extended Wylie
-* corresponding to these glyphs, then noSuchWylie[0] will be set to
-* true
-* @return the Extended Wylie corresponding to these glyphs, or null */
-    public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) {
+* @param noSuch an array which will not be touched if this is
+* successful; however, if there is no THDL Extended Wylie/ACIP
+* corresponding to these glyphs, then noSuch[0] will be set to true
+* @return the Extended Wylie/ACIP corresponding to these glyphs, or
+* null */
+    public static String getTranslit(boolean EWTSNotACIP,
+                                     DuffCode[] dcs,
+                                     boolean noSuch[]) {
        StringBuffer warnings = (debug ? new StringBuffer() : null);
-        String ans = getWylieImplementation(dcs, noSuchWylie, warnings);
+        String ans
+            = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
        if (debug && warnings.length() > 0)
            System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
        return ans;
@ -1172,13 +1185,13 @@ public class TibTextUtils implements THDLWylieConstants {
        return candidateType;
    }

-    /** Appends to wylieBuffer the wylie for the glyph list glyphList
-        (which should be an ArrayList for speed).  This will be very
-        user-friendly for "legal tsheg bars" and will be valid, but
-        possibly ugly (interspersed with disambiguators or extra
-        vowels, etc.) Wylie for other things, such as Sanskrit
-        transliteration.  Updates warnings and noSuchWylie like the
-        caller does.
+    /** Appends to translitBuffer the EWTS/ACIP for the glyph list
+        glyphList (which should be an ArrayList for speed).  This will
+        be very user-friendly for "legal tsheg bars" and will be
+        valid, but possibly ugly (interspersed with disambiguators or
+        extra vowels, etc.) Wylie/ACIP for other things, such as
+        Sanskrit transliteration.  Updates warnings and noSuch like
+        the caller does.

        <p>What constitutes a legal, non-punctuation, non-whitespace
        tsheg bar?  The following are the only such:</p>
@ -1219,22 +1232,23 @@ public class TibTextUtils implements THDLWylieConstants {
        
        <p>When there are three unadorned consonant stacks in a
           syllable, a hard-coded list of valid Tibetan tsheg bars is
-           relied upon to determine if the 'a' vowel comes after the
-           first or the second consonant.</p> */
-    private static void getTshegBarWylie(java.util.List glyphList,
-                                         boolean noSuchWylie[],
-                                         StringBuffer warnings,
-                                         StringBuffer wylieBuffer) {
+           relied upon to determine if the 'a'/'A' vowel comes after
+           the first or the second consonant.</p> */
+    private static void getTshegBarTranslit(boolean EWTSNotACIP,
+                                            java.util.List glyphList,
+                                            boolean noSuch[],
+                                            StringBuffer warnings,
+                                            StringBuffer translitBuffer) {
        TGCList gcs
-            = breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie);
+            = breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
        String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
        int sz = gcs.size();
        if (candidateType == "invalid"
            || candidateType == "single-sanskrit-gc") {
            // Forget beauty and succinctness -- just be sure to
-            // generate Wylie that can be converted unambiguously into
-            // Tibetan.  Use a disambiguator or vowel after each
-            // grapheme cluster.
+            // generate transliteration that can be converted
+            // unambiguously into Tibetan.  Use a disambiguator or
+            // vowel after each grapheme cluster.
            //
            // If we truly didn't care about beauty, we'd just lump
            // SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into
@ -1244,19 +1258,20 @@ public class TibTextUtils implements THDLWylieConstants {
                TGCPair tp = (TGCPair)gcs.get(i);
                int cls = tp.classification;
                String wylie = tp.getWylie();
-                wylieBuffer.append(wylie);
+                String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
+                translitBuffer.append(translit);
                if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
                    || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
-                    wylieBuffer.append(aVowelToUseAfter(wylie));
+                    translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
                } else {
                    if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
                        && TGCPair.SANSKRIT_WITH_VOWEL != cls)
-                        wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY);
+                        translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
                }
            }
        } else {
-            // Generate perfect, beautiful, Wylie, using the minimum
-            // number of vowels and disambiguators.
+            // Generate perfect, beautiful transliteration, using the
+            // minimum number of vowels and disambiguators.

            int leftover = sz + 1;

@ -1299,23 +1314,44 @@ public class TibTextUtils implements THDLWylieConstants {
                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
                String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
+                String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
+                String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
+                String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
                if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
                    || (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
                    || (wylie1.equals("b") && wylie2.equals("d"))
                    || (wylie1.equals("m") && wylie2.equals("d"))
                    || (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
                    if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
-                        wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+                        if (EWTSNotACIP)
+                            translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+                        else
+                            translitBuffer.append(acip1 + '-' + acip2);
                    else
-                        wylieBuffer.append(wylie1 + wylie2);
+                        if (EWTSNotACIP)
+                            translitBuffer.append(wylie1 + wylie2);
+                        else
+                            translitBuffer.append(acip1 + acip2);

-                    wylieBuffer.append(aVowelToUseAfter(wylie2)
-                                       + wylie3);
+                    translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
+                                          + (EWTSNotACIP ? wylie3 : acip3));
                } else {
-                    wylieBuffer.append(wylie1
-                                       + aVowelToUseAfter(wylie1)
-                                       + unambiguousPostAVowelWylie(wylie2,
-                                                                    wylie3));
+                    if (EWTSNotACIP)
+                        translitBuffer.append(wylie1
+                                              + aVowelToUseAfter(EWTSNotACIP, wylie1)
+                                              + unambiguousPostAVowelTranslit(EWTSNotACIP,
+                                                                              wylie2,
+                                                                              wylie3,
+                                                                              acip2,
+                                                                              acip3));
+                    else
+                        translitBuffer.append(acip1
+                                              + aVowelToUseAfter(EWTSNotACIP, wylie1)
+                                              + unambiguousPostAVowelTranslit(EWTSNotACIP,
+                                                                              wylie2,
+                                                                              wylie3,
+                                                                              acip2,
+                                                                              acip3));
                }
            } else if ("root" == candidateType
                       || "prefix/root-root/suffix" == candidateType
@ -1323,13 +1359,14 @@ public class TibTextUtils implements THDLWylieConstants {
                       || "root-suffix-postsuffix" == candidateType
                       || "root-suffix" == candidateType) {
                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
+                String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
                leftover = 1;
-                wylieBuffer.append(wylie1);
+                translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
                if (((TGCPair)gcs.get(0)).classification
                    != TGCPair.CONSONANTAL_WITH_VOWEL) {
                    ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
                                     == ((TGCPair)gcs.get(0)).classification);
-                    wylieBuffer.append(aVowelToUseAfter(wylie1));
+                    translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
                    if (debug) System.out.println("DEBUG: appending vowel");
                } else {
                    if (debug) System.out.println("DEBUG: already has vowel 2");
@ -1338,26 +1375,39 @@ public class TibTextUtils implements THDLWylieConstants {
                    leftover = 3;
                    String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
                    String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
-                    wylieBuffer.append(unambiguousPostAVowelWylie(wylie2,
-                                                                  wylie3));
+                    String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
+                    String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
+                    translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
+                                                                        wylie2,
+                                                                        wylie3,
+                                                                        acip2,
+                                                                        acip3));
                }
            } else if ("prefix-root-suffix" == candidateType
                       || "prefix-root" == candidateType
                       || "prefix-root-suffix-postsuffix" == candidateType) {
                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
+                String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
+                String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
                leftover = 2;
                if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
-                    wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+                    if (EWTSNotACIP)
+                        translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+                    else
+                        translitBuffer.append(acip1 + '-' + acip2);
                else
-                    wylieBuffer.append(wylie1 + wylie2);
+                    if (EWTSNotACIP)
+                        translitBuffer.append(wylie1 + wylie2);
+                    else
+                        translitBuffer.append(acip1 + acip2);

                if (((TGCPair)gcs.get(1)).classification
                    != TGCPair.CONSONANTAL_WITH_VOWEL) {
                    ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
                                     == ((TGCPair)gcs.get(1)).classification);
                    if (debug) System.out.println("DEBUG: appending vowel");
-                    wylieBuffer.append(aVowelToUseAfter(wylie2));
+                    translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
                } else {
                    if (debug) System.out.println("DEBUG: already has vowel 1");
                }
@ -1365,8 +1415,13 @@ public class TibTextUtils implements THDLWylieConstants {
                    leftover = 4;
                    String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
                    String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
-                    wylieBuffer.append(unambiguousPostAVowelWylie(wylie3,
-                                                                  wylie4));
+                    String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
+                    String acip4 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(3)).getACIP();
+                    translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
+                                                                        wylie3,
+                                                                        wylie4,
+                                                                        acip3,
+                                                                        acip4));
                }
            } else if ("number" == candidateType) {
                leftover = 0;
@ -1374,18 +1429,17 @@ public class TibTextUtils implements THDLWylieConstants {
                throw new Error("missed a case down here");
            }

-            // append the wylie left over:
+            // append the wylie/ACIP left over:
            for (int i = leftover; i < sz; i++) {
                TGCPair tp = (TGCPair)gcs.get(i);
-                String wylie = tp.getWylie();
-                wylieBuffer.append(wylie);
+                translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
            }
        }
    }

 /**
-* Gets the Extended Wylie for a sequence of glyphs.  This works as
-* follows:
+* Gets the Extended Wylie/ACIP for a sequence of glyphs.  This works
+* as follows:
 *
 * <p>We run along until we hit whitespace or punctuation.  We take
 * everything before that and we see if it's a legal Tibetan tsheg bar,
@ -1393,22 +1447,25 @@ public class TibTextUtils implements THDLWylieConstants {
 * vowel in the correct place.  If not, then we throw a disambiguating
 * key or a vowel after each stack.
 *
+* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
+* you want ACIP
 * @param dcs an array of glyphs
-* @param noSuchWylie an array which will not be touched if this is
-* successful; however, if there is no THDL Extended Wylie
-* corresponding to these glyphs, then noSuchWylie[0] will be set to
-* true
+* @param noSuch an array which will not be touched if this is
+* successful; however, if there is no THDL Extended Wylie/ACIP
+* corresponding to these glyphs, then noSuch[0] will be set to true
 * @param warnings either null or a buffer to which will be appended
 * warnings about illegal tsheg bars
-* @return the Extended Wylie corresponding to these glyphs, or null */
-    public static String getWylieImplementation(DuffCode[] dcs,
-                                                boolean noSuchWylie[],
-                                                StringBuffer warnings) {
+* @return the Extended Wylie/ACIP corresponding to these glyphs, or
+* null */
+    private static String getTranslitImplementation(boolean EWTSNotACIP,
+                                                    DuffCode[] dcs,
+                                                    boolean noSuch[],
+                                                    StringBuffer warnings) {
        if (dcs.length == 0)
            return null;

        ArrayList glyphList = new ArrayList();
-        StringBuffer wylieBuffer = new StringBuffer();
+        StringBuffer translitBuffer = new StringBuffer();

        for (int i=0; i<dcs.length; i++) {
            char ch = dcs[i].getCharacter();
@ -1417,41 +1474,43 @@ public class TibTextUtils implements THDLWylieConstants {

            if (k < 32) {
                if (!glyphList.isEmpty()) {
-                    getTshegBarWylie(glyphList, noSuchWylie,
-                                     warnings, wylieBuffer);
+                    getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
+                                        warnings, translitBuffer);
                    glyphList.clear();
                    if (null != warnings)
                        warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
                }

-                wylieBuffer.append(ch);
+                translitBuffer.append(ch);
            } else {
-                String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuchWylie);
+                String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
+                String acip = EWTSNotACIP ? null : TibetanMachineWeb.getACIPForGlyph(dcs[i], noSuch);
                if (TibetanMachineWeb.isWyliePunc(wylie)
                    && !TibetanMachineWeb.isWylieAdornment(wylie)) {
                    if (!glyphList.isEmpty()) {
-                        getTshegBarWylie(glyphList, noSuchWylie,
-                                         warnings, wylieBuffer);
+                        getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
+                                            warnings, translitBuffer);
                        glyphList.clear();
                    }
-                    wylieBuffer.append(wylie); //append the punctuation
+                    translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation
                } else {
                    glyphList.add(dcs[i]);
                }
            }
        }

-        // replace remaining TMW with Wylie
+        // replace remaining TMW with transliteration

        if (!glyphList.isEmpty()) {
-            getTshegBarWylie(glyphList, noSuchWylie, warnings, wylieBuffer);
+            getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
+                                warnings, translitBuffer);
            // glyphList.clear() if we weren't about to exit...
            if (null != warnings)
                warnings.append("The stretch of Tibetan ended without final punctuation.");
        }

-        if (wylieBuffer.length() > 0)
-            return wylieBuffer.toString();
+        if (translitBuffer.length() > 0)
+            return translitBuffer.toString();
        else
            return null;
    }
--- a/source/org/thdl/tib/text/TibetanDocument.java
+++ b/source/org/thdl/tib/text/TibetanDocument.java
@ -294,6 +294,18 @@ public class TibetanDocument extends DefaultStyledDocument {
 		return getWylie(0, getLength(), noSuchWylie);
 	}

+/**
+* Converts the entire document into ACIP; equivalent to {@link
+* #getACIP(int,int,boolean[]) getACIP(0, getLength(), noSuchACIP)}.  If
+* the document consists of both Tibetan and non-Tibetan fonts, however,
+* the conversion stops at the first non-Tibetan font.
+* @param noSuchACIP an array left untouched on success; if some glyph
+* has no corresponding ACIP, then noSuchACIP[0] will be set to true
+* @return the string of ACIP corresponding to this document */
+    public String getACIP(boolean noSuchACIP[]) {
+        return getACIP(0, getLength(), noSuchACIP);
+    }
+
 /**
 * Converts a portion of the document into Extended Wylie.
 * If the document consists of both Tibetan and
@ -306,7 +318,25 @@ public class TibetanDocument extends DefaultStyledDocument {
 * corresponding to one of these glyphs, then noSuchWylie[0] will be
 * set to true
 * @return the string of Wylie corresponding to this document */
-	public String getWylie(int begin, int end, boolean noSuchWylie[]) {
+    public String getWylie(int begin, int end, boolean noSuchWylie[]) {
+        return getTranslit(true, begin, end, noSuchWylie); // true is EWTSNotACIP, i.e. EWTS (Wylie) rather than ACIP
+    }
+
+/**
+* Converts a portion of the document into ACIP.  If the document
+* consists of both Tibetan and non-Tibetan fonts, however, the
+* conversion stops at the first non-Tibetan font.
+* @param begin the beginning of the region to convert
+* @param end the end of the region to convert
+* @param noSuchACIP an array which will not be touched if this is
+* successful; however, if there is no ACIP corresponding to one of
+* these glyphs, then noSuchACIP[0] will be set to true
+* @return the string of ACIP corresponding to this document */
+    public String getACIP(int begin, int end, boolean noSuchACIP[]) {
+        return getTranslit(false, begin, end, noSuchACIP); // false is EWTSNotACIP, i.e. ACIP rather than EWTS
+    }
+
+	private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
 		AttributeSet attr;
 		String fontName;
 		int fontNum;
@ -318,7 +348,7 @@ public class TibetanDocument extends DefaultStyledDocument {

 		java.util.List dcs = new ArrayList();
 		int i = begin;
-		StringBuffer wylieBuffer = new StringBuffer();
+		StringBuffer translitBuffer = new StringBuffer();

 		try {
 			while (i < end) {
@ -332,10 +362,10 @@ public class TibetanDocument extends DefaultStyledDocument {
 					if (dcs.size() > 0) {
 						DuffCode[] dc_array = new DuffCode[0];
 						dc_array = (DuffCode[])dcs.toArray(dc_array);
-						wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
+						translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
 						dcs.clear();
 					}
-					wylieBuffer.append(ch);
+					translitBuffer.append(ch);
 				}

 				//current character isn't TMW
@ -343,7 +373,7 @@ public class TibetanDocument extends DefaultStyledDocument {
 					if (dcs.size() > 0) {
 						DuffCode[] dc_array = new DuffCode[0];
 						dc_array = (DuffCode[])dcs.toArray(dc_array);
-						wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
+						translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
 						dcs.clear();
 					}
 				}
@ -358,9 +388,9 @@ public class TibetanDocument extends DefaultStyledDocument {
 			if (dcs.size() > 0) {
 				DuffCode[] dc_array = new DuffCode[0];
 				dc_array = (DuffCode[])dcs.toArray(dc_array);
-				wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
+				translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
 			}
-			return wylieBuffer.toString();
+			return translitBuffer.toString();
 		}
 		catch (BadLocationException ble) {
 			ble.printStackTrace();
@ -1101,6 +1131,17 @@ public class TibetanDocument extends DefaultStyledDocument {
 * DuffCode..." text into the document */
    public boolean toWylie(int start, int end,
                           long numAttemptedReplacements[]) {
+        return toTranslit(true, start, end, numAttemptedReplacements);
+    }
+
+    // DLC DOC just like {@link #toWylie(int,int,long[])}, except that it passes EWTSNotACIP == false to toTranslit so that ACIP is inserted
+    public boolean toACIP(int start, int end,
+                          long numAttemptedReplacements[]) {
+        return toTranslit(false, start, end, numAttemptedReplacements);
+    }
+
+    private boolean toTranslit(boolean EWTSNotACIP, int start, int end,
+                               long numAttemptedReplacements[]) {
        if (start >= end)
            return true;

@ -1124,7 +1165,9 @@ public class TibetanDocument extends DefaultStyledDocument {
                        remove(start, i-start);
                        ThdlDebug.verify(getRomanAttributeSet() != null);
                        insertString(start,
-                                     TibTextUtils.getWylie(dc_array, noSuchWylie),
+                                     TibTextUtils.getTranslit(EWTSNotACIP,
+                                                              dc_array,
+                                                              noSuchWylie),
                                     getRomanAttributeSet());
                        dcs.clear();
                    }
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -1706,6 +1706,13 @@ public static String wylieForGlyph(String hashKey) {
 	return sb.toString();
 }

+    // DLC DOC: returns Manipulate.wylieToAcip(hashKey).  NOTE(review): the hash key is passed as-is, presumably because it is itself Wylie -- confirm.
+private static String acipForGlyph(String hashKey) {
+    String ACIP // DLC FIXME: test this.
+        = org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
+    return ACIP;
+}
+
 /** Error that appears in a document when some TMW cannot be
 *  transcribed in THDL Extended Wylie.  This error message is
 *  documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
@ -1716,6 +1723,16 @@ private static String getTMWToWylieErrorString(DuffCode dc) {
        + " to THDL Extended Wylie.  Please see the documentation for the TMW font and transcribe this yourself.]]>>";
 }

+/** Error that appears in a document when some TMW glyph (dc) cannot
+ *  be transcribed in ACIP.  ConverterGUI's dialog quotes the start of
+ *  this message, which is to be documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html
+ *  (DLC NOT YET), so change them all when you change this. */
+private static String getTMWToACIPErrorString(DuffCode dc) {
+    return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode "
+        + dc.toString(true)
+        + " to ACIP.  Please see the documentation for the TMW font and transcribe this yourself.]]>>";
+}
+
 /**
 * Gets the Extended Wylie value for this glyph.
 * @param font the font of the TibetanMachineWeb
@ -1756,6 +1773,17 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
 	return wylieForGlyph(hashKey);
 }

+// DLC DOC: returns the ACIP for dc; if dc has no hash key or no ACIP, sets noSuchACIP[0] to true and returns the TMW->ACIP error string instead.
+public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
+    String hashKey = getHashKeyForGlyph(dc);
+    String ans = (hashKey == null) ? null : acipForGlyph(hashKey);
+    if (hashKey == null || ans == null) {
+        noSuchACIP[0] = true;
+        return getTMWToACIPErrorString(dc);
+    }
+    return ans;
+}
+
    /** This addresses bug 624133, "Input freezes after impossible
     *  character".  Returns true iff s is a proper prefix of some
     *  legal input for this keyboard.  In the extended Wylie