Preliminary ACIP->TMW support is in place. {DU} gives you something
less beautiful than what Jskad would give, so more work is needed.
2003-08-31 16:06:35 +00:00 · 2003-08-31 16:06:35 +00:00 · 045c4069c9
commit 045c4069c9
parent 1f4d53be2e
12 changed files with 355 additions and 64 deletions
--- a/source/org/thdl/tib/input/ConvertDialog.java
+++ b/source/org/thdl/tib/input/ConvertDialog.java
@ -69,7 +69,8 @@ class ConvertDialog extends JDialog
                ConvertDialog.this.theRealActionPerformed(e);
            }};
    private void updateWarningLevels() {
-        if (choices.getSelectedItem() == ACIP_TO_UNI)
+        if (choices.getSelectedItem() == ACIP_TO_UNI
+            || choices.getSelectedItem() == ACIP_TO_TMW)
            this.warningLevels.enable();
        else
            this.warningLevels.disable();
@ -418,7 +419,7 @@ class ConvertDialog extends JDialog
                newFileNamePrefix = suggested_WYLIE_prefix;
            } else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
                newFileNamePrefix = suggested_TO_UNI_prefix;
-            } else if (TM_TO_TMW == ct) {
+            } else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {
                newFileNamePrefix = suggested_TO_TMW_prefix;
            } else {
                ThdlDebug.verify(TMW_TO_TM == ct);
--- a/source/org/thdl/tib/input/DuffPane.java
+++ b/source/org/thdl/tib/input/DuffPane.java
@ -615,10 +615,8 @@ public class DuffPane extends TibetanPane implements FocusListener {
 */
 	public void setRomanAttributeSet(String font, int size) {
        if (getTibDoc() != null) {
-            SimpleAttributeSet ras = new SimpleAttributeSet();
-            StyleConstants.setFontFamily(ras, romanFontFamily = font);
-            StyleConstants.setFontSize(ras, romanFontSize = size);
-            getTibDoc().setRomanAttributeSet(ras);
+            getTibDoc().setRomanAttributeSet(romanFontFamily = font,
+                                             romanFontSize = size);
        }
 	}

--- a/source/org/thdl/tib/input/FontConverterConstants.java
+++ b/source/org/thdl/tib/input/FontConverterConstants.java
@ -27,6 +27,7 @@ import java.awt.*;
 interface FontConverterConstants
 {
    final String ACIP_TO_UNI = "ACIP to Unicode";
+    final String ACIP_TO_TMW = "ACIP to TMW";
    final String TM_TO_TMW = "TM to TMW";
    final String TMW_TO_UNI = "TMW to Unicode";
    final String TMW_TO_WYLIE = "TMW to Wylie";
@ -36,7 +37,9 @@ interface FontConverterConstants
    final String FIND_ALL_NON_TMW = "Find all non-TMW";
    final String FIND_ALL_NON_TM = "Find all non-TM";

-    final String[] CHOICES = new String[]{
+    final String[] CHOICES = new String[] {
+        ACIP_TO_UNI,
+        ACIP_TO_TMW,
        TM_TO_TMW,
        TMW_TO_UNI,
        TMW_TO_WYLIE,
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@ -71,6 +71,7 @@ public class TibetanConverter implements FontConverterConstants {
            boolean convertToUnicodeMode = false;
            boolean convertToTMMode = false;
            boolean convertACIPToUniMode = false;
+            boolean convertACIPToTMWMode = false;
            boolean convertToTMWMode = false;
            boolean convertToWylieMode = false;
            boolean findSomeNonTMWMode = false;
@ -91,6 +92,8 @@ public class TibetanConverter implements FontConverterConstants {
                             = args[0].equals("--to-tibetan-machine-web"))
                         || (convertACIPToUniMode
                             = args[0].equals("--acip-to-unicode"))
+                         || (convertACIPToTMWMode
+                             = args[0].equals("--acip-to-tmw"))
                         || (convertToUnicodeMode
                             = args[0].equals("--to-unicode"))
                         || (convertToWylieMode
@ -180,6 +183,8 @@ public class TibetanConverter implements FontConverterConstants {
                    conversionTag = TM_TO_TMW;
                } else if (convertACIPToUniMode) {
                    conversionTag = ACIP_TO_UNI;
+                } else if (convertACIPToTMWMode) {
+                    conversionTag = ACIP_TO_TMW;
                } else {
                    ThdlDebug.verify(convertToTMMode);
                    conversionTag = TMW_TO_TM;
@ -205,7 +210,7 @@ public class TibetanConverter implements FontConverterConstants {
        honored. */
    static int reallyConvert(InputStream in, PrintStream out, String ct,
                             String warningLevel) {
-        if (ACIP_TO_UNI == ct) {
+        if (ACIP_TO_UNI == ct || ACIP_TO_TMW == ct) {
            try {
                ArrayList al = ACIPTshegBarScanner.scanStream(in, null,
                                                              250 - 1 // DLC FIXME: make me configurable
@ -214,10 +219,17 @@ public class TibetanConverter implements FontConverterConstants {
                    return 47;
                StringBuffer warnings = new StringBuffer();
                boolean embeddedWarnings = (warningLevel != "None");
-                if (!ACIPConverter.convertToUnicode(al, out, null, warnings,
+                if (ACIP_TO_UNI == ct) {
+                    if (!ACIPConverter.convertToUnicode(al, out, null, warnings,
+                                                        embeddedWarnings,
+                                                        warningLevel))
+                        return 46;
+                } else {
+                    if (!ACIPConverter.convertToTMW(al, out, null, warnings,
                                                    embeddedWarnings,
                                                    warningLevel))
-                    return 46;
+                        return 46;
+                }
                if (embeddedWarnings && warnings.length() > 0)
                    return 45;
                else
--- a/source/org/thdl/tib/text/TibetanDocument.java
+++ b/source/org/thdl/tib/text/TibetanDocument.java
@ -141,6 +141,32 @@ public class TibetanDocument extends DefaultStyledDocument {
        appendDuff(tibetanFontSize, offset, s, attr);
    }

+/**
+* Inserts Latin text into the document. The font size is applied
+* automatically, according to the current Roman font size.
+* @param offset the position at which you want to insert text
+* @param s the string you want to insert
+* @see #setRomanAttributeSet(AttributeSet)
+*/
+	public void appendRoman(int offset, String s) throws BadLocationException {
+        ThdlDebug.verify(getRomanAttributeSet() != null);
+        insertString(offset, s, getRomanAttributeSet());
+    }
+
+/**
+* Inserts Latin text at the end of the document. The font size is
+* applied automatically, according to the current Roman font size.
+* @param s the string you want to insert
+* @see #setRomanAttributeSet(AttributeSet)
+*/
+	public void appendRoman(String s) {
+        try {
+            appendRoman(getLength(), s);
+        } catch (BadLocationException e) {
+            throw new Error("can't happen");
+        }
+    }
+
    private void appendDuff(int fontSize, int offset, String s, MutableAttributeSet attr) {
 		try {
 			StyleConstants.setFontSize(attr, fontSize);
@ -160,6 +186,19 @@ public class TibetanDocument extends DefaultStyledDocument {
        return insertDuff(tibetanFontSize, pos, glyphs, true);
 	}

+/**
+* Appends all DuffCodes in glyphs to the end of this document.
+*/
+	public void appendDuffCodes(DuffCode[] glyphs) {
+        // PERFORMANCE FIXME: this isn't so speedy, but it reuses
+        // existing code.
+        for (int i = 0; i < glyphs.length; i++) {
+            insertDuff(getLength(),
+                       new DuffData[] { new DuffData(new String(new char[] { glyphs[i].getCharacter() }),
+                                                     glyphs[i].getFontNum()) });
+        }
+	}
+

 	/** Replacing can be more efficient than inserting and then
        removing. This replaces the glyph at position pos with glyph,
@ -1039,6 +1078,15 @@ public class TibetanDocument extends DefaultStyledDocument {
        romanAttributeSet = ras;
    }

+    /** Sets the attribute set applied to Roman text in this
+        document. */
+    public void setRomanAttributeSet(String font, int size) {
+        SimpleAttributeSet ras = new SimpleAttributeSet();
+        StyleConstants.setFontFamily(ras, font);
+        StyleConstants.setFontSize(ras, size);
+        setRomanAttributeSet(ras);
+    }
+
 /**
 * Converts the specified portion of this document to THDL Extended
 * Wylie.
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@ -29,9 +29,9 @@ $~38,5~~9,41~~~~~~~0F06
 #~200,1~~9,39~~~~~~~0F05
 // Yig.mgo.tsheg.shad:
 %~39,5~~9,42~~~~~~~0F07
-// dbu.khang.g-yon:
+// dbu.khang.g-yon: (If this changes, edit ACIPConverter)
 (~208,1~~9,93~~~~~~~0F3C
-// dbu.khang.g-yas:
+// dbu.khang.g-yas: (If this changes, edit ACIPConverter)
 )~209,1~~9,94~~~~~~~0F3D
 H~239,1~~8,92~~~~~~~0F7F

--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@ -24,10 +24,14 @@ import java.util.Stack;

 import org.thdl.util.ThdlDebug;
 import org.thdl.util.ThdlOptions;
+import org.thdl.tib.text.TibetanDocument;
+import org.thdl.tib.text.TibetanMachineWeb;
+import org.thdl.tib.text.DuffCode;

 /**
-* This class is able to convert an ACIP file into Tibetan Machine Web.
-* From there, TMW->Unicode takes you to Unicode.
+* This class is able to convert an ACIP file into Unicode and an
+* ACIP file into Tibetan Machine Web (TMW).  ACIP->Unicode should yield
+* the same results as ACIP->TMW followed by TMW->Unicode (FIXME: test it!)
 * @author David Chandler
 */
 public class ACIPConverter {
@ -86,38 +90,70 @@ public class ACIPConverter {
            warnings = new StringBuffer();
            putWarningsInOutput = true;
        }
-        convertToUnicode(al, System.out, errors, warnings,
-                         putWarningsInOutput, warningLevel);
+        convertToTMW(al, System.out, errors, warnings,
+                     putWarningsInOutput, warningLevel);
+        int retCode = 0;
        if (errors.length() > 0) {
            System.err.println("Errors converting ACIP input file: ");
            System.err.println(errors);
            System.err.println("The output contains these errors.");
            System.err.println("Exiting; please fix input file and try again.");
-            System.exit(2);
+            retCode = 2;
        }
        if (null != warnings && warnings.length() > 0) {
            System.err.println("Warnings converting ACIP input file: ");
            System.err.println(warnings);
            if (putWarningsInOutput)
                System.err.println("The output contains these warnings.");
-            System.exit(2);
+            retCode = 2;
        }
-        if (verbose) System.err.println("Converted " + args[0] + " perfectly.");
-        System.exit(0);
+        if (0 == retCode) {
+            if (verbose) System.err.println("Converted " + args[0] + " perfectly.");
+        }
+        System.exit(retCode);
+        // DLC NOW: tRAStA is not converted correctly to Unicode, and
+        // no warning is given when converting to TMW.
    }

    /** Writes TMW/Latin to out.  If errors occur in converting a
-     *  tsheg bar, then they are appended to errors if errors is
-     *  non-null.  Returns true upon perfect success, false if errors
+     *  tsheg bar, then they are written into the output, and also
+     *  appended to errors if errors is non-null.  If warnings occur
+     *  in converting a tsheg bar, then they are written into the
+     *  output if writeWarningsToResult is true, and also appended to
+     *  warnings if warnings is non-null.  Returns true upon perfect
+     *  success or if there were merely warnings, false if errors
     *  occurred.
     *  @throws IOException if we cannot write to out
     */
-    public static boolean convertToTMW(ArrayList scan, String latinFont,
-                                       OutputStream out, StringBuffer errors)
+    public static boolean convertToTMW(ArrayList scan,
+                                       OutputStream out,
+                                       StringBuffer errors,
+                                       StringBuffer warnings,
+                                       boolean writeWarningsToResult,
+                                       String warningLevel)
        throws IOException
    {
-        throw new Error("DLC UNIMPLEMENTED");
+        TibetanDocument tdoc = new TibetanDocument();
+		tdoc.setRomanAttributeSet("Courier", 14); // DLC make me configurable.
+        boolean rv
+            = convertToTMW(scan, tdoc, errors, warnings,
+                           writeWarningsToResult, warningLevel);
+        tdoc.writeRTFOutputStream(out);
+        return rv;
    }
+
+    private static boolean convertToTMW(ArrayList scan,
+                                        TibetanDocument tdoc,
+                                        StringBuffer errors,
+                                        StringBuffer warnings,
+                                        boolean writeWarningsToResult,
+                                        String warningLevel)
+        throws IOException
+    {
+        return convertTo(false, scan, null, tdoc, errors, warnings,
+                         writeWarningsToResult, warningLevel);
+    }
+
    // DLC FIXME: sometimes { } is \u0F0B, and sometimes it is a
    // space.  Treat it as a tsheg only when it appears after a
    // syllable or another tsheg.
@ -130,7 +166,8 @@ public class ACIPConverter {
     *  or in converting a tsheg bar, then they are appended to
     *  warnings if warnings is non-null, and they are written to the
     *  result if writeWarningsToResult is true.  Returns the
-     *  conversion upon perfect success, null if errors occurred.
+     *  conversion upon perfect success or if there were merely
+     *  warnings, null if errors occurred.
     */
    public static String convertToUnicode(String acip,
                                          StringBuffer errors,
@ -174,25 +211,43 @@ public class ACIPConverter {
                                           boolean writeWarningsToOut,
                                           String warningLevel)
        throws IOException
+    {
+        return convertTo(true, scan, out, null, errors, warnings,
+                         writeWarningsToOut, warningLevel);
+    }
+
+    private static boolean convertTo(boolean toUnicode, // else to TMW
+                                     ArrayList scan,
+                                     OutputStream out, // for toUnicode mode
+                                     TibetanDocument tdoc, // for !toUnicode mode
+                                     StringBuffer errors,
+                                     StringBuffer warnings,
+                                     boolean writeWarningsToOut,
+                                     String warningLevel)
+        throws IOException
    {
        int sz = scan.size();
        boolean hasErrors = false;
-        BufferedWriter writer
-            = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
+        BufferedWriter writer = null;
+        if (toUnicode)
+            writer
+                = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
        for (int i = 0; i < sz; i++) {
            ACIPString s = (ACIPString)scan.get(i);
            int stype = s.getType();
            if (stype == ACIPString.ERROR) {
                hasErrors = true;
-                writer.write("[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: ");
-                writer.write(s.getText());
-                writer.write("]");
+                String text = "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s.getText() + "]";
+                if (null != writer) writer.write(text);
+                if (null != tdoc) tdoc.appendRoman(text);
            } else if (stype == ACIPString.WARNING) {
                if (writeWarningsToOut) {
-                    writer.write("[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: ");
-                    writer.write(s.getText());
-                    writer.write("]");
+                    String text = "[#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s.getText() + "]";
+                    if (null != writer) writer.write(text);
+                    if (null != tdoc) tdoc.appendRoman(text);
                }
+                // DLC NOW: Warning: We're going with {'}{R}{DA}, but only because our knowledge of prefix rules says that {'}{R+DA} is not a legal Tibetan tsheg bar ("syllable")
+
                if (null != warnings) {
                    warnings.append("Warning: Lexical warning: ");
                    warnings.append(s.getText());
@ -200,13 +255,15 @@ public class ACIPConverter {
                }
            } else {
                if (s.isLatin(stype)) {
-                    if (stype == ACIPString.FOLIO_MARKER)
-                        writer.write("{");
-                    writer.write(s.getText());
-                    if (stype == ACIPString.FOLIO_MARKER)
-                        writer.write("}");
+                    String text
+                        = (((stype == ACIPString.FOLIO_MARKER) ? "{" : "")
+                           + s.getText()
+                           + ((stype == ACIPString.FOLIO_MARKER) ? "}" : ""));
+                    if (null != writer) writer.write(text);
+                    if (null != tdoc) tdoc.appendRoman(text);
                } else {
                    String unicode = null;
+                    DuffCode[] duff = null;
                    if (stype == ACIPString.TIBETAN_NON_PUNCTUATION) {
                        TPairList pl = TPairListFactory.breakACIPIntoChunks(s.getText());
                        String acipError;
@ -214,7 +271,8 @@ public class ACIPConverter {
                        if ((acipError = pl.getACIPError()) != null) {
                            hasErrors = true;
                            String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS THESE ERRORS: " + acipError + "]";
-                            writer.write(errorMessage);
+                            if (null != writer) writer.write(errorMessage);
+                            if (null != tdoc) tdoc.appendRoman(errorMessage);
                            if (null != errors)
                                errors.append(errorMessage + "\n");
                        } else {
@ -222,7 +280,8 @@ public class ACIPConverter {
                            if (null == pt) {
                                hasErrors = true;
                                String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " IS ESSENTIALLY NOTHING.]";
-                                writer.write(errorMessage);
+                                if (null != writer) writer.write(errorMessage);
+                                if (null != tdoc) tdoc.appendRoman(errorMessage);
                                if (null != errors)
                                    errors.append(errorMessage + "\n");
                            } else {
@ -230,7 +289,8 @@ public class ACIPConverter {
                                if (null == sl) {
                                    hasErrors = true;
                                    String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS NO LEGAL PARSES.]";
-                                    writer.write(errorMessage);
+                                    if (null != writer) writer.write(errorMessage);
+                                    if (null != tdoc) tdoc.appendRoman(errorMessage);
                                    if (null != errors)
                                        errors.append(errorMessage + "\n");
                                } else {
@ -240,36 +300,74 @@ public class ACIPConverter {
                                                        s.getText());
                                    if (null != warning) {
                                        if (writeWarningsToOut) {
-                                            writer.write("[#WARNING CONVERTING ACIP DOCUMENT: ");
-                                            writer.write(warning);
-                                            writer.write("]");
+                                            String text
+                                                = ("[#WARNING CONVERTING ACIP DOCUMENT: "
+                                                   + warning + "]");
+                                            if (null != writer) writer.write(text);
+                                            if (null != tdoc) tdoc.appendRoman(text);
                                        }
                                        if (null != warnings) {
                                            warnings.append(warning);
                                            warnings.append('\n');
                                        }
                                    }
-                                    unicode = sl.getUnicode();
-                                    if (null == unicode) throw new Error("FIXME: make this an assertion");
+                                    if (null != writer) {
+                                        unicode = sl.getUnicode();
+                                        if (null == unicode) throw new Error("FIXME: make this an assertion 4");
+                                    }
+                                    if (null != tdoc) {
+                                        duff = sl.getDuff();
+                                        if (0 == duff.length) {
+                                            throw new Error("No DuffCodes for stack list " + sl); // FIXME: make this an assertion
+                                        }
+                                    }
                                }
                            }
                        }
                    } else {
-                        if (stype == ACIPString.START_SLASH)
-                            unicode = "\u0F3C";
-                        else if (stype == ACIPString.END_SLASH)
-                            unicode = "\u0F3D";
-                        else
-                            unicode = ACIPRules.getUnicodeFor(s.getText(), false);
-                        if (null == unicode) throw new Error("FIXME: make this an assertion");
+                        if (stype == ACIPString.START_SLASH) {
+                            if (null != writer) unicode = "\u0F3C";
+                            if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph("(") };
+                        } else if (stype == ACIPString.END_SLASH) {
+                            if (null != writer) unicode = "\u0F3D";
+                            if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph(")") };
+                        } else {
+                            if (null != writer) unicode = ACIPRules.getUnicodeFor(s.getText(), false);
+                            if (null != tdoc) {
+                                if (s.getText().equals("\r") || s.getText().equals("\t") || s.getText().equals("\n")) {
+                                    tdoc.appendRoman(s.getText());
+                                    continue;
+                                }
+                                else {
+                                    String wy = ACIPRules.getWylieForACIPOther(s.getText());
+                                    if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
+                                    duff = new DuffCode[] { TibetanMachineWeb.getGlyph(wy) };
+                                }
+                            }
+                        }
+                        if (null != writer && null == unicode)
+                            throw new Error("FIXME: make this an assertion 1");
+                        if (null != tdoc && (null == duff || 0 == duff.length))
+                            throw new Error("FIXME: make this an assertion 2");
                    }
-                    if (null != unicode) {
-                        writer.write(unicode);
+                    if (null != writer && null != unicode) writer.write(unicode);
+                    if (null != tdoc) {
+                        if (null != duff && 0 != duff.length) {
+                            tdoc.appendDuffCodes(duff);
+                            // DLC NOW FIXME: use TibTextUtils.getVowel logic to make the output beautiful.
+                        } else {
+                            // this happens when you have an
+                            // [#ERROR]-producing tsheg bar.
+                            
+                            // System.err.println("Bad tsheg bar with ACIP {" + s.getText() + "}");
+                        }
                    }
                }
            }
        }
-        writer.close();
+        if (null != writer) {
+            writer.close();
+        }
        return !hasErrors;
    }
 }
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@ -19,8 +19,12 @@ Contributor(s): ______________________________________.
 package org.thdl.tib.text.ttt;

 import java.util.HashSet;
+import java.util.ArrayList;
 import java.util.HashMap;

+import org.thdl.tib.text.DuffCode;
+import org.thdl.tib.text.TibetanMachineWeb;
+
 /** Canonizes some facts regarding the ACIP transcription system.
 *  @author David Chandler */
 class ACIPRules {
@ -36,7 +40,9 @@ class ACIPRules {
    private static HashSet acipVowels = null;

    private static String[][] baseVowels = new String[][] {
-        // { ACIP, EWTS, EWTS for '\'' + baseVowels[][0] }:
+        // { ACIP, EWTS, EWTS for ACIP {'\'' + baseVowels[][0]}, vowel
+        // numbers (see TibetanMachineWeb's VOWEL_A, VOWEL_o, etc.) 
+        // for ACIP, vowel numbers for ACIP {'\'' + baseVowels[][0]}
        { "A", "a", "A" },
        { "I", "i", "I" },
        { "U", "u", "U" },
@ -70,7 +76,7 @@ class ACIPRules {
                // DLC keep this code in sync with getUnicodeFor.
                // DLC keep this code in sync with getWylieForACIPVowel

-                // DLC '\' for visarga? how shall we do \ the visarga? like a vowel or not?
+                // DLC '\' for virama? how shall we do \ the virama? like a vowel or not?
            }
        }
        return (acipVowels.contains(s));
@ -211,6 +217,39 @@ class ACIPRules {
        return (String)acipVowel2wylie.get(acip);
    }

+    private static HashMap acipOther2wylie = null;
+    /** Returns the EWTS corresponding to the given ACIP punctuation or
+     *  mark.  Returns null if there is no such EWTS. */
+    static final String getWylieForACIPOther(String acip) {
+        if (acipOther2wylie == null) {
+            acipOther2wylie = new HashMap(37);
+
+            // DLC FIXME: check all these again.
+            acipOther2wylie.put(",", "/");
+            acipOther2wylie.put(" ", " ");
+            acipOther2wylie.put(".", "*");
+            acipOther2wylie.put("|", "|");
+            acipOther2wylie.put("`", "!");
+            acipOther2wylie.put(";", ";");
+            acipOther2wylie.put("*", "@");
+            acipOther2wylie.put("#", "@#");
+            acipOther2wylie.put("%", "%");
+            acipOther2wylie.put("&", "&");
+
+            acipOther2wylie.put("0", "0");
+            acipOther2wylie.put("1", "1");
+            acipOther2wylie.put("2", "2");
+            acipOther2wylie.put("3", "3");
+            acipOther2wylie.put("4", "4");
+            acipOther2wylie.put("5", "5");
+            acipOther2wylie.put("6", "6");
+            acipOther2wylie.put("7", "7");
+            acipOther2wylie.put("8", "8");
+            acipOther2wylie.put("9", "9");
+        }
+        return (String)acipOther2wylie.get(acip);
+    }
+
    private static HashMap superACIP2unicode = null;
    private static HashMap subACIP2unicode = null;
    /** If acip is an ACIP consonant or vowel or punctuation mark,
@ -416,6 +455,42 @@ class ACIPRules {
            if (null != u) return u;
        }
        return (String)superACIP2unicode.get(acip);
+    }

+
+
+    /** DLC DOC: Gets the duffcodes for vowel, such that they look good with hashKey, and appends them to r. */
+    static void getDuffForACIPVowel(ArrayList r, String hashKey, String vowel) {
+        if (null == vowel) return;
+        if (null == getWylieForACIPVowel(vowel)) // FIXME: expensive assertion!  Use assert.
+            throw new IllegalArgumentException("Vowel " + vowel + " isn't in the small set of vowels we handle correctly.");
+        if (!TibetanMachineWeb.isKnownHashKey(hashKey)) // FIXME: expensive assertion!  Use assert.
+            throw new IllegalArgumentException("bad hashKey");
+
+        // Order matters here.
+        if (vowel.indexOf("'U") >= 0)
+            r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_U));
+        else {
+            if (vowel.indexOf('\'') >= 0)
+                r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_A));
+            if (vowel.indexOf("EE") >= 0)
+                r.add(TibetanMachineWeb.getGlyph("ai"));
+            else if (vowel.indexOf('E') >= 0)
+                r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_e));
+            if (vowel.indexOf("OO") >= 0)
+                r.add(TibetanMachineWeb.getGlyph("au"));
+            else if (vowel.indexOf('O') >= 0)
+                r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_o));
+            if (vowel.indexOf('I') >= 0)
+                r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_i));
+            if (vowel.indexOf('U') >= 0)
+                r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_u));
+            if (vowel.indexOf('i') >= 0)
+                r.add(TibetanMachineWeb.getGlyph("-i"));
+        }
+        if (vowel.indexOf('m') >= 0)
+            r.add(TibetanMachineWeb.getGlyph("M"));
+        if (vowel.indexOf(':') >= 0)
+            r.add(TibetanMachineWeb.getGlyph("H"));
    }
 }
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@ -319,6 +319,16 @@ tstHelper("MSTAN"); // ambiguous with regard to prefix rules



+tstHelper("KA'", "[(K . A), (' . )]",
+          new String[] { "{KA}{'}" },
+          new String[] { "{KA}{'}" },
+          "{KA}{'}"); // DLC NOW
+
+         tstHelper("A'AAMA", "{A}{'}{AA}{MA}"); // FIXME: how should we parse this?
+
+         tstHelper("K+K+KA", "{K+}{K+}{KA}");
+
+

        // If you're not careful, you'll think GGYES is a legal
        // Tibetan tsheg bar and parse it as {G}{G+YE}{S}.  But it's
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@ -19,6 +19,10 @@ Contributor(s): ______________________________________.
 package org.thdl.tib.text.ttt;

 import org.thdl.util.ThdlDebug;
+import org.thdl.tib.text.TibetanMachineWeb;
+import org.thdl.tib.text.DuffCode;
+
+import java.util.ArrayList;

 /** An ordered pair used in ACIP-to-TMW conversion.  The left side is
 *  the consonant or empty; the right side is the vowel, '+', or '-'.
@ -70,7 +74,9 @@ class TPair {

    /** Returns an TPair that is like this one except that it is
     *  missing N characters.  The characters are taken from r, the
-     *  right side, first and from l, the left side, second.
+     *  right side, first and from l, the left side, second.  The pair
+     *  returned may be illegal, such as the (A . ') you can get from
+     *  ACIP {A'AAMA}.
     *  @throw IllegalArgumentException if N is out of range */
    TPair minusNRightmostACIPCharacters(int N)
        throws IllegalArgumentException
@ -80,7 +86,7 @@ class TPair {
        if (N > size())
            throw new IllegalArgumentException("Don't have that many to remove.");
        if (N < 1)
-            throw new IllegalArgumentException("You should't call this if you don't want to remove any.");
+            throw new IllegalArgumentException("You shouldn't call this if you don't want to remove any.");
        if (null != r && (sz = r.length()) > 0) {
            int min = Math.min(sz, N);
            newR = r.substring(0, sz - min);
@ -101,7 +107,7 @@ class TPair {
            return false;
        if (null != l && !ACIPRules.isConsonant(l))
            return false;
-        if (null != r && !ACIPRules.isVowel(l))
+        if (null != r && !ACIPRules.isVowel(r))
            return false;
        return true;
    }
@ -146,8 +152,14 @@ class TPair {
        return (l != null && l.length() == 1 && (ch = l.charAt(0)) >= '0' && ch <= '9');
    }

-    /** Returns the EWTS Wylie that corresponds to this pair.  Untested. */
    String getWylie() {
+        return getWylie(false); // justLeft == false: Wylie for the whole pair, left side followed by right side
+    }
+
+    /** Returns the EWTS Wylie that corresponds to this pair if
+     *  justLeft is false, or the EWTS Wylie that corresponds to just
+     *  {@link #getLeft()} if justLeft is true. */
+    String getWylie(boolean justLeft) {
        String leftWylie = null;
        if (getLeft() != null) {
            leftWylie = ACIPRules.getWylieForACIPConsonant(getLeft());
@ -156,6 +168,8 @@ class TPair {
                    leftWylie = getLeft();
            }
        }
+        if (null == leftWylie) leftWylie = "";
+        if (justLeft) return leftWylie;
        String rightWylie = null;
        if ("-".equals(getRight()))
            rightWylie = ".";
@ -163,7 +177,6 @@ class TPair {
            rightWylie = "+";
        else if (getRight() != null)
            rightWylie = ACIPRules.getWylieForACIPVowel(getRight());
-        if (null == leftWylie) leftWylie = "";
        if (null == rightWylie) rightWylie = "";
        return leftWylie + rightWylie;
    }
--- a/source/org/thdl/tib/text/ttt/TPairList.java
+++ b/source/org/thdl/tib/text/ttt/TPairList.java
@ -609,5 +609,24 @@ class TPairList {
        }
    }

+    /** Appends the DuffCodes that correspond to this grapheme cluster
+     *  to duff.  Assumes this is one grapheme cluster.
+     *
+     *  <p>The lookup key is the EWTS Wylie of every pair but the last
+     *  (both sides), followed by the Wylie of just the left side of
+     *  the last pair.  If TibetanMachineWeb does not know that key,
+     *  the key is retried with every '+' replaced by '-'.  The glyph
+     *  for the key is appended to duff, and then duff, the key, and
+     *  the right side of the last pair are handed to
+     *  ACIPRules.getDuffForACIPVowel.
+     *
+     *  @param duff the list to which results are appended
+     *  @throws Error if neither form of the key is known to
+     *  TibetanMachineWeb */
+    void getDuff(ArrayList duff) {
+        StringBuffer wylieForConsonant = new StringBuffer();
+        for (int x = 0; x + 1 < size(); x++) { // every pair except the last one
+            wylieForConsonant.append(get(x).getWylie(false)); // both sides of the pair
+        }
+        TPair lastPair = get(size() - 1); // NOTE(review): assumes size() >= 1 -- confirm callers never pass an empty list
+        wylieForConsonant.append(lastPair.getWylie(true)); // left side only; the right side is handled separately below
+        String hashKey = wylieForConsonant.toString();
+        if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
+            hashKey = hashKey.replace('+', '-'); // second attempt; presumably some keys use '-' as the separator -- confirm against TibetanMachineWeb
+            if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
+                throw new Error("How did this happen?"); // neither spelling of the key is known
+            }
+        }
+        duff.add(TibetanMachineWeb.getGlyph(hashKey)); // the glyph for the key goes in first
+        ACIPRules.getDuffForACIPVowel(duff, hashKey, lastPair.getRight()); // then whatever the right side of the last pair calls for
+    }
 }
 // DLC FIXME: handle 'o' and 'x', e.g. KAo and NYAx.
--- a/source/org/thdl/tib/text/ttt/TStackList.java
+++ b/source/org/thdl/tib/text/ttt/TStackList.java
@ -20,6 +20,7 @@ package org.thdl.tib.text.ttt;

 import org.thdl.tib.text.TibTextUtils;
 import org.thdl.tib.text.TGCList;
+import org.thdl.tib.text.DuffCode;

 import java.util.ArrayList;
 import java.util.ListIterator;
@ -216,8 +217,21 @@ class TStackList {
        }
        return u.toString();
    }
+    /** Returns the DuffCodes for this stack list, gathered by asking
+     *  each element, in index order, to append its DuffCodes to a
+     *  shared list.  An empty stack list yields an empty array.
+     *
+     *  @return the accumulated DuffCodes as an array
+     *  @throws Error if this list is non-empty but no DuffCodes were
+     *  produced */
+    DuffCode[] getDuff() {
+        ArrayList al = new ArrayList(size()*2); // initial capacity only, a rough estimate of two entries per stack
+        int count = 0; // NOTE(review): never read anywhere in this method
+        for (int i = 0; i < size(); i++) {
+            get(i).getDuff(al); // each element appends its own DuffCodes to al
+        }
+        if (size() > 0 && al.size() == 0) { // sanity check: stacks exist, yet nothing was appended
+            throw new Error("But this stack list, " + this + ", contains " + size() + " stacks!  How can it not have DuffCodes associated with it?");
+        }
+        return (DuffCode[])al.toArray(new DuffCode[] { }); // the empty array argument only supplies the runtime element type
+    }
 }

+/** Too simple to comment. */
 class BoolPair {
    boolean isLegal;
    boolean isLegalAndHasAVowelOnRoot;