Formatting in TMW documents is not preserved. I've added an identity

transformation, TMW->TMW, to help me debug this problem.
2005-02-13 00:34:47 +00:00 · 2005-02-13 00:34:47 +00:00 · 83f499b7a8
commit 83f499b7a8
parent 9025fb42d6
6 changed files with 218 additions and 91 deletions
--- a/source/org/thdl/tib/input/ConvertDialog.java
+++ b/source/org/thdl/tib/input/ConvertDialog.java
@ -457,7 +457,10 @@ class ConvertDialog extends JDialog
        } else if (FIND_ALL_NON_TM == ct) {
            newFileNamePrefix = "AllNonTM__";
            newFileNameExtension = ".TXT";
-        } else { // conversion {to Wylie or TM} mode
+        } else if (TMW_TO_SAME_TWM == ct) {
+            newFileNamePrefix = "TMW_to_same_TMW__";
+            newFileNameExtension = ".RTF";
+        } else { // conversion mode
            if (TMW_TO_WYLIE == ct) {
                newFileNamePrefix = suggested_WYLIE_prefix;
            } else if (TMW_TO_WYLIE_TEXT == ct) {
--- a/source/org/thdl/tib/input/ConverterGUI.java
+++ b/source/org/thdl/tib/input/ConverterGUI.java
@ -93,6 +93,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
                                          "Attention required",
                                          JOptionPane.ERROR_MESSAGE);
            return false;
+        } else if (50 == returnCode) {
+            JOptionPane.showMessageDialog(cd,
+                                          "Error doing RTF->RTF identity copy.",
+                                          "Attention required",
+                                          JOptionPane.ERROR_MESSAGE);
+            return false;
        } else if (43 == returnCode) {
            JOptionPane.showMessageDialog(cd,
                                          "Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
@ -192,13 +198,15 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
            if (null == owner) {
                convDialog
                    = new ConvertDialog(new ConverterGUI(),
-                                        CHOICES,
+                                        ThdlOptions.getBooleanOption("thdl.debug")
+                                        ? DEBUG_CHOICES : CHOICES,
                                        true);
            } else {
                convDialog
                    = new ConvertDialog(owner,
                                        new ConverterGUI(),
-                                        CHOICES,
+                                        ThdlOptions.getBooleanOption("thdl.debug")
+                                        ? DEBUG_CHOICES : CHOICES,
                                        true);
            }

--- a/source/org/thdl/tib/input/FontConverterConstants.java
+++ b/source/org/thdl/tib/input/FontConverterConstants.java
@ -26,6 +26,7 @@ import java.awt.*;
    @author Nathaniel Garson, Tibetan and Himalayan Digital Library */
 interface FontConverterConstants
 {
+    final String TMW_TO_SAME_TWM = "TMW to the same TMW (for testing only) (RTF->RTF)";
    final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
    final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
    final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
@ -56,6 +57,23 @@ interface FontConverterConstants
        FIND_ALL_NON_TM
    };

+    final String[] DEBUG_CHOICES = new String[] {
+        TMW_TO_SAME_TWM,
+        ACIP_TO_UNI_TEXT,
+        ACIP_TO_TMW,
+        TMW_TO_ACIP,
+        TMW_TO_ACIP_TEXT,
+        TM_TO_TMW,
+        TMW_TO_TM,
+        TMW_TO_UNI,
+        TMW_TO_WYLIE,
+        TMW_TO_WYLIE_TEXT,
+        FIND_SOME_NON_TMW,
+        FIND_SOME_NON_TM,
+        FIND_ALL_NON_TMW,
+        FIND_ALL_NON_TM
+    };
+
    final String suggested_WYLIE_prefix = "THDL_Wylie_";
    final String suggested_ACIP_prefix = "ACIP_";
    final String suggested_TO_TMW_prefix = "TMW_";
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@ -66,6 +66,7 @@ public class TibetanConverter implements FontConverterConstants {
     *  @return the exit code. */
    public static int realMain(String[] args, PrintStream out) {
        try {
+            boolean convertTmwToTmwMode = false;
            boolean convertToUnicodeMode = false;
            boolean convertToTMMode = false;
            boolean convertACIPToUniMode = false;
@ -105,6 +106,8 @@ public class TibetanConverter implements FontConverterConstants {
                             || args[numArgs - 3].equals("long"))
                        || !((findAllNonTMWMode
                              = args[numArgs - 2].equals("--find-all-non-tmw"))
+                             || (convertTmwToTmwMode
+                                 = args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
                             || (convertToTMMode
                                 = args[numArgs - 2].equals("--to-tibetan-machine"))
                             || (convertToTMWMode
@ -140,6 +143,7 @@ public class TibetanConverter implements FontConverterConstants {
                out.println("                 --warning-level None|Some|Most|All");
                out.println("                 --acip-to-tibetan-warning-and-error-messages short|long");
                out.println("                 --find-all-non-tmw | --find-some-non-tmw");
+                out.println("                   | --tmw-to-tmw-for-testing");
                out.println("                   | --to-tibetan-machine | --to-tibetan-machine-web");
                out.println("                   | --to-unicode | --to-wylie | --to-acip");
                out.println("                   | --to-wylie-text | --to-acip-text");
@ -246,6 +250,8 @@ public class TibetanConverter implements FontConverterConstants {
                    conversionTag = TMW_TO_ACIP_TEXT;
                } else if (convertToUnicodeMode) {
                    conversionTag = TMW_TO_UNI;
+                } else if (convertTmwToTmwMode) {
+                    conversionTag = TMW_TO_SAME_TWM;
                } else if (convertToTMWMode) {
                    conversionTag = TM_TO_TMW;
                } else if (convertACIPToUniMode) {
@ -394,6 +400,7 @@ public class TibetanConverter implements FontConverterConstants {

                int exitCode = 0;
                ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
+                                 + ((TMW_TO_SAME_TWM == ct) ? 1 : 0)
                                 + ((TMW_TO_UNI == ct) ? 1 : 0)
                                 + ((TM_TO_TMW == ct) ? 1 : 0)
                                 + ((TMW_TO_ACIP == ct) ? 1 : 0)
@ -402,7 +409,14 @@ public class TibetanConverter implements FontConverterConstants {
                                 + ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
                                 == 1);
                long numAttemptedReplacements[] = new long[] { 0 };
-                if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
+                if (TMW_TO_SAME_TWM == ct) {
+                    // Identity conversion for testing
+                    if (tdoc.identityTmwToTmwConversion(0,
+                                                        tdoc.getLength(),
+                                                        numAttemptedReplacements)) {
+                        exitCode = 50;
+                    }
+                } else if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
                    // Convert to THDL Wylie:
                    if (!tdoc.toWylie(0,
                                      tdoc.getLength(),
--- a/source/org/thdl/tib/text/TibetanDocument.java
+++ b/source/org/thdl/tib/text/TibetanDocument.java
@ -219,13 +219,18 @@ public class TibetanDocument extends DefaultStyledDocument {
 * @param color the color in which to insert, which is used if and only
 * if {@link #colorsEnabled() colors are enabled}
 */
-	public int insertDuff(int pos, DuffData[] glyphs, Color color) {
-        return insertDuff(tibetanFontSize, pos, glyphs, true, color);
-	}
+public int insertDuff(int pos, DuffData[] glyphs, Color color) {
+    return insertDuff(tibetanFontSize, pos, glyphs, true, color);
+}

-	public int insertDuff(int pos, DuffData[] glyphs) {
-        return insertDuff(tibetanFontSize, pos, glyphs, true, Color.black);
-	}
+/**
+* Inserts a stretch of TibetanMachineWeb data into the document.
+* @param pos the position at which you want to insert text
+* @param glyphs the array of Tibetan data you want to insert
+*/
+public int insertDuff(int pos, DuffData[] glyphs) {
+    return insertDuff(tibetanFontSize, pos, glyphs, true, Color.black);
+}

 /**
 * Appends glyph to the end of this document.
@ -255,7 +260,7 @@ public class TibetanDocument extends DefaultStyledDocument {
                     glyph.font, asTMW);
    }

-	/** Replacing can be more efficient than inserting and then
+    /** Replacing can be more efficient than inserting and then
        removing. This replaces the glyphs at position [startOffset,
        endOffset) with data, which is interpreted as TMW glyphs if
        asTMW is true and as TM glyphs otherwise.  The font size for
@ -265,16 +270,16 @@ public class TibetanDocument extends DefaultStyledDocument {
    private void replaceDuffs(int fontSize, int startOffset,
                              int endOffset, String data,
                              int newFontIndex, boolean asTMW) {
-		MutableAttributeSet mas
+        MutableAttributeSet mas
            = ((asTMW)
               ? TibetanMachineWeb.getAttributeSet(newFontIndex)
               : TibetanMachineWeb.getAttributeSetTM(newFontIndex));
        StyleConstants.setFontSize(mas, fontSize);
-		try {
+        try {
            replace(startOffset, endOffset - startOffset, data, mas);
        } catch (BadLocationException ble) {
            ThdlDebug.noteIffyCode();
-		}
+        }
    }

 	/** Replacing can be more efficient than inserting and then
@ -297,25 +302,25 @@ public class TibetanDocument extends DefaultStyledDocument {
 		}
    }

-	private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) {
-        return insertDuff(fontSize, pos, glyphs, asTMW, Color.black);
-    }
-	private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW, Color color) {
-		if (glyphs == null)
-			return pos;
+private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) {
+    return insertDuff(fontSize, pos, glyphs, asTMW, Color.black);
+}
+private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW, Color color) {
+    if (glyphs == null)
+        return pos;

-		MutableAttributeSet mas;
-		for (int i=0; i<glyphs.length; i++) {
-            mas = ((asTMW)
-                   ? TibetanMachineWeb.getAttributeSet(glyphs[i].font)
-                   : TibetanMachineWeb.getAttributeSetTM(glyphs[i].font));
-            if (null == mas)
-                throw new Error("Cannot insert that DuffData; the font number is too low or too high; perhaps the programmer has asTMW set incorrectly?");
-			appendDuff(fontSize, pos, glyphs[i].text, mas, color);
-			pos += glyphs[i].text.length();
-		}
-		return pos;
-	}
+    MutableAttributeSet mas;
+    for (int i=0; i<glyphs.length; i++) {
+        mas = ((asTMW)
+               ? TibetanMachineWeb.getAttributeSet(glyphs[i].font)
+               : TibetanMachineWeb.getAttributeSetTM(glyphs[i].font));
+        if (null == mas)
+            throw new Error("Cannot insert that DuffData; the font number is too low or too high; perhaps the programmer has asTMW set incorrectly?");
+        appendDuff(fontSize, pos, glyphs[i].text, mas, color);
+        pos += glyphs[i].text.length();
+    }
+    return pos;
+}

 /**
 * Converts the entire document into Extended Wylie.
@ -629,7 +634,7 @@ public class TibetanDocument extends DefaultStyledDocument {
    */
    public boolean convertToTM(int begin, int end, StringBuffer errors,
                               long numAttemptedReplacements[]) {
-        return convertHelper(begin, end, true, false, errors, null,
+        return convertHelper(begin, end, "TMW->TM", errors, null,
                             numAttemptedReplacements);
    }

@ -651,7 +656,7 @@ public class TibetanDocument extends DefaultStyledDocument {
    */
    public boolean convertToTMW(int begin, int end, StringBuffer errors,
                                long numAttemptedReplacements[]) {
-        return convertHelper(begin, end, false, false, errors, null,
+        return convertHelper(begin, end, "TM->TMW", errors, null,
                             numAttemptedReplacements);
    }

@ -676,7 +681,7 @@ public class TibetanDocument extends DefaultStyledDocument {
    public boolean convertToUnicode(int begin, int end, StringBuffer errors,
                                    String unicodeFont,
                                    long numAttemptedReplacements[]) {
-        return convertHelper(begin, end, false, true, errors, unicodeFont,
+        return convertHelper(begin, end, "TMW->Unicode", errors, unicodeFont,
                             numAttemptedReplacements);
    }

@ -790,8 +795,11 @@ public class TibetanDocument extends DefaultStyledDocument {
        return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
    }

-    /** Helper function.  Converts TMW->TM if !toUnicode&&toTM,
-        TM->TMW if !toUnicode&&!toTM, TMW->Unicode if toUnicode.
+    /** Helper function.  Converts TMW->TM, TM->TMW, TMW->Unicode, or
+        TMW-> the very same TMW [just for testing Java's RTF support]
+        depending on mode.
+        @param mode one of "TMW->TMW-identity" (a null conversion for
+        testing), "TM->TMW", "TMW->TM", or "TMW->Unicode"
        @param errors if non-null, then notes about all exceptional
        cases will be appended to this StringBuffer
        @return false on 100% success, true if any exceptional case
@ -799,8 +807,8 @@ public class TibetanDocument extends DefaultStyledDocument {
        @see #convertToUnicode(int,int,StringBuffer,String,long[])
        @see #convertToTMW(int,int,StringBuffer,long[]) 
        @see #convertToTM(int,int,StringBuffer,long[]) */
-    private boolean convertHelper(int begin, int end, boolean toTM,
-                                  boolean toUnicode, StringBuffer errors,
+    private boolean convertHelper(int begin, int end, String mode,
+                                  StringBuffer errors,
                                  String unicodeFont,
                                  long numAttemptedReplacements[]) {
        // To preserve formatting, we go paragraph by paragraph.
@ -808,6 +816,9 @@ public class TibetanDocument extends DefaultStyledDocument {
        // Use positions, not offsets, because our work on paragraph K
        // will affect the offsets of paragraph K+1.

+        ThdlDebug.verify("TMW->TMW-identity" == mode || "TMW->Unicode" == mode
+                         || "TM->TMW" == mode || "TMW->TM" == mode);
+
        Position finalEndPos;
        if (end < 0) {
            end = getLength();
@ -835,13 +846,13 @@ public class TibetanDocument extends DefaultStyledDocument {
                noMore = true;
                ceh.doErrorWrapup = true;
            }
-            convertHelperHelper(thisParagraph.getStartOffset(),
-                                ((finalEndPos.getOffset() < p_end)
-                                 ? finalEndPos.getOffset()
-                                 : p_end),
-                                toTM, toUnicode, errors, ceh,
-                                unicodeFont,
-                                numAttemptedReplacements);
+            convertParagraph(thisParagraph.getStartOffset(),
+                             ((finalEndPos.getOffset() < p_end)
+                              ? finalEndPos.getOffset()
+                              : p_end),
+                             mode, errors, ceh,
+                             unicodeFont,
+                             numAttemptedReplacements);
        }
        if (!ceh.errorReturn
            && pl != getParagraphs(begin, finalEndPos.getOffset()).length) {
@ -860,24 +871,22 @@ public class TibetanDocument extends DefaultStyledDocument {
        return ceh.errorReturn;
    }

-    /** See the sole caller, convertHelper. */
-    private void convertHelperHelper(int begin, int end, boolean toTM,
-                                     boolean toUnicode, StringBuffer errors,
-                                     ConversionErrorHelper ceh,
-                                     String unicodeFont,
-                                     long numAttemptedReplacements[]) {
-        final boolean debug = false;
-        if (debug)
-            System.out.println("cHH: [" + begin + ", " + end + ")");
+    /** See the sole caller, {@link #convertHelper}.  begin and end
+        should specify the bounds of a paragraph. */
+    private void convertParagraph(int begin, int end, String mode,
+                                  StringBuffer errors,
+                                  ConversionErrorHelper ceh,
+                                  String unicodeFont,
+                                  long numAttemptedReplacements[]) {
+        final int debug = 0;
+        if (debug > 0)
+            System.out.println("convertParagraph: [" + begin + ", " + end + ")");
        // DLC FIXME: here's an idea, a compressor -- use the '-' (ord
        // 45) or ' ' (ord 32) glyph from the same font as the
        // preceding glyph, never others.  This reduces the size of a
        // TMW RTF file by a factor of 3 sometimes.  To do it, use
-        // this routine, but give it the ability to go from TMW->TMW
-        // and TM->TM.
-
-        // toTM is ignored when toUnicode is true:
-        ThdlDebug.verify(!toUnicode || !toTM);
+        // this routine, but give it the ability to go from
+        // TMW->compressed-TMW and TM->compressed-TM.

        boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
        if (end < 0)
@ -910,8 +919,10 @@ public class TibetanDocument extends DefaultStyledDocument {
            while (i < endPos.getOffset()) {
                AttributeSet attr = getCharacterElement(i).getAttributes();
                String fontName = StyleConstants.getFontFamily(attr);
-				int fontNum
-                    = ((toTM || toUnicode)
+                int fontNum
+                    = (("TMW->TM" == mode
+                        || "TMW->Unicode" == mode
+                        || "TMW->TMW-identity" == mode)
                       ? TibetanMachineWeb.getTMWFontNumber(fontName)
                       : TibetanMachineWeb.getTMFontNumber(fontName));

@ -926,14 +937,18 @@ public class TibetanDocument extends DefaultStyledDocument {

                    DuffCode dc = null;
                    String unicode = null;
-                    if (toUnicode) {
+                    if ("TMW->Unicode" == mode) {
                        unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1,
                                                                    getText(i,1).charAt(0));
                    } else {
-                        if (toTM) {
+                        if ("TMW->TM" == mode) {
                            dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
                                                              getText(i,1).charAt(0),
                                                              replacementFontIndex);
+                        } else if ("TMW->TMW-identity" == mode) {
+                            dc = TibetanMachineWeb.mapTMWtoItself(fontNum - 1,
+                                                                  getText(i,1).charAt(0),
+                                                                  replacementFontIndex);
                        } else {
                            dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
                                                              getText(i,1).charAt(0),
@ -942,7 +957,7 @@ public class TibetanDocument extends DefaultStyledDocument {
                    }
                    if (replacementQueue.length() > 0
                        && (mustReplace
-                            || ((!toUnicode
+                            || (("TMW->Unicode" != mode
                                 && null != dc
                                 && dc.getFontNum() != replacementFontIndex)
                                || fontSize != replacementFontSize))) {
@ -955,7 +970,7 @@ public class TibetanDocument extends DefaultStyledDocument {

                        // this if-else statement is duplicated below; beware!
                        int endIndex = mustReplace ? mustReplaceUntil : i;
-                        if (toUnicode) {
+                        if ("TMW->Unicode" == mode) {
                            UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
                            replaceDuffsWithUnicode(replacementFontSize,
                                                    replacementStartIndex,
@ -968,13 +983,13 @@ public class TibetanDocument extends DefaultStyledDocument {
                                         endIndex,
                                         replacementQueue.toString(),
                                         replacementFontIndex,
-                                         !toTM);
+                                         mode != "TMW->TM");
                        }

                        // i += numnewchars - numoldchars;
-                        if (debug)
+                        if (debug > 10)
                            System.out.println("Incrementing i by " + (replacementQueue.length()
-                              - (endIndex - replacementStartIndex)) + "; replaced a patch with font size " + replacementFontSize + ", fontindex " + replacementFontIndex);
+                                                                       - (endIndex - replacementStartIndex)) + "; replaced a patch with font size " + replacementFontSize + ", fontindex " + replacementFontIndex);
                        i += (replacementQueue.length()
                              - (endIndex - replacementStartIndex));

@ -986,13 +1001,13 @@ public class TibetanDocument extends DefaultStyledDocument {
                        if (0 == replacementQueue.length()) {
                            replacementFontSize = fontSize;
                            replacementStartIndex = i;
-                            if (!toUnicode) {
+                            if ("TMW->Unicode" != mode) {
                                replacementFontIndex = dc.getFontNum();
                            }
                        }
-                        if (toUnicode) {
+                        if ("TMW->Unicode" == mode) {
                            replacementQueue.append(unicode);
-                            if (debug)
+                            if (debug > 0)
                                System.out.println("unicode rq.append: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(unicode));
                        } else {
                            replacementQueue.append(dc.getCharacter());
@ -1011,9 +1026,7 @@ public class TibetanDocument extends DefaultStyledDocument {
                            ceh.problemGlyphsTable.put(cgf, "yes this character appears once");
                            if (null != errors) {
                                String err
-                                    = (toUnicode
-                                       ? "TMW->Unicode"
-                                       : (toTM ? "TMW->TM" : "TM->TMW"))
+                                    = mode
                                    + " conversion failed for a glyph:\nFont is "
                                    + fontName + ", glyph number is "
                                    + (int)getText(i,1).charAt(0)
@ -1028,9 +1041,15 @@ public class TibetanDocument extends DefaultStyledDocument {
                                // the beginning of the document,
                                // after a 'a' character (i.e.,
                                // \tm0062 or \tmw0063):
-                                equivalent[0].setData((toUnicode || toTM) ? (char)63 : (char)62, 1);
+                                equivalent[0].setData((("TMW->Unicode" == mode
+                                                        || "TMW->TM" == mode)
+                                                       ? (char)63 : (char)62),
+                                                      1);
                                insertDuff(72, ceh.errorGlyphLocation++,
-                                           equivalent, toUnicode || toTM);
+                                           equivalent,
+                                           ("TMW->Unicode" == mode
+                                            || "TMW->TMW-identity" == mode
+                                            || "TMW->TM" == mode));
                                ++i;
                                // Don't later replace this last guy:
                                if (replacementStartIndex < ceh.errorGlyphLocation) {
@ -1038,7 +1057,10 @@ public class TibetanDocument extends DefaultStyledDocument {
                                }
                                equivalent[0].setData(getText(i,1), fontNum);
                                insertDuff(72, ceh.errorGlyphLocation++,
-                                           equivalent, toUnicode || toTM);
+                                           equivalent,
+                                           ("TMW->Unicode" == mode
+                                            || "TMW->TMW-identity" == mode
+                                            || "TMW->TM" == mode));
                                ++i;
                                // Don't later replace this last guy:
                                if (replacementStartIndex < ceh.errorGlyphLocation) {
@ -1056,7 +1078,10 @@ public class TibetanDocument extends DefaultStyledDocument {
                        }
                    }
                } else {
-                    if (debug) System.out.println("non-tm/tmw found at offset " + i + "; font=" + fontName + " ord " + (int)getText(i,1).charAt(0));
+                    // FIXME: are we doing the right thing here?  I
+                    // think so -- I think we're just not replacing
+                    // the current character, but I'm not at all sure.
+                    if (debug > 0) System.out.println("non-tm/tmw found at offset " + i + "; font=" + fontName + " ord " + (int)getText(i,1).charAt(0));
                    if (replacementQueue.length() > 0) {
                        if (!mustReplace) {
                            mustReplaceUntil = i;
@ -1069,14 +1094,14 @@ public class TibetanDocument extends DefaultStyledDocument {
            if (replacementQueue.length() > 0) {
                // this if-else statement is duplicated above; beware!
                int endIndex = mustReplace ? mustReplaceUntil : i;
-                if (toUnicode) {
+                if ("TMW->Unicode" == mode) {
                    UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
                    replaceDuffsWithUnicode(replacementFontSize,
                                            replacementStartIndex,
                                            endIndex,
                                            replacementQueue.toString(),
                                            unicodeFont);
-                    if (debug)
+                    if (debug > 0)
                        System.out.println("unicode rq: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(replacementQueue.toString()));
                } else {
                    replaceDuffs(replacementFontSize,
@ -1084,7 +1109,7 @@ public class TibetanDocument extends DefaultStyledDocument {
                                 endIndex,
                                 replacementQueue.toString(),
                                 replacementFontIndex,
-                                 !toTM);
+                                 "TMW->TM" != mode);
                }
            }
            ceh.lastOffsetExamined = endPos.getOffset() - 1;
@ -1092,7 +1117,9 @@ public class TibetanDocument extends DefaultStyledDocument {
            if (ceh.doErrorWrapup && ceh.errorGlyphLocation > 0) {
                // Bracket the bad stuff with U+0F3C on the left
                // and U+0F3D on the right:
-                if (!(toUnicode || toTM)) {
+                if (!("TMW->Unicode" == mode
+                      || "TMW->TM" == mode
+                      || "TMW->TMW-identity" == mode)) {
                    equivalent[0].setData((char)209, 1);
                    insertDuff(72, ceh.errorGlyphLocation++,
                               equivalent, false);
@ -1254,6 +1281,30 @@ public class TibetanDocument extends DefaultStyledDocument {
        }
    }

+    /**
+       To test Java's RTF support, it's helpful to just try and do an
+       identity TMW->TMW transformation (you can think of it as a
+       converter that converts nothing).  I'm curious to see if the
+       problem we have with TMW->Unicode conversions failing to
+       preserve whitespace is a bug in our code or a bug in Java's RTF
+       support, and this provides one data point.
+       
+       @return false on 100% success, true if any exceptional case was
+       encountered
+       @exception Error if start or end is out of range */
+    public boolean identityTmwToTmwConversion(int start,
+                                              int end,
+                                              long numAttemptedReplacements[]) {
+        StringBuffer errors = new StringBuffer();
+        boolean r = convertHelper(start, end, "TMW->TMW-identity",
+                                  errors, "Unicode Font should not be used",
+                                  numAttemptedReplacements);
+        System.err.println("<TMW_TO_SAME_TWM-errors>");
+        System.err.println(errors.toString());
+        System.err.println("</TMW_TO_SAME_TWM-errors>");
+        return r;
+    }
+
    /** Returns all the paragraph elements in this document that
     *  contain glyphs with offsets in the range [start, end) where
     *  end < 0 is treated as the document's length.  Note that roman,
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -1372,6 +1372,39 @@ private static final DuffCode TMW_cr = new DuffCode(1, '\r');
 private static final DuffCode TMW_lf = new DuffCode(1, '\n');
 private static final DuffCode TMW_tab = new DuffCode(1, '\t');

+/** An identity function used merely for testing. */
+public static DuffCode mapTMWtoItself(int font, int ordinal, int suggestedFont) {
+    if (font < 0 || font > 9)
+        return null;
+    if (ordinal >= 255) {
+        throw new Error("I didn't know that ever happened.");
+    }
+    if (ordinal < 32) {
+        if (ordinal == (int)'\r') {
+            if (0 == suggestedFont)
+                return TMW_cr;
+            else
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
+        } else if (ordinal == (int)'\n') {
+            if (0 == suggestedFont)
+                return TMW_lf;
+            else
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
+        } else if (ordinal == (int)'\t') {
+            if (0 == suggestedFont)
+                return TMW_tab;
+            else
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
+        } else {
+            // Unexpected control character: note it and return null (no mapping).
+            ThdlDebug.noteIffyCode();
+            return null;
+        }
+    }
+    return new DuffCode(font + 1, (char)ordinal);
+}
+
+
 /** Returns the DuffCode for the TibetanMachineWeb glyph corresponding
    to the given TibetanMachine font
    (0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254).
@ -1406,17 +1439,17 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
            if (0 == suggestedFont)
                return TMW_cr;
            else
-                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
        } else if (ordinal == (int)'\n') {
            if (0 == suggestedFont)
                return TMW_lf;
            else
-                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
        } else if (ordinal == (int)'\t') {
            if (0 == suggestedFont)
                return TMW_tab;
            else
-                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
        } else {
            // for robustness, just return font 1, char ordinal.
            ThdlDebug.noteIffyCode();
@ -1430,7 +1463,7 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
    // assume that a machine goofed along the way.  (FIXME: optionally
    // warn.)
    if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
-        return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+        return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
    }
    return TMtoTMW[font][ordinal-32];
 }
@ -1465,17 +1498,17 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
            if (0 == suggestedFont)
                return TM_cr;
            else
-                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
        } else if (ordinal == (int)'\n') {
            if (0 == suggestedFont)
                return TM_lf;
            else
-                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
        } else if (ordinal == (int)'\t') {
            if (0 == suggestedFont)
                return TM_tab;
            else
-                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+                return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
        } else {
            // for robustness, just return font 1, char ordinal.
            ThdlDebug.noteIffyCode();
@ -1488,7 +1521,7 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
        return new DuffCode(1, (char)ordinal);
    }
    if ((0 != suggestedFont) && (32 == ordinal)) {
-        return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
+        return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
    }
    DuffCode ans = TMWtoTM[font][ordinal-32];
    return ans;