Formatting in TMW documents is not preserved. I've added an identity

transformation, TMW->TMW, to help me debug this problem.
This commit is contained in:
dchandler 2005-02-13 00:34:47 +00:00
parent 9025fb42d6
commit 83f499b7a8
6 changed files with 218 additions and 91 deletions

View file

@ -457,7 +457,10 @@ class ConvertDialog extends JDialog
} else if (FIND_ALL_NON_TM == ct) { } else if (FIND_ALL_NON_TM == ct) {
newFileNamePrefix = "AllNonTM__"; newFileNamePrefix = "AllNonTM__";
newFileNameExtension = ".TXT"; newFileNameExtension = ".TXT";
} else { // conversion {to Wylie or TM} mode } else if (TMW_TO_SAME_TWM == ct) {
newFileNamePrefix = "TMW_to_same_TMW__";
newFileNameExtension = ".RTF";
} else { // conversion mode
if (TMW_TO_WYLIE == ct) { if (TMW_TO_WYLIE == ct) {
newFileNamePrefix = suggested_WYLIE_prefix; newFileNamePrefix = suggested_WYLIE_prefix;
} else if (TMW_TO_WYLIE_TEXT == ct) { } else if (TMW_TO_WYLIE_TEXT == ct) {

View file

@ -93,6 +93,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
"Attention required", "Attention required",
JOptionPane.ERROR_MESSAGE); JOptionPane.ERROR_MESSAGE);
return false; return false;
} else if (50 == returnCode) {
JOptionPane.showMessageDialog(cd,
"Error doing RTF->RTF identity copy.",
"Attention required",
JOptionPane.ERROR_MESSAGE);
return false;
} else if (43 == returnCode) { } else if (43 == returnCode) {
JOptionPane.showMessageDialog(cd, JOptionPane.showMessageDialog(cd,
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?", "Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
@ -192,13 +198,15 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
if (null == owner) { if (null == owner) {
convDialog convDialog
= new ConvertDialog(new ConverterGUI(), = new ConvertDialog(new ConverterGUI(),
CHOICES, ThdlOptions.getBooleanOption("thdl.debug")
? DEBUG_CHOICES : CHOICES,
true); true);
} else { } else {
convDialog convDialog
= new ConvertDialog(owner, = new ConvertDialog(owner,
new ConverterGUI(), new ConverterGUI(),
CHOICES, ThdlOptions.getBooleanOption("thdl.debug")
? DEBUG_CHOICES : CHOICES,
true); true);
} }

View file

@ -26,6 +26,7 @@ import java.awt.*;
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */ @author Nathaniel Garson, Tibetan and Himalayan Digital Library */
interface FontConverterConstants interface FontConverterConstants
{ {
final String TMW_TO_SAME_TWM = "TMW to the same TMW (for testing only) (RTF->RTF)";
final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)"; final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)"; final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)"; final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
@ -56,6 +57,23 @@ interface FontConverterConstants
FIND_ALL_NON_TM FIND_ALL_NON_TM
}; };
/** The conversion choices offered when debugging: the list begins
    with the testing-only TMW_TO_SAME_TWM identity conversion,
    followed by the ordinary conversions and the find-non-TM/TMW
    modes.  ConverterGUI passes this array to ConvertDialog instead
    of CHOICES when the "thdl.debug" boolean option is set.
    NOTE(review): this appears to be CHOICES with TMW_TO_SAME_TWM
    prepended; keep the two lists in sync -- confirm against
    CHOICES. */
final String[] DEBUG_CHOICES = new String[] {
TMW_TO_SAME_TWM,
ACIP_TO_UNI_TEXT,
ACIP_TO_TMW,
TMW_TO_ACIP,
TMW_TO_ACIP_TEXT,
TM_TO_TMW,
TMW_TO_TM,
TMW_TO_UNI,
TMW_TO_WYLIE,
TMW_TO_WYLIE_TEXT,
FIND_SOME_NON_TMW,
FIND_SOME_NON_TM,
FIND_ALL_NON_TMW,
FIND_ALL_NON_TM
};
final String suggested_WYLIE_prefix = "THDL_Wylie_"; final String suggested_WYLIE_prefix = "THDL_Wylie_";
final String suggested_ACIP_prefix = "ACIP_"; final String suggested_ACIP_prefix = "ACIP_";
final String suggested_TO_TMW_prefix = "TMW_"; final String suggested_TO_TMW_prefix = "TMW_";

View file

@ -66,6 +66,7 @@ public class TibetanConverter implements FontConverterConstants {
* @return the exit code. */ * @return the exit code. */
public static int realMain(String[] args, PrintStream out) { public static int realMain(String[] args, PrintStream out) {
try { try {
boolean convertTmwToTmwMode = false;
boolean convertToUnicodeMode = false; boolean convertToUnicodeMode = false;
boolean convertToTMMode = false; boolean convertToTMMode = false;
boolean convertACIPToUniMode = false; boolean convertACIPToUniMode = false;
@ -105,6 +106,8 @@ public class TibetanConverter implements FontConverterConstants {
|| args[numArgs - 3].equals("long")) || args[numArgs - 3].equals("long"))
|| !((findAllNonTMWMode || !((findAllNonTMWMode
= args[numArgs - 2].equals("--find-all-non-tmw")) = args[numArgs - 2].equals("--find-all-non-tmw"))
|| (convertTmwToTmwMode
= args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
|| (convertToTMMode || (convertToTMMode
= args[numArgs - 2].equals("--to-tibetan-machine")) = args[numArgs - 2].equals("--to-tibetan-machine"))
|| (convertToTMWMode || (convertToTMWMode
@ -140,6 +143,7 @@ public class TibetanConverter implements FontConverterConstants {
out.println(" --warning-level None|Some|Most|All"); out.println(" --warning-level None|Some|Most|All");
out.println(" --acip-to-tibetan-warning-and-error-messages short|long"); out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
out.println(" --find-all-non-tmw | --find-some-non-tmw"); out.println(" --find-all-non-tmw | --find-some-non-tmw");
out.println(" | --tmw-to-tmw-for-testing");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
out.println(" | --to-unicode | --to-wylie | --to-acip"); out.println(" | --to-unicode | --to-wylie | --to-acip");
out.println(" | --to-wylie-text | --to-acip-text"); out.println(" | --to-wylie-text | --to-acip-text");
@ -246,6 +250,8 @@ public class TibetanConverter implements FontConverterConstants {
conversionTag = TMW_TO_ACIP_TEXT; conversionTag = TMW_TO_ACIP_TEXT;
} else if (convertToUnicodeMode) { } else if (convertToUnicodeMode) {
conversionTag = TMW_TO_UNI; conversionTag = TMW_TO_UNI;
} else if (convertTmwToTmwMode) {
conversionTag = TMW_TO_SAME_TWM;
} else if (convertToTMWMode) { } else if (convertToTMWMode) {
conversionTag = TM_TO_TMW; conversionTag = TM_TO_TMW;
} else if (convertACIPToUniMode) { } else if (convertACIPToUniMode) {
@ -394,6 +400,7 @@ public class TibetanConverter implements FontConverterConstants {
int exitCode = 0; int exitCode = 0;
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0) ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
+ ((TMW_TO_SAME_TWM == ct) ? 1 : 0)
+ ((TMW_TO_UNI == ct) ? 1 : 0) + ((TMW_TO_UNI == ct) ? 1 : 0)
+ ((TM_TO_TMW == ct) ? 1 : 0) + ((TM_TO_TMW == ct) ? 1 : 0)
+ ((TMW_TO_ACIP == ct) ? 1 : 0) + ((TMW_TO_ACIP == ct) ? 1 : 0)
@ -402,7 +409,14 @@ public class TibetanConverter implements FontConverterConstants {
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0) + ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
== 1); == 1);
long numAttemptedReplacements[] = new long[] { 0 }; long numAttemptedReplacements[] = new long[] { 0 };
if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) { if (TMW_TO_SAME_TWM == ct) {
// Identity conversion for testing
if (tdoc.identityTmwToTmwConversion(0,
tdoc.getLength(),
numAttemptedReplacements)) {
exitCode = 50;
}
} else if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
// Convert to THDL Wylie: // Convert to THDL Wylie:
if (!tdoc.toWylie(0, if (!tdoc.toWylie(0,
tdoc.getLength(), tdoc.getLength(),

View file

@ -219,13 +219,18 @@ public class TibetanDocument extends DefaultStyledDocument {
* @param color the color in which to insert, which is used if and only * @param color the color in which to insert, which is used if and only
* if {@link #colorsEnabled() colors are enabled} * if {@link #colorsEnabled() colors are enabled}
*/ */
public int insertDuff(int pos, DuffData[] glyphs, Color color) { public int insertDuff(int pos, DuffData[] glyphs, Color color) {
return insertDuff(tibetanFontSize, pos, glyphs, true, color); return insertDuff(tibetanFontSize, pos, glyphs, true, color);
} }
public int insertDuff(int pos, DuffData[] glyphs) { /**
return insertDuff(tibetanFontSize, pos, glyphs, true, Color.black); * Inserts a stretch of TibetanMachineWeb data into the document.
} * @param pos the position at which you want to insert text
* @param glyphs the array of Tibetan data you want to insert
*/
public int insertDuff(int pos, DuffData[] glyphs) {
return insertDuff(tibetanFontSize, pos, glyphs, true, Color.black);
}
/** /**
* Appends glyph to the end of this document. * Appends glyph to the end of this document.
@ -255,7 +260,7 @@ public class TibetanDocument extends DefaultStyledDocument {
glyph.font, asTMW); glyph.font, asTMW);
} }
/** Replacing can be more efficient than inserting and then /** Replacing can be more efficient than inserting and then
removing. This replaces the glyphs at position [startOffset, removing. This replaces the glyphs at position [startOffset,
endOffset) with data, which is interpreted as TMW glyphs if endOffset) with data, which is interpreted as TMW glyphs if
asTMW is true and as TM glyphs otherwise. The font size for asTMW is true and as TM glyphs otherwise. The font size for
@ -265,16 +270,16 @@ public class TibetanDocument extends DefaultStyledDocument {
private void replaceDuffs(int fontSize, int startOffset, private void replaceDuffs(int fontSize, int startOffset,
int endOffset, String data, int endOffset, String data,
int newFontIndex, boolean asTMW) { int newFontIndex, boolean asTMW) {
MutableAttributeSet mas MutableAttributeSet mas
= ((asTMW) = ((asTMW)
? TibetanMachineWeb.getAttributeSet(newFontIndex) ? TibetanMachineWeb.getAttributeSet(newFontIndex)
: TibetanMachineWeb.getAttributeSetTM(newFontIndex)); : TibetanMachineWeb.getAttributeSetTM(newFontIndex));
StyleConstants.setFontSize(mas, fontSize); StyleConstants.setFontSize(mas, fontSize);
try { try {
replace(startOffset, endOffset - startOffset, data, mas); replace(startOffset, endOffset - startOffset, data, mas);
} catch (BadLocationException ble) { } catch (BadLocationException ble) {
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();
} }
} }
/** Replacing can be more efficient than inserting and then /** Replacing can be more efficient than inserting and then
@ -297,25 +302,25 @@ public class TibetanDocument extends DefaultStyledDocument {
} }
} }
private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) { private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) {
return insertDuff(fontSize, pos, glyphs, asTMW, Color.black); return insertDuff(fontSize, pos, glyphs, asTMW, Color.black);
} }
private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW, Color color) { private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW, Color color) {
if (glyphs == null) if (glyphs == null)
return pos; return pos;
MutableAttributeSet mas; MutableAttributeSet mas;
for (int i=0; i<glyphs.length; i++) { for (int i=0; i<glyphs.length; i++) {
mas = ((asTMW) mas = ((asTMW)
? TibetanMachineWeb.getAttributeSet(glyphs[i].font) ? TibetanMachineWeb.getAttributeSet(glyphs[i].font)
: TibetanMachineWeb.getAttributeSetTM(glyphs[i].font)); : TibetanMachineWeb.getAttributeSetTM(glyphs[i].font));
if (null == mas) if (null == mas)
throw new Error("Cannot insert that DuffData; the font number is too low or too high; perhaps the programmer has asTMW set incorrectly?"); throw new Error("Cannot insert that DuffData; the font number is too low or too high; perhaps the programmer has asTMW set incorrectly?");
appendDuff(fontSize, pos, glyphs[i].text, mas, color); appendDuff(fontSize, pos, glyphs[i].text, mas, color);
pos += glyphs[i].text.length(); pos += glyphs[i].text.length();
} }
return pos; return pos;
} }
/** /**
* Converts the entire document into Extended Wylie. * Converts the entire document into Extended Wylie.
@ -629,7 +634,7 @@ public class TibetanDocument extends DefaultStyledDocument {
*/ */
public boolean convertToTM(int begin, int end, StringBuffer errors, public boolean convertToTM(int begin, int end, StringBuffer errors,
long numAttemptedReplacements[]) { long numAttemptedReplacements[]) {
return convertHelper(begin, end, true, false, errors, null, return convertHelper(begin, end, "TMW->TM", errors, null,
numAttemptedReplacements); numAttemptedReplacements);
} }
@ -651,7 +656,7 @@ public class TibetanDocument extends DefaultStyledDocument {
*/ */
public boolean convertToTMW(int begin, int end, StringBuffer errors, public boolean convertToTMW(int begin, int end, StringBuffer errors,
long numAttemptedReplacements[]) { long numAttemptedReplacements[]) {
return convertHelper(begin, end, false, false, errors, null, return convertHelper(begin, end, "TM->TMW", errors, null,
numAttemptedReplacements); numAttemptedReplacements);
} }
@ -676,7 +681,7 @@ public class TibetanDocument extends DefaultStyledDocument {
public boolean convertToUnicode(int begin, int end, StringBuffer errors, public boolean convertToUnicode(int begin, int end, StringBuffer errors,
String unicodeFont, String unicodeFont,
long numAttemptedReplacements[]) { long numAttemptedReplacements[]) {
return convertHelper(begin, end, false, true, errors, unicodeFont, return convertHelper(begin, end, "TMW->Unicode", errors, unicodeFont,
numAttemptedReplacements); numAttemptedReplacements);
} }
@ -790,8 +795,11 @@ public class TibetanDocument extends DefaultStyledDocument {
return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing"); return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
} }
/** Helper function. Converts TMW->TM if !toUnicode&&toTM, /** Helper function. Converts TMW->TM, TM->TMW, TMW->Unicode, or
TM->TMW if !toUnicode&&!toTM, TMW->Unicode if toUnicode. TMW-> the very same TMW [just for testing Java's RTF support]
depending on mode.
@param mode one of "TMW->TMW-identity" (a null conversion for
testing), "TM->TMW", "TMW->TM", or "TMW->Unicode"
@param errors if non-null, then notes about all exceptional @param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer cases will be appended to this StringBuffer
@return false on 100% success, true if any exceptional case @return false on 100% success, true if any exceptional case
@ -799,8 +807,8 @@ public class TibetanDocument extends DefaultStyledDocument {
@see #convertToUnicode(int,int,StringBuffer,String,long[]) @see #convertToUnicode(int,int,StringBuffer,String,long[])
@see #convertToTMW(int,int,StringBuffer,long[]) @see #convertToTMW(int,int,StringBuffer,long[])
@see #convertToTM(int,int,StringBuffer,long[]) */ @see #convertToTM(int,int,StringBuffer,long[]) */
private boolean convertHelper(int begin, int end, boolean toTM, private boolean convertHelper(int begin, int end, String mode,
boolean toUnicode, StringBuffer errors, StringBuffer errors,
String unicodeFont, String unicodeFont,
long numAttemptedReplacements[]) { long numAttemptedReplacements[]) {
// To preserve formatting, we go paragraph by paragraph. // To preserve formatting, we go paragraph by paragraph.
@ -808,6 +816,9 @@ public class TibetanDocument extends DefaultStyledDocument {
// Use positions, not offsets, because our work on paragraph K // Use positions, not offsets, because our work on paragraph K
// will affect the offsets of paragraph K+1. // will affect the offsets of paragraph K+1.
ThdlDebug.verify("TMW->TMW-identity" == mode || "TMW->Unicode" == mode
|| "TM->TMW" == mode || "TMW->TM" == mode);
Position finalEndPos; Position finalEndPos;
if (end < 0) { if (end < 0) {
end = getLength(); end = getLength();
@ -835,13 +846,13 @@ public class TibetanDocument extends DefaultStyledDocument {
noMore = true; noMore = true;
ceh.doErrorWrapup = true; ceh.doErrorWrapup = true;
} }
convertHelperHelper(thisParagraph.getStartOffset(), convertParagraph(thisParagraph.getStartOffset(),
((finalEndPos.getOffset() < p_end) ((finalEndPos.getOffset() < p_end)
? finalEndPos.getOffset() ? finalEndPos.getOffset()
: p_end), : p_end),
toTM, toUnicode, errors, ceh, mode, errors, ceh,
unicodeFont, unicodeFont,
numAttemptedReplacements); numAttemptedReplacements);
} }
if (!ceh.errorReturn if (!ceh.errorReturn
&& pl != getParagraphs(begin, finalEndPos.getOffset()).length) { && pl != getParagraphs(begin, finalEndPos.getOffset()).length) {
@ -860,24 +871,22 @@ public class TibetanDocument extends DefaultStyledDocument {
return ceh.errorReturn; return ceh.errorReturn;
} }
/** See the sole caller, convertHelper. */ /** See the sole caller, {@link #convertHelper}. begin and end
private void convertHelperHelper(int begin, int end, boolean toTM, should specify the bounds of a paragraph. */
boolean toUnicode, StringBuffer errors, private void convertParagraph(int begin, int end, String mode,
ConversionErrorHelper ceh, StringBuffer errors,
String unicodeFont, ConversionErrorHelper ceh,
long numAttemptedReplacements[]) { String unicodeFont,
final boolean debug = false; long numAttemptedReplacements[]) {
if (debug) final int debug = 0;
System.out.println("cHH: [" + begin + ", " + end + ")"); if (debug > 0)
System.out.println("convertParagraph: [" + begin + ", " + end + ")");
// DLC FIXME: here's an idea, a compressor -- use the '-' (ord // DLC FIXME: here's an idea, a compressor -- use the '-' (ord
// 45) or ' ' (ord 32) glyph from the same font as the // 45) or ' ' (ord 32) glyph from the same font as the
// preceding glyph, never others. This reduces the size of a // preceding glyph, never others. This reduces the size of a
// TMW RTF file by a factor of 3 sometimes. To do it, use // TMW RTF file by a factor of 3 sometimes. To do it, use
// this routine, but give it the ability to go from TMW->TMW // this routine, but give it the ability to go from
// and TM->TM. // TMW->compressed-TMW and TM->compressed-TM.
// toTM is ignored when toUnicode is true:
ThdlDebug.verify(!toUnicode || !toTM);
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug"); boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
if (end < 0) if (end < 0)
@ -910,8 +919,10 @@ public class TibetanDocument extends DefaultStyledDocument {
while (i < endPos.getOffset()) { while (i < endPos.getOffset()) {
AttributeSet attr = getCharacterElement(i).getAttributes(); AttributeSet attr = getCharacterElement(i).getAttributes();
String fontName = StyleConstants.getFontFamily(attr); String fontName = StyleConstants.getFontFamily(attr);
int fontNum int fontNum
= ((toTM || toUnicode) = (("TMW->TM" == mode
|| "TMW->Unicode" == mode
|| "TMW->TMW-identity" == mode)
? TibetanMachineWeb.getTMWFontNumber(fontName) ? TibetanMachineWeb.getTMWFontNumber(fontName)
: TibetanMachineWeb.getTMFontNumber(fontName)); : TibetanMachineWeb.getTMFontNumber(fontName));
@ -926,14 +937,18 @@ public class TibetanDocument extends DefaultStyledDocument {
DuffCode dc = null; DuffCode dc = null;
String unicode = null; String unicode = null;
if (toUnicode) { if ("TMW->Unicode" == mode) {
unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1, unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1,
getText(i,1).charAt(0)); getText(i,1).charAt(0));
} else { } else {
if (toTM) { if ("TMW->TM" == mode) {
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1, dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
getText(i,1).charAt(0), getText(i,1).charAt(0),
replacementFontIndex); replacementFontIndex);
} else if ("TMW->TMW-identity" == mode) {
dc = TibetanMachineWeb.mapTMWtoItself(fontNum - 1,
getText(i,1).charAt(0),
replacementFontIndex);
} else { } else {
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1, dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
getText(i,1).charAt(0), getText(i,1).charAt(0),
@ -942,7 +957,7 @@ public class TibetanDocument extends DefaultStyledDocument {
} }
if (replacementQueue.length() > 0 if (replacementQueue.length() > 0
&& (mustReplace && (mustReplace
|| ((!toUnicode || (("TMW->Unicode" != mode
&& null != dc && null != dc
&& dc.getFontNum() != replacementFontIndex) && dc.getFontNum() != replacementFontIndex)
|| fontSize != replacementFontSize))) { || fontSize != replacementFontSize))) {
@ -955,7 +970,7 @@ public class TibetanDocument extends DefaultStyledDocument {
// this if-else statement is duplicated below; beware! // this if-else statement is duplicated below; beware!
int endIndex = mustReplace ? mustReplaceUntil : i; int endIndex = mustReplace ? mustReplaceUntil : i;
if (toUnicode) { if ("TMW->Unicode" == mode) {
UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue); UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
replaceDuffsWithUnicode(replacementFontSize, replaceDuffsWithUnicode(replacementFontSize,
replacementStartIndex, replacementStartIndex,
@ -968,13 +983,13 @@ public class TibetanDocument extends DefaultStyledDocument {
endIndex, endIndex,
replacementQueue.toString(), replacementQueue.toString(),
replacementFontIndex, replacementFontIndex,
!toTM); mode != "TMW->TM");
} }
// i += numnewchars - numoldchars; // i += numnewchars - numoldchars;
if (debug) if (debug > 10)
System.out.println("Incrementing i by " + (replacementQueue.length() System.out.println("Incrementing i by " + (replacementQueue.length()
- (endIndex - replacementStartIndex)) + "; replaced a patch with font size " + replacementFontSize + ", fontindex " + replacementFontIndex); - (endIndex - replacementStartIndex)) + "; replaced a patch with font size " + replacementFontSize + ", fontindex " + replacementFontIndex);
i += (replacementQueue.length() i += (replacementQueue.length()
- (endIndex - replacementStartIndex)); - (endIndex - replacementStartIndex));
@ -986,13 +1001,13 @@ public class TibetanDocument extends DefaultStyledDocument {
if (0 == replacementQueue.length()) { if (0 == replacementQueue.length()) {
replacementFontSize = fontSize; replacementFontSize = fontSize;
replacementStartIndex = i; replacementStartIndex = i;
if (!toUnicode) { if ("TMW->Unicode" != mode) {
replacementFontIndex = dc.getFontNum(); replacementFontIndex = dc.getFontNum();
} }
} }
if (toUnicode) { if ("TMW->Unicode" == mode) {
replacementQueue.append(unicode); replacementQueue.append(unicode);
if (debug) if (debug > 0)
System.out.println("unicode rq.append: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(unicode)); System.out.println("unicode rq.append: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(unicode));
} else { } else {
replacementQueue.append(dc.getCharacter()); replacementQueue.append(dc.getCharacter());
@ -1011,9 +1026,7 @@ public class TibetanDocument extends DefaultStyledDocument {
ceh.problemGlyphsTable.put(cgf, "yes this character appears once"); ceh.problemGlyphsTable.put(cgf, "yes this character appears once");
if (null != errors) { if (null != errors) {
String err String err
= (toUnicode = mode
? "TMW->Unicode"
: (toTM ? "TMW->TM" : "TM->TMW"))
+ " conversion failed for a glyph:\nFont is " + " conversion failed for a glyph:\nFont is "
+ fontName + ", glyph number is " + fontName + ", glyph number is "
+ (int)getText(i,1).charAt(0) + (int)getText(i,1).charAt(0)
@ -1028,9 +1041,15 @@ public class TibetanDocument extends DefaultStyledDocument {
// the beginning of the document, // the beginning of the document,
// after a 'a' character (i.e., // after a 'a' character (i.e.,
// \tm0062 or \tmw0063): // \tm0062 or \tmw0063):
equivalent[0].setData((toUnicode || toTM) ? (char)63 : (char)62, 1); equivalent[0].setData((("TMW->Unicode" == mode
|| "TMW->TM" == mode)
? (char)63 : (char)62),
1);
insertDuff(72, ceh.errorGlyphLocation++, insertDuff(72, ceh.errorGlyphLocation++,
equivalent, toUnicode || toTM); equivalent,
("TMW->Unicode" == mode
|| "TMW->TMW-identity" == mode
|| "TMW->TM" == mode));
++i; ++i;
// Don't later replace this last guy: // Don't later replace this last guy:
if (replacementStartIndex < ceh.errorGlyphLocation) { if (replacementStartIndex < ceh.errorGlyphLocation) {
@ -1038,7 +1057,10 @@ public class TibetanDocument extends DefaultStyledDocument {
} }
equivalent[0].setData(getText(i,1), fontNum); equivalent[0].setData(getText(i,1), fontNum);
insertDuff(72, ceh.errorGlyphLocation++, insertDuff(72, ceh.errorGlyphLocation++,
equivalent, toUnicode || toTM); equivalent,
("TMW->Unicode" == mode
|| "TMW->TMW-identity" == mode
|| "TMW->TM" == mode));
++i; ++i;
// Don't later replace this last guy: // Don't later replace this last guy:
if (replacementStartIndex < ceh.errorGlyphLocation) { if (replacementStartIndex < ceh.errorGlyphLocation) {
@ -1056,7 +1078,10 @@ public class TibetanDocument extends DefaultStyledDocument {
} }
} }
} else { } else {
if (debug) System.out.println("non-tm/tmw found at offset " + i + "; font=" + fontName + " ord " + (int)getText(i,1).charAt(0)); // FIXME: are we doing the right thing here? I
// think so -- I think we're just not replacing
// the current character, but I'm not at all sure.
if (debug > 0) System.out.println("non-tm/tmw found at offset " + i + "; font=" + fontName + " ord " + (int)getText(i,1).charAt(0));
if (replacementQueue.length() > 0) { if (replacementQueue.length() > 0) {
if (!mustReplace) { if (!mustReplace) {
mustReplaceUntil = i; mustReplaceUntil = i;
@ -1069,14 +1094,14 @@ public class TibetanDocument extends DefaultStyledDocument {
if (replacementQueue.length() > 0) { if (replacementQueue.length() > 0) {
// this if-else statement is duplicated above; beware! // this if-else statement is duplicated above; beware!
int endIndex = mustReplace ? mustReplaceUntil : i; int endIndex = mustReplace ? mustReplaceUntil : i;
if (toUnicode) { if ("TMW->Unicode" == mode) {
UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue); UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
replaceDuffsWithUnicode(replacementFontSize, replaceDuffsWithUnicode(replacementFontSize,
replacementStartIndex, replacementStartIndex,
endIndex, endIndex,
replacementQueue.toString(), replacementQueue.toString(),
unicodeFont); unicodeFont);
if (debug) if (debug > 0)
System.out.println("unicode rq: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(replacementQueue.toString())); System.out.println("unicode rq: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(replacementQueue.toString()));
} else { } else {
replaceDuffs(replacementFontSize, replaceDuffs(replacementFontSize,
@ -1084,7 +1109,7 @@ public class TibetanDocument extends DefaultStyledDocument {
endIndex, endIndex,
replacementQueue.toString(), replacementQueue.toString(),
replacementFontIndex, replacementFontIndex,
!toTM); "TMW->TM" != mode);
} }
} }
ceh.lastOffsetExamined = endPos.getOffset() - 1; ceh.lastOffsetExamined = endPos.getOffset() - 1;
@ -1092,7 +1117,9 @@ public class TibetanDocument extends DefaultStyledDocument {
if (ceh.doErrorWrapup && ceh.errorGlyphLocation > 0) { if (ceh.doErrorWrapup && ceh.errorGlyphLocation > 0) {
// Bracket the bad stuff with U+0F3C on the left // Bracket the bad stuff with U+0F3C on the left
// and U+0F3D on the right: // and U+0F3D on the right:
if (!(toUnicode || toTM)) { if (!("TMW->Unicode" == mode
|| "TMW->TM" == mode
|| "TMW->TMW-identity" == mode)) {
equivalent[0].setData((char)209, 1); equivalent[0].setData((char)209, 1);
insertDuff(72, ceh.errorGlyphLocation++, insertDuff(72, ceh.errorGlyphLocation++,
equivalent, false); equivalent, false);
@ -1254,6 +1281,30 @@ public class TibetanDocument extends DefaultStyledDocument {
} }
} }
/**
    Runs the identity TMW->TMW "conversion" (a converter that
    converts nothing) over part of this document.  It exists to help
    test Java's RTF support: the TMW->Unicode conversion fails to
    preserve whitespace, and running the same machinery without
    changing any glyph gives one data point as to whether that is a
    bug in our code or in Java's RTF support.

    <p>Whatever notes the conversion collects about exceptional
    cases are printed to System.err between
    &lt;TMW_TO_SAME_TWM-errors&gt; tags.

    @param start the offset at which to begin
    @param end the offset at which to stop; the underlying helper
    treats a negative value as the document's length
    @param numAttemptedReplacements passed through untouched to the
    underlying helper
    @return false on 100% success, true if any exceptional case was
    encountered
    @exception Error reportedly if start or end is out of range
    (raised, if at all, by the underlying helper -- confirm there) */
public boolean identityTmwToTmwConversion(int start,
                                          int end,
                                          long numAttemptedReplacements[]) {
    // convertHelper verifies its mode by reference (==), so the mode
    // must be passed as exactly this string literal.
    final StringBuffer problemNotes = new StringBuffer();
    final boolean sawExceptionalCase
        = convertHelper(start, end, "TMW->TMW-identity",
                        problemNotes,
                        "Unicode Font should not be used",
                        numAttemptedReplacements);

    // Dump everything the helper complained about, bracketed so that
    // it is easy to find in the error stream.
    final String report = problemNotes.toString();
    System.err.println("<TMW_TO_SAME_TWM-errors>");
    System.err.println(report);
    System.err.println("</TMW_TO_SAME_TWM-errors>");

    return sawExceptionalCase;
}
/** Returns all the paragraph elements in this document that /** Returns all the paragraph elements in this document that
* contain glyphs with offsets in the range [start, end) where * contain glyphs with offsets in the range [start, end) where
* end < 0 is treated as the document's length. Note that roman, * end < 0 is treated as the document's length. Note that roman,

View file

@ -1372,6 +1372,39 @@ private static final DuffCode TMW_cr = new DuffCode(1, '\r');
private static final DuffCode TMW_lf = new DuffCode(1, '\n'); private static final DuffCode TMW_lf = new DuffCode(1, '\n');
private static final DuffCode TMW_tab = new DuffCode(1, '\t'); private static final DuffCode TMW_tab = new DuffCode(1, '\t');
/** An identity function used merely for testing: maps a
    TibetanMachineWeb glyph to itself.
    @param font the zero-based TMW font index, 0 through 9
    @param ordinal the character within that font; must be below 255
    @param suggestedFont consulted only for carriage return, line
    feed, and tab: if nonzero, the result for one of those is placed
    in this font; if zero, the shared TMW_cr, TMW_lf, or TMW_tab
    instance is returned
    @return null if font is out of range or if ordinal is below 32
    and is not '\r', '\n', or '\t' (the latter case is also noted as
    iffy code); otherwise a DuffCode for the same character, whose
    font number is font + 1 for ordinals of 32 and above
    @exception Error if ordinal is 255 or greater */
public static DuffCode mapTMWtoItself(int font, int ordinal, int suggestedFont) {
    if (font < 0 || font > 9)
        return null;
    if (ordinal >= 255) {
        throw new Error("I didn't know that ever happened.");
    }

    // The common case: a printable glyph maps straight to itself.
    if (ordinal >= 32) {
        return new DuffCode(font + 1, (char)ordinal);
    }

    // Only three control characters are understood below 32.
    final DuffCode sharedInstance;
    switch (ordinal) {
    case '\r':
        sharedInstance = TMW_cr;
        break;
    case '\n':
        sharedInstance = TMW_lf;
        break;
    case '\t':
        sharedInstance = TMW_tab;
        break;
    default:
        // Any other control character is unexpected; report it and
        // tell the caller there is no mapping.
        ThdlDebug.noteIffyCode();
        return null;
    }

    if (0 == suggestedFont)
        return sharedInstance;
    return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
}
/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding /** Returns the DuffCode for the TibetanMachineWeb glyph corresponding
to the given TibetanMachine font to the given TibetanMachine font
(0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254). (0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254).
@ -1406,17 +1439,17 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
if (0 == suggestedFont) if (0 == suggestedFont)
return TMW_cr; return TMW_cr;
else else
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} else if (ordinal == (int)'\n') { } else if (ordinal == (int)'\n') {
if (0 == suggestedFont) if (0 == suggestedFont)
return TMW_lf; return TMW_lf;
else else
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} else if (ordinal == (int)'\t') { } else if (ordinal == (int)'\t') {
if (0 == suggestedFont) if (0 == suggestedFont)
return TMW_tab; return TMW_tab;
else else
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} else { } else {
// for robustness, just return font 1, char ordinal. // for robustness, just return font 1, char ordinal.
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();
@ -1430,7 +1463,7 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
// assume that a machine goofed along the way. (FIXME: optionally // assume that a machine goofed along the way. (FIXME: optionally
// warn.) // warn.)
if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) { if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} }
return TMtoTMW[font][ordinal-32]; return TMtoTMW[font][ordinal-32];
} }
@ -1465,17 +1498,17 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
if (0 == suggestedFont) if (0 == suggestedFont)
return TM_cr; return TM_cr;
else else
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} else if (ordinal == (int)'\n') { } else if (ordinal == (int)'\n') {
if (0 == suggestedFont) if (0 == suggestedFont)
return TM_lf; return TM_lf;
else else
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} else if (ordinal == (int)'\t') { } else if (ordinal == (int)'\t') {
if (0 == suggestedFont) if (0 == suggestedFont)
return TM_tab; return TM_tab;
else else
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} else { } else {
// for robustness, just return font 1, char ordinal. // for robustness, just return font 1, char ordinal.
ThdlDebug.noteIffyCode(); ThdlDebug.noteIffyCode();
@ -1488,7 +1521,7 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
return new DuffCode(1, (char)ordinal); return new DuffCode(1, (char)ordinal);
} }
if ((0 != suggestedFont) && (32 == ordinal)) { if ((0 != suggestedFont) && (32 == ordinal)) {
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
} }
DuffCode ans = TMWtoTM[font][ordinal-32]; DuffCode ans = TMWtoTM[font][ordinal-32];
return ans; return ans;