Formatting in TMW documents is not preserved. I've added an identity
transformation, TMW->TMW, to help me debug this problem.
This commit is contained in:
parent
9025fb42d6
commit
83f499b7a8
6 changed files with 218 additions and 91 deletions
|
@ -457,7 +457,10 @@ class ConvertDialog extends JDialog
|
||||||
} else if (FIND_ALL_NON_TM == ct) {
|
} else if (FIND_ALL_NON_TM == ct) {
|
||||||
newFileNamePrefix = "AllNonTM__";
|
newFileNamePrefix = "AllNonTM__";
|
||||||
newFileNameExtension = ".TXT";
|
newFileNameExtension = ".TXT";
|
||||||
} else { // conversion {to Wylie or TM} mode
|
} else if (TMW_TO_SAME_TWM == ct) {
|
||||||
|
newFileNamePrefix = "TMW_to_same_TMW__";
|
||||||
|
newFileNameExtension = ".RTF";
|
||||||
|
} else { // conversion mode
|
||||||
if (TMW_TO_WYLIE == ct) {
|
if (TMW_TO_WYLIE == ct) {
|
||||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||||
} else if (TMW_TO_WYLIE_TEXT == ct) {
|
} else if (TMW_TO_WYLIE_TEXT == ct) {
|
||||||
|
|
|
@ -93,6 +93,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
|
||||||
"Attention required",
|
"Attention required",
|
||||||
JOptionPane.ERROR_MESSAGE);
|
JOptionPane.ERROR_MESSAGE);
|
||||||
return false;
|
return false;
|
||||||
|
} else if (50 == returnCode) {
|
||||||
|
JOptionPane.showMessageDialog(cd,
|
||||||
|
"Error doing RTF->RTF identity copy.",
|
||||||
|
"Attention required",
|
||||||
|
JOptionPane.ERROR_MESSAGE);
|
||||||
|
return false;
|
||||||
} else if (43 == returnCode) {
|
} else if (43 == returnCode) {
|
||||||
JOptionPane.showMessageDialog(cd,
|
JOptionPane.showMessageDialog(cd,
|
||||||
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
|
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
|
||||||
|
@ -192,13 +198,15 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
|
||||||
if (null == owner) {
|
if (null == owner) {
|
||||||
convDialog
|
convDialog
|
||||||
= new ConvertDialog(new ConverterGUI(),
|
= new ConvertDialog(new ConverterGUI(),
|
||||||
CHOICES,
|
ThdlOptions.getBooleanOption("thdl.debug")
|
||||||
|
? DEBUG_CHOICES : CHOICES,
|
||||||
true);
|
true);
|
||||||
} else {
|
} else {
|
||||||
convDialog
|
convDialog
|
||||||
= new ConvertDialog(owner,
|
= new ConvertDialog(owner,
|
||||||
new ConverterGUI(),
|
new ConverterGUI(),
|
||||||
CHOICES,
|
ThdlOptions.getBooleanOption("thdl.debug")
|
||||||
|
? DEBUG_CHOICES : CHOICES,
|
||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.awt.*;
|
||||||
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
@author Nathaniel Garson, Tibetan and Himalayan Digital Library */
|
||||||
interface FontConverterConstants
|
interface FontConverterConstants
|
||||||
{
|
{
|
||||||
|
final String TMW_TO_SAME_TWM = "TMW to the same TMW (for testing only) (RTF->RTF)";
|
||||||
final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
|
final String ACIP_TO_UNI_TEXT = "ACIP to Unicode (Text->Text)";
|
||||||
final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
|
final String ACIP_TO_TMW = "ACIP to TMW (Text->RTF)";
|
||||||
final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
|
final String TMW_TO_ACIP = "TMW to ACIP (RTF->RTF)";
|
||||||
|
@ -56,6 +57,23 @@ interface FontConverterConstants
|
||||||
FIND_ALL_NON_TM
|
FIND_ALL_NON_TM
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** The conversion choices handed to ConvertDialog by ConverterGUI
    when the "thdl.debug" boolean option is true (CHOICES is used
    otherwise).  The list starts with the testing-only identity
    conversion, TMW_TO_SAME_TWM. */
final String[] DEBUG_CHOICES = new String[] {
|
||||||
|
TMW_TO_SAME_TWM,
|
||||||
|
ACIP_TO_UNI_TEXT,
|
||||||
|
ACIP_TO_TMW,
|
||||||
|
TMW_TO_ACIP,
|
||||||
|
TMW_TO_ACIP_TEXT,
|
||||||
|
TM_TO_TMW,
|
||||||
|
TMW_TO_TM,
|
||||||
|
TMW_TO_UNI,
|
||||||
|
TMW_TO_WYLIE,
|
||||||
|
TMW_TO_WYLIE_TEXT,
|
||||||
|
FIND_SOME_NON_TMW,
|
||||||
|
FIND_SOME_NON_TM,
|
||||||
|
FIND_ALL_NON_TMW,
|
||||||
|
FIND_ALL_NON_TM
|
||||||
|
};
|
||||||
|
|
||||||
final String suggested_WYLIE_prefix = "THDL_Wylie_";
|
final String suggested_WYLIE_prefix = "THDL_Wylie_";
|
||||||
final String suggested_ACIP_prefix = "ACIP_";
|
final String suggested_ACIP_prefix = "ACIP_";
|
||||||
final String suggested_TO_TMW_prefix = "TMW_";
|
final String suggested_TO_TMW_prefix = "TMW_";
|
||||||
|
|
|
@ -66,6 +66,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
* @return the exit code. */
|
* @return the exit code. */
|
||||||
public static int realMain(String[] args, PrintStream out) {
|
public static int realMain(String[] args, PrintStream out) {
|
||||||
try {
|
try {
|
||||||
|
boolean convertTmwToTmwMode = false;
|
||||||
boolean convertToUnicodeMode = false;
|
boolean convertToUnicodeMode = false;
|
||||||
boolean convertToTMMode = false;
|
boolean convertToTMMode = false;
|
||||||
boolean convertACIPToUniMode = false;
|
boolean convertACIPToUniMode = false;
|
||||||
|
@ -105,6 +106,8 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
|| args[numArgs - 3].equals("long"))
|
|| args[numArgs - 3].equals("long"))
|
||||||
|| !((findAllNonTMWMode
|
|| !((findAllNonTMWMode
|
||||||
= args[numArgs - 2].equals("--find-all-non-tmw"))
|
= args[numArgs - 2].equals("--find-all-non-tmw"))
|
||||||
|
|| (convertTmwToTmwMode
|
||||||
|
= args[numArgs - 2].equals("--tmw-to-tmw-for-testing"))
|
||||||
|| (convertToTMMode
|
|| (convertToTMMode
|
||||||
= args[numArgs - 2].equals("--to-tibetan-machine"))
|
= args[numArgs - 2].equals("--to-tibetan-machine"))
|
||||||
|| (convertToTMWMode
|
|| (convertToTMWMode
|
||||||
|
@ -140,6 +143,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
out.println(" --warning-level None|Some|Most|All");
|
out.println(" --warning-level None|Some|Most|All");
|
||||||
out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
|
out.println(" --acip-to-tibetan-warning-and-error-messages short|long");
|
||||||
out.println(" --find-all-non-tmw | --find-some-non-tmw");
|
out.println(" --find-all-non-tmw | --find-some-non-tmw");
|
||||||
|
out.println(" | --tmw-to-tmw-for-testing");
|
||||||
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
||||||
out.println(" | --to-unicode | --to-wylie | --to-acip");
|
out.println(" | --to-unicode | --to-wylie | --to-acip");
|
||||||
out.println(" | --to-wylie-text | --to-acip-text");
|
out.println(" | --to-wylie-text | --to-acip-text");
|
||||||
|
@ -246,6 +250,8 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
conversionTag = TMW_TO_ACIP_TEXT;
|
conversionTag = TMW_TO_ACIP_TEXT;
|
||||||
} else if (convertToUnicodeMode) {
|
} else if (convertToUnicodeMode) {
|
||||||
conversionTag = TMW_TO_UNI;
|
conversionTag = TMW_TO_UNI;
|
||||||
|
} else if (convertTmwToTmwMode) {
|
||||||
|
conversionTag = TMW_TO_SAME_TWM;
|
||||||
} else if (convertToTMWMode) {
|
} else if (convertToTMWMode) {
|
||||||
conversionTag = TM_TO_TMW;
|
conversionTag = TM_TO_TMW;
|
||||||
} else if (convertACIPToUniMode) {
|
} else if (convertACIPToUniMode) {
|
||||||
|
@ -394,6 +400,7 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
|
|
||||||
int exitCode = 0;
|
int exitCode = 0;
|
||||||
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
|
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
|
||||||
|
+ ((TMW_TO_SAME_TWM == ct) ? 1 : 0)
|
||||||
+ ((TMW_TO_UNI == ct) ? 1 : 0)
|
+ ((TMW_TO_UNI == ct) ? 1 : 0)
|
||||||
+ ((TM_TO_TMW == ct) ? 1 : 0)
|
+ ((TM_TO_TMW == ct) ? 1 : 0)
|
||||||
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
|
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
|
||||||
|
@ -402,7 +409,14 @@ public class TibetanConverter implements FontConverterConstants {
|
||||||
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
|
+ ((TMW_TO_WYLIE_TEXT == ct) ? 1 : 0)
|
||||||
== 1);
|
== 1);
|
||||||
long numAttemptedReplacements[] = new long[] { 0 };
|
long numAttemptedReplacements[] = new long[] { 0 };
|
||||||
if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
|
if (TMW_TO_SAME_TWM == ct) {
|
||||||
|
// Identity conversion for testing
|
||||||
|
if (tdoc.identityTmwToTmwConversion(0,
|
||||||
|
tdoc.getLength(),
|
||||||
|
numAttemptedReplacements)) {
|
||||||
|
exitCode = 50;
|
||||||
|
}
|
||||||
|
} else if (TMW_TO_WYLIE == ct || TMW_TO_WYLIE_TEXT == ct) {
|
||||||
// Convert to THDL Wylie:
|
// Convert to THDL Wylie:
|
||||||
if (!tdoc.toWylie(0,
|
if (!tdoc.toWylie(0,
|
||||||
tdoc.getLength(),
|
tdoc.getLength(),
|
||||||
|
|
|
@ -223,6 +223,11 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
return insertDuff(tibetanFontSize, pos, glyphs, true, color);
|
return insertDuff(tibetanFontSize, pos, glyphs, true, color);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts a stretch of TibetanMachineWeb data into the document.
|
||||||
|
* @param pos the position at which you want to insert text
|
||||||
|
* @param glyphs the array of Tibetan data you want to insert
|
||||||
|
*/
|
||||||
public int insertDuff(int pos, DuffData[] glyphs) {
|
public int insertDuff(int pos, DuffData[] glyphs) {
|
||||||
return insertDuff(tibetanFontSize, pos, glyphs, true, Color.black);
|
return insertDuff(tibetanFontSize, pos, glyphs, true, Color.black);
|
||||||
}
|
}
|
||||||
|
@ -629,7 +634,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
*/
|
*/
|
||||||
public boolean convertToTM(int begin, int end, StringBuffer errors,
|
public boolean convertToTM(int begin, int end, StringBuffer errors,
|
||||||
long numAttemptedReplacements[]) {
|
long numAttemptedReplacements[]) {
|
||||||
return convertHelper(begin, end, true, false, errors, null,
|
return convertHelper(begin, end, "TMW->TM", errors, null,
|
||||||
numAttemptedReplacements);
|
numAttemptedReplacements);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -651,7 +656,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
*/
|
*/
|
||||||
public boolean convertToTMW(int begin, int end, StringBuffer errors,
|
public boolean convertToTMW(int begin, int end, StringBuffer errors,
|
||||||
long numAttemptedReplacements[]) {
|
long numAttemptedReplacements[]) {
|
||||||
return convertHelper(begin, end, false, false, errors, null,
|
return convertHelper(begin, end, "TM->TMW", errors, null,
|
||||||
numAttemptedReplacements);
|
numAttemptedReplacements);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -676,7 +681,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
public boolean convertToUnicode(int begin, int end, StringBuffer errors,
|
public boolean convertToUnicode(int begin, int end, StringBuffer errors,
|
||||||
String unicodeFont,
|
String unicodeFont,
|
||||||
long numAttemptedReplacements[]) {
|
long numAttemptedReplacements[]) {
|
||||||
return convertHelper(begin, end, false, true, errors, unicodeFont,
|
return convertHelper(begin, end, "TMW->Unicode", errors, unicodeFont,
|
||||||
numAttemptedReplacements);
|
numAttemptedReplacements);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -790,8 +795,11 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
|
return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Helper function. Converts TMW->TM if !toUnicode&&toTM,
|
/** Helper function. Converts TMW->TM, TM->TMW, TMW->Unicode, or
|
||||||
TM->TMW if !toUnicode&&!toTM, TMW->Unicode if toUnicode.
|
TMW-> the very same TMW [just for testing Java's RTF support]
|
||||||
|
depending on mode.
|
||||||
|
@param mode one of "TMW->TMW-identity" (a null conversion for
|
||||||
|
testing), "TM->TMW", "TMW->TM", or "TMW->Unicode"
|
||||||
@param errors if non-null, then notes about all exceptional
|
@param errors if non-null, then notes about all exceptional
|
||||||
cases will be appended to this StringBuffer
|
cases will be appended to this StringBuffer
|
||||||
@return false on 100% success, true if any exceptional case
|
@return false on 100% success, true if any exceptional case
|
||||||
|
@ -799,8 +807,8 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
@see #convertToUnicode(int,int,StringBuffer,String,long[])
|
@see #convertToUnicode(int,int,StringBuffer,String,long[])
|
||||||
@see #convertToTMW(int,int,StringBuffer,long[])
|
@see #convertToTMW(int,int,StringBuffer,long[])
|
||||||
@see #convertToTM(int,int,StringBuffer,long[]) */
|
@see #convertToTM(int,int,StringBuffer,long[]) */
|
||||||
private boolean convertHelper(int begin, int end, boolean toTM,
|
private boolean convertHelper(int begin, int end, String mode,
|
||||||
boolean toUnicode, StringBuffer errors,
|
StringBuffer errors,
|
||||||
String unicodeFont,
|
String unicodeFont,
|
||||||
long numAttemptedReplacements[]) {
|
long numAttemptedReplacements[]) {
|
||||||
// To preserve formatting, we go paragraph by paragraph.
|
// To preserve formatting, we go paragraph by paragraph.
|
||||||
|
@ -808,6 +816,9 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
// Use positions, not offsets, because our work on paragraph K
|
// Use positions, not offsets, because our work on paragraph K
|
||||||
// will affect the offsets of paragraph K+1.
|
// will affect the offsets of paragraph K+1.
|
||||||
|
|
||||||
|
ThdlDebug.verify("TMW->TMW-identity" == mode || "TMW->Unicode" == mode
|
||||||
|
|| "TM->TMW" == mode || "TMW->TM" == mode);
|
||||||
|
|
||||||
Position finalEndPos;
|
Position finalEndPos;
|
||||||
if (end < 0) {
|
if (end < 0) {
|
||||||
end = getLength();
|
end = getLength();
|
||||||
|
@ -835,11 +846,11 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
noMore = true;
|
noMore = true;
|
||||||
ceh.doErrorWrapup = true;
|
ceh.doErrorWrapup = true;
|
||||||
}
|
}
|
||||||
convertHelperHelper(thisParagraph.getStartOffset(),
|
convertParagraph(thisParagraph.getStartOffset(),
|
||||||
((finalEndPos.getOffset() < p_end)
|
((finalEndPos.getOffset() < p_end)
|
||||||
? finalEndPos.getOffset()
|
? finalEndPos.getOffset()
|
||||||
: p_end),
|
: p_end),
|
||||||
toTM, toUnicode, errors, ceh,
|
mode, errors, ceh,
|
||||||
unicodeFont,
|
unicodeFont,
|
||||||
numAttemptedReplacements);
|
numAttemptedReplacements);
|
||||||
}
|
}
|
||||||
|
@ -860,24 +871,22 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
return ceh.errorReturn;
|
return ceh.errorReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** See the sole caller, convertHelper. */
|
/** See the sole caller, {@link #convertHelper}. begin and end
|
||||||
private void convertHelperHelper(int begin, int end, boolean toTM,
|
should specify the bounds of a paragraph. */
|
||||||
boolean toUnicode, StringBuffer errors,
|
private void convertParagraph(int begin, int end, String mode,
|
||||||
|
StringBuffer errors,
|
||||||
ConversionErrorHelper ceh,
|
ConversionErrorHelper ceh,
|
||||||
String unicodeFont,
|
String unicodeFont,
|
||||||
long numAttemptedReplacements[]) {
|
long numAttemptedReplacements[]) {
|
||||||
final boolean debug = false;
|
final int debug = 0;
|
||||||
if (debug)
|
if (debug > 0)
|
||||||
System.out.println("cHH: [" + begin + ", " + end + ")");
|
System.out.println("convertParagraph: [" + begin + ", " + end + ")");
|
||||||
// DLC FIXME: here's an idea, a compressor -- use the '-' (ord
|
// DLC FIXME: here's an idea, a compressor -- use the '-' (ord
|
||||||
// 45) or ' ' (ord 32) glyph from the same font as the
|
// 45) or ' ' (ord 32) glyph from the same font as the
|
||||||
// preceding glyph, never others. This reduces the size of a
|
// preceding glyph, never others. This reduces the size of a
|
||||||
// TMW RTF file by a factor of 3 sometimes. To do it, use
|
// TMW RTF file by a factor of 3 sometimes. To do it, use
|
||||||
// this routine, but give it the ability to go from TMW->TMW
|
// this routine, but give it the ability to go from
|
||||||
// and TM->TM.
|
// TMW->compressed-TMW and TM->compressed-TM.
|
||||||
|
|
||||||
// toTM is ignored when toUnicode is true:
|
|
||||||
ThdlDebug.verify(!toUnicode || !toTM);
|
|
||||||
|
|
||||||
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
|
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
|
||||||
if (end < 0)
|
if (end < 0)
|
||||||
|
@ -911,7 +920,9 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
AttributeSet attr = getCharacterElement(i).getAttributes();
|
AttributeSet attr = getCharacterElement(i).getAttributes();
|
||||||
String fontName = StyleConstants.getFontFamily(attr);
|
String fontName = StyleConstants.getFontFamily(attr);
|
||||||
int fontNum
|
int fontNum
|
||||||
= ((toTM || toUnicode)
|
= (("TMW->TM" == mode
|
||||||
|
|| "TMW->Unicode" == mode
|
||||||
|
|| "TMW->TMW-identity" == mode)
|
||||||
? TibetanMachineWeb.getTMWFontNumber(fontName)
|
? TibetanMachineWeb.getTMWFontNumber(fontName)
|
||||||
: TibetanMachineWeb.getTMFontNumber(fontName));
|
: TibetanMachineWeb.getTMFontNumber(fontName));
|
||||||
|
|
||||||
|
@ -926,14 +937,18 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
|
|
||||||
DuffCode dc = null;
|
DuffCode dc = null;
|
||||||
String unicode = null;
|
String unicode = null;
|
||||||
if (toUnicode) {
|
if ("TMW->Unicode" == mode) {
|
||||||
unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1,
|
unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1,
|
||||||
getText(i,1).charAt(0));
|
getText(i,1).charAt(0));
|
||||||
} else {
|
} else {
|
||||||
if (toTM) {
|
if ("TMW->TM" == mode) {
|
||||||
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
|
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
|
||||||
getText(i,1).charAt(0),
|
getText(i,1).charAt(0),
|
||||||
replacementFontIndex);
|
replacementFontIndex);
|
||||||
|
} else if ("TMW->TMW-identity" == mode) {
|
||||||
|
dc = TibetanMachineWeb.mapTMWtoItself(fontNum - 1,
|
||||||
|
getText(i,1).charAt(0),
|
||||||
|
replacementFontIndex);
|
||||||
} else {
|
} else {
|
||||||
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
|
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
|
||||||
getText(i,1).charAt(0),
|
getText(i,1).charAt(0),
|
||||||
|
@ -942,7 +957,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
}
|
}
|
||||||
if (replacementQueue.length() > 0
|
if (replacementQueue.length() > 0
|
||||||
&& (mustReplace
|
&& (mustReplace
|
||||||
|| ((!toUnicode
|
|| (("TMW->Unicode" != mode
|
||||||
&& null != dc
|
&& null != dc
|
||||||
&& dc.getFontNum() != replacementFontIndex)
|
&& dc.getFontNum() != replacementFontIndex)
|
||||||
|| fontSize != replacementFontSize))) {
|
|| fontSize != replacementFontSize))) {
|
||||||
|
@ -955,7 +970,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
|
|
||||||
// this if-else statement is duplicated below; beware!
|
// this if-else statement is duplicated below; beware!
|
||||||
int endIndex = mustReplace ? mustReplaceUntil : i;
|
int endIndex = mustReplace ? mustReplaceUntil : i;
|
||||||
if (toUnicode) {
|
if ("TMW->Unicode" == mode) {
|
||||||
UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
|
UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
|
||||||
replaceDuffsWithUnicode(replacementFontSize,
|
replaceDuffsWithUnicode(replacementFontSize,
|
||||||
replacementStartIndex,
|
replacementStartIndex,
|
||||||
|
@ -968,11 +983,11 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
endIndex,
|
endIndex,
|
||||||
replacementQueue.toString(),
|
replacementQueue.toString(),
|
||||||
replacementFontIndex,
|
replacementFontIndex,
|
||||||
!toTM);
|
mode != "TMW->TM");
|
||||||
}
|
}
|
||||||
|
|
||||||
// i += numnewchars - numoldchars;
|
// i += numnewchars - numoldchars;
|
||||||
if (debug)
|
if (debug > 10)
|
||||||
System.out.println("Incrementing i by " + (replacementQueue.length()
|
System.out.println("Incrementing i by " + (replacementQueue.length()
|
||||||
- (endIndex - replacementStartIndex)) + "; replaced a patch with font size " + replacementFontSize + ", fontindex " + replacementFontIndex);
|
- (endIndex - replacementStartIndex)) + "; replaced a patch with font size " + replacementFontSize + ", fontindex " + replacementFontIndex);
|
||||||
i += (replacementQueue.length()
|
i += (replacementQueue.length()
|
||||||
|
@ -986,13 +1001,13 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
if (0 == replacementQueue.length()) {
|
if (0 == replacementQueue.length()) {
|
||||||
replacementFontSize = fontSize;
|
replacementFontSize = fontSize;
|
||||||
replacementStartIndex = i;
|
replacementStartIndex = i;
|
||||||
if (!toUnicode) {
|
if ("TMW->Unicode" != mode) {
|
||||||
replacementFontIndex = dc.getFontNum();
|
replacementFontIndex = dc.getFontNum();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (toUnicode) {
|
if ("TMW->Unicode" == mode) {
|
||||||
replacementQueue.append(unicode);
|
replacementQueue.append(unicode);
|
||||||
if (debug)
|
if (debug > 0)
|
||||||
System.out.println("unicode rq.append: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(unicode));
|
System.out.println("unicode rq.append: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(unicode));
|
||||||
} else {
|
} else {
|
||||||
replacementQueue.append(dc.getCharacter());
|
replacementQueue.append(dc.getCharacter());
|
||||||
|
@ -1011,9 +1026,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
ceh.problemGlyphsTable.put(cgf, "yes this character appears once");
|
ceh.problemGlyphsTable.put(cgf, "yes this character appears once");
|
||||||
if (null != errors) {
|
if (null != errors) {
|
||||||
String err
|
String err
|
||||||
= (toUnicode
|
= mode
|
||||||
? "TMW->Unicode"
|
|
||||||
: (toTM ? "TMW->TM" : "TM->TMW"))
|
|
||||||
+ " conversion failed for a glyph:\nFont is "
|
+ " conversion failed for a glyph:\nFont is "
|
||||||
+ fontName + ", glyph number is "
|
+ fontName + ", glyph number is "
|
||||||
+ (int)getText(i,1).charAt(0)
|
+ (int)getText(i,1).charAt(0)
|
||||||
|
@ -1028,9 +1041,15 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
// the beginning of the document,
|
// the beginning of the document,
|
||||||
// after a 'a' character (i.e.,
|
// after a 'a' character (i.e.,
|
||||||
// \tm0062 or \tmw0063):
|
// \tm0062 or \tmw0063):
|
||||||
equivalent[0].setData((toUnicode || toTM) ? (char)63 : (char)62, 1);
|
equivalent[0].setData((("TMW->Unicode" == mode
|
||||||
|
|| "TMW->TM" == mode)
|
||||||
|
? (char)63 : (char)62),
|
||||||
|
1);
|
||||||
insertDuff(72, ceh.errorGlyphLocation++,
|
insertDuff(72, ceh.errorGlyphLocation++,
|
||||||
equivalent, toUnicode || toTM);
|
equivalent,
|
||||||
|
("TMW->Unicode" == mode
|
||||||
|
|| "TMW->TMW-identity" == mode
|
||||||
|
|| "TMW->TM" == mode));
|
||||||
++i;
|
++i;
|
||||||
// Don't later replace this last guy:
|
// Don't later replace this last guy:
|
||||||
if (replacementStartIndex < ceh.errorGlyphLocation) {
|
if (replacementStartIndex < ceh.errorGlyphLocation) {
|
||||||
|
@ -1038,7 +1057,10 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
}
|
}
|
||||||
equivalent[0].setData(getText(i,1), fontNum);
|
equivalent[0].setData(getText(i,1), fontNum);
|
||||||
insertDuff(72, ceh.errorGlyphLocation++,
|
insertDuff(72, ceh.errorGlyphLocation++,
|
||||||
equivalent, toUnicode || toTM);
|
equivalent,
|
||||||
|
("TMW->Unicode" == mode
|
||||||
|
|| "TMW->TMW-identity" == mode
|
||||||
|
|| "TMW->TM" == mode));
|
||||||
++i;
|
++i;
|
||||||
// Don't later replace this last guy:
|
// Don't later replace this last guy:
|
||||||
if (replacementStartIndex < ceh.errorGlyphLocation) {
|
if (replacementStartIndex < ceh.errorGlyphLocation) {
|
||||||
|
@ -1056,7 +1078,10 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (debug) System.out.println("non-tm/tmw found at offset " + i + "; font=" + fontName + " ord " + (int)getText(i,1).charAt(0));
|
// FIXME: are we doing the right thing here? I
|
||||||
|
// think so -- I think we're just not replacing
|
||||||
|
// the current character, but I'm not at all sure.
|
||||||
|
if (debug > 0) System.out.println("non-tm/tmw found at offset " + i + "; font=" + fontName + " ord " + (int)getText(i,1).charAt(0));
|
||||||
if (replacementQueue.length() > 0) {
|
if (replacementQueue.length() > 0) {
|
||||||
if (!mustReplace) {
|
if (!mustReplace) {
|
||||||
mustReplaceUntil = i;
|
mustReplaceUntil = i;
|
||||||
|
@ -1069,14 +1094,14 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
if (replacementQueue.length() > 0) {
|
if (replacementQueue.length() > 0) {
|
||||||
// this if-else statement is duplicated above; beware!
|
// this if-else statement is duplicated above; beware!
|
||||||
int endIndex = mustReplace ? mustReplaceUntil : i;
|
int endIndex = mustReplace ? mustReplaceUntil : i;
|
||||||
if (toUnicode) {
|
if ("TMW->Unicode" == mode) {
|
||||||
UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
|
UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(replacementQueue);
|
||||||
replaceDuffsWithUnicode(replacementFontSize,
|
replaceDuffsWithUnicode(replacementFontSize,
|
||||||
replacementStartIndex,
|
replacementStartIndex,
|
||||||
endIndex,
|
endIndex,
|
||||||
replacementQueue.toString(),
|
replacementQueue.toString(),
|
||||||
unicodeFont);
|
unicodeFont);
|
||||||
if (debug)
|
if (debug > 0)
|
||||||
System.out.println("unicode rq: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(replacementQueue.toString()));
|
System.out.println("unicode rq: " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToString(replacementQueue.toString()));
|
||||||
} else {
|
} else {
|
||||||
replaceDuffs(replacementFontSize,
|
replaceDuffs(replacementFontSize,
|
||||||
|
@ -1084,7 +1109,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
endIndex,
|
endIndex,
|
||||||
replacementQueue.toString(),
|
replacementQueue.toString(),
|
||||||
replacementFontIndex,
|
replacementFontIndex,
|
||||||
!toTM);
|
"TMW->TM" != mode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ceh.lastOffsetExamined = endPos.getOffset() - 1;
|
ceh.lastOffsetExamined = endPos.getOffset() - 1;
|
||||||
|
@ -1092,7 +1117,9 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
if (ceh.doErrorWrapup && ceh.errorGlyphLocation > 0) {
|
if (ceh.doErrorWrapup && ceh.errorGlyphLocation > 0) {
|
||||||
// Bracket the bad stuff with U+0F3C on the left
|
// Bracket the bad stuff with U+0F3C on the left
|
||||||
// and U+0F3D on the right:
|
// and U+0F3D on the right:
|
||||||
if (!(toUnicode || toTM)) {
|
if (!("TMW->Unicode" == mode
|
||||||
|
|| "TMW->TM" == mode
|
||||||
|
|| "TMW->TMW-identity" == mode)) {
|
||||||
equivalent[0].setData((char)209, 1);
|
equivalent[0].setData((char)209, 1);
|
||||||
insertDuff(72, ceh.errorGlyphLocation++,
|
insertDuff(72, ceh.errorGlyphLocation++,
|
||||||
equivalent, false);
|
equivalent, false);
|
||||||
|
@ -1254,6 +1281,30 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
To test Java's RTF support, it's helpful to just try and do an
|
||||||
|
identity TMW->TMW transformation (you can think of it as a
|
||||||
|
converter that converts nothing).  I'm curious to see if the
|
||||||
|
problem we have with TMW->Unicode conversions failing to
|
||||||
|
preserve whitespace is a bug in our code or a bug in Java's RTF
|
||||||
|
support, and this provides one data point.
|
||||||
|
|
||||||
|
Any error notes collected during the run are printed to
System.err between &lt;TMW_TO_SAME_TWM-errors&gt; tags.

@param start offset at which to begin; passed to convertHelper as
its begin argument
@param end offset at which to stop; passed to convertHelper as its
end argument (convertHelper treats a negative end as the
document's length)
@param numAttemptedReplacements passed through unchanged to
convertHelper
@return false on 100% success, true if any exceptional case was
|
||||||
|
encountered
|
||||||
|
@exception Error if start or end is out of range */
|
||||||
|
public boolean identityTmwToTmwConversion(int start,
|
||||||
|
int end,
|
||||||
|
long numAttemptedReplacements[]) {
|
||||||
|
StringBuffer errors = new StringBuffer();
|
||||||
|
// NOTE: convertHelper compares its mode argument by reference (==),
// so the literal below must stay spelled exactly as it is there.
boolean r = convertHelper(start, end, "TMW->TMW-identity",
|
||||||
|
// The font argument is only a placeholder string; presumably it is
// ignored outside "TMW->Unicode" mode -- TODO confirm.
errors, "Unicode Font should not be used",
|
||||||
|
numAttemptedReplacements);
|
||||||
|
// Dump whatever error notes were collected, bracketed by tags.
System.err.println("<TMW_TO_SAME_TWM-errors>");
|
||||||
|
System.err.println(errors.toString());
|
||||||
|
System.err.println("</TMW_TO_SAME_TWM-errors>");
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns all the paragraph elements in this document that
|
/** Returns all the paragraph elements in this document that
|
||||||
* contain glyphs with offsets in the range [start, end) where
|
* contain glyphs with offsets in the range [start, end) where
|
||||||
* end < 0 is treated as the document's length. Note that roman,
|
* end < 0 is treated as the document's length. Note that roman,
|
||||||
|
|
|
@ -1372,6 +1372,39 @@ private static final DuffCode TMW_cr = new DuffCode(1, '\r');
|
||||||
private static final DuffCode TMW_lf = new DuffCode(1, '\n');
|
private static final DuffCode TMW_lf = new DuffCode(1, '\n');
|
||||||
private static final DuffCode TMW_tab = new DuffCode(1, '\t');
|
private static final DuffCode TMW_tab = new DuffCode(1, '\t');
|
||||||
|
|
||||||
|
/** An identity function used merely for testing: maps a
    TibetanMachineWeb glyph to a DuffCode for that same glyph.
    @param font the glyph's font index; the DuffCode returned for an
    ordinary glyph is built with font number font + 1
    @param ordinal the glyph's character code; must be less than 255
    @param suggestedFont if nonzero, the font number used for a
    returned carriage return, line feed, or tab; if zero, the shared
    TMW_cr, TMW_lf, or TMW_tab constant is returned for those
    @return the DuffCode for the glyph, or null if font is outside
    0..9 or if ordinal is below 32 and is not '\r', '\n', or '\t'
    @exception Error if ordinal is 255 or greater */
|
||||||
|
public static DuffCode mapTMWtoItself(int font, int ordinal, int suggestedFont) {
|
||||||
|
if (font < 0 || font > 9)
|
||||||
|
return null;
|
||||||
|
if (ordinal >= 255) {
|
||||||
|
throw new Error("I didn't know that ever happened.");
|
||||||
|
}
|
||||||
|
if (ordinal < 32) {
|
||||||
|
if (ordinal == (int)'\r') {
|
||||||
|
if (0 == suggestedFont)
|
||||||
|
return TMW_cr;
|
||||||
|
else
|
||||||
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
|
} else if (ordinal == (int)'\n') {
|
||||||
|
if (0 == suggestedFont)
|
||||||
|
return TMW_lf;
|
||||||
|
else
|
||||||
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
|
} else if (ordinal == (int)'\t') {
|
||||||
|
if (0 == suggestedFont)
|
||||||
|
return TMW_tab;
|
||||||
|
else
|
||||||
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
|
} else {
|
||||||
|
// Any other control character is unexpected: flag it and
// return null (no font-1 DuffCode is produced here).
|
||||||
|
ThdlDebug.noteIffyCode();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new DuffCode(font + 1, (char)ordinal);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding
|
/** Returns the DuffCode for the TibetanMachineWeb glyph corresponding
|
||||||
to the given TibetanMachine font
|
to the given TibetanMachine font
|
||||||
(0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254).
|
(0=norm,1=Skt1,2=Skt2,3=Skt3,4=Skt4) and character(32-254).
|
||||||
|
@ -1406,17 +1439,17 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
|
||||||
if (0 == suggestedFont)
|
if (0 == suggestedFont)
|
||||||
return TMW_cr;
|
return TMW_cr;
|
||||||
else
|
else
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
} else if (ordinal == (int)'\n') {
|
} else if (ordinal == (int)'\n') {
|
||||||
if (0 == suggestedFont)
|
if (0 == suggestedFont)
|
||||||
return TMW_lf;
|
return TMW_lf;
|
||||||
else
|
else
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
} else if (ordinal == (int)'\t') {
|
} else if (ordinal == (int)'\t') {
|
||||||
if (0 == suggestedFont)
|
if (0 == suggestedFont)
|
||||||
return TMW_tab;
|
return TMW_tab;
|
||||||
else
|
else
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
} else {
|
} else {
|
||||||
// for robustness, just return font 1, char ordinal.
|
// for robustness, just return font 1, char ordinal.
|
||||||
ThdlDebug.noteIffyCode();
|
ThdlDebug.noteIffyCode();
|
||||||
|
@ -1430,7 +1463,7 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
|
||||||
// assume that a machine goofed along the way. (FIXME: optionally
|
// assume that a machine goofed along the way. (FIXME: optionally
|
||||||
// warn.)
|
// warn.)
|
||||||
if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
|
if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
}
|
}
|
||||||
return TMtoTMW[font][ordinal-32];
|
return TMtoTMW[font][ordinal-32];
|
||||||
}
|
}
|
||||||
|
@ -1465,17 +1498,17 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
|
||||||
if (0 == suggestedFont)
|
if (0 == suggestedFont)
|
||||||
return TM_cr;
|
return TM_cr;
|
||||||
else
|
else
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
} else if (ordinal == (int)'\n') {
|
} else if (ordinal == (int)'\n') {
|
||||||
if (0 == suggestedFont)
|
if (0 == suggestedFont)
|
||||||
return TM_lf;
|
return TM_lf;
|
||||||
else
|
else
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
} else if (ordinal == (int)'\t') {
|
} else if (ordinal == (int)'\t') {
|
||||||
if (0 == suggestedFont)
|
if (0 == suggestedFont)
|
||||||
return TM_tab;
|
return TM_tab;
|
||||||
else
|
else
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
} else {
|
} else {
|
||||||
// for robustness, just return font 1, char ordinal.
|
// for robustness, just return font 1, char ordinal.
|
||||||
ThdlDebug.noteIffyCode();
|
ThdlDebug.noteIffyCode();
|
||||||
|
@ -1488,7 +1521,7 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
|
||||||
return new DuffCode(1, (char)ordinal);
|
return new DuffCode(1, (char)ordinal);
|
||||||
}
|
}
|
||||||
if ((0 != suggestedFont) && (32 == ordinal)) {
|
if ((0 != suggestedFont) && (32 == ordinal)) {
|
||||||
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
|
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it needlessly taxes the GC
|
||||||
}
|
}
|
||||||
DuffCode ans = TMWtoTM[font][ordinal-32];
|
DuffCode ans = TMWtoTM[font][ordinal-32];
|
||||||
return ans;
|
return ans;
|
||||||
|
|
Loading…
Reference in a new issue