Jskad now allows for TMW->Unicode conversion.

This commit is contained in:
dchandler 2003-06-15 16:27:36 +00:00
parent af5b95b08d
commit da70434e52
3 changed files with 159 additions and 29 deletions

View file

@ -332,7 +332,7 @@ public class Jskad extends JPanel implements DocumentListener {
= ((TibetanDocument)dp.getDocument()).convertToTM(0, -1, errors); // entire document
if (errorReturn) {
JOptionPane.showMessageDialog(Jskad.this,
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n"
+ errors.toString(),
"TMW to TM Errors",
JOptionPane.PLAIN_MESSAGE);
@ -352,7 +352,7 @@ public class Jskad extends JPanel implements DocumentListener {
= ((TibetanDocument)dp.getDocument()).convertToTMW(0, -1, errors); // entire document
if (errorReturn) {
JOptionPane.showMessageDialog(Jskad.this,
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n"
+ errors.toString(),
"TM to TMW Errors", JOptionPane.PLAIN_MESSAGE);
} else {
@ -362,9 +362,29 @@ public class Jskad extends JPanel implements DocumentListener {
}
}
});
// DLC FIXME: should this convert just the selection instead of the
// whole document?
JMenuItem toUnicodeItem = new JMenuItem("Convert TMW to Unicode");
toUnicodeItem.addActionListener(new ThdlActionListener() {
    /** Converts the entire document from Tibetan Machine Web to
        Unicode and then tells the user, in a dialog, whether the
        conversion hit any problem glyphs. */
    public void theRealActionPerformed(ActionEvent e) {
        StringBuffer errors = new StringBuffer();
        TibetanDocument tibDoc = (TibetanDocument)dp.getDocument();
        // begin=0 with a negative end covers the entire document;
        // a false return value means 100% success.
        if (!tibDoc.convertToUnicode(0, -1, errors)) {
            JOptionPane.showMessageDialog(Jskad.this,
                                          "Converting Tibetan Machine Web to Unicode met with perfect success.",
                                          "Success", JOptionPane.PLAIN_MESSAGE);
            return;
        }
        // At least one glyph could not be converted; list the
        // accumulated error notes for the user.
        JOptionPane.showMessageDialog(Jskad.this,
                                      "At least one error occurred while converting Tibetan Machine Web\nto Unicode. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n"
                                      + errors.toString(),
                                      "TMW to Unicode Errors", JOptionPane.PLAIN_MESSAGE);
    }
});
toolsMenu.addSeparator();
toolsMenu.add(toTMItem);
toolsMenu.add(toTMWItem);
toolsMenu.add(toUnicodeItem);
}

View file

@ -174,6 +174,21 @@ public class TibetanDocument extends DefaultStyledDocument {
}
}
/** Replaces the single glyph at position pos with the string
    unicode, styled with the Unicode font at size fontSize.
    Replacing can be more efficient than inserting and then
    removing.  A BadLocationException raised by the replacement is
    noted via ThdlDebug.noteIffyCode() and otherwise ignored.
    @param fontSize the font size for the new Unicode text
    @param pos the position of the glyph to be replaced
    @param unicode the text that takes the glyph's place */
private void replaceDuffWithUnicode(int fontSize, int pos,
                                    String unicode) {
    // Work on a private copy: getUnicodeAttributeSet() hands out the
    // one shared instance, and setting the font size on it directly
    // would silently change it for every other user of that set.
    MutableAttributeSet mas
        = new javax.swing.text.SimpleAttributeSet(TibetanMachineWeb.getUnicodeAttributeSet());
    StyleConstants.setFontSize(mas, fontSize);
    try {
        replace(pos, 1, unicode, mas);
    } catch (BadLocationException ble) {
        ThdlDebug.noteIffyCode();
    }
}
private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) {
if (glyphs == null)
return pos;
@ -441,7 +456,7 @@ public class TibetanDocument extends DefaultStyledDocument {
cases will be appended to this StringBuffer
*/
public boolean convertToTM(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, true, errors);
return convertHelper(begin, end, true, false, errors);
}
/** Converts all TibetanMachine glyphs in the document to
@ -457,7 +472,22 @@ public class TibetanDocument extends DefaultStyledDocument {
cases will be appended to this StringBuffer
*/
public boolean convertToTMW(int begin, int end, StringBuffer errors) {
return convertTMW_TM(begin, end, false, errors);
return convertHelper(begin, end, false, false, errors);
}
/** Turns every TibetanMachineWeb glyph in the document into
    Unicode.  Only the range [begin, end) is touched; pass a
    negative end to run through the end of the document.  Be sure
    to set the size for Tibetan as you like it before calling this
    (it usually gets it right on its own, but just in case).
    SPEED_FIXME: might be faster to run over the elements, if they
    are one per font.
    @param begin the first position to convert
    @param end one past the last position to convert, or a negative
    number meaning the end of the document
    @param errors if non-null, then notes about all exceptional
    cases will be appended to this StringBuffer
    @return false on 100% success, true if any exceptional case
    was encountered */
public boolean convertToUnicode(int begin, int end, StringBuffer errors) {
    // The helper requires toTM to be false whenever toUnicode is true.
    final boolean toTM = false;
    final boolean toUnicode = true;
    return convertHelper(begin, end, toTM, toUnicode, errors);
}
/** For debugging only. Start with an empty document, and call
@ -594,15 +624,20 @@ public class TibetanDocument extends DefaultStyledDocument {
return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
}
/** Helper function.
/** Helper function. Converts TMW->TM if !toUnicode&&toTM,
TM->TMW if !toUnicode&&!toTM, TMW->Unicode if toUnicode.
@param errors if non-null, then notes about all exceptional
cases will be appended to this StringBuffer
@return false on 100% success, true if any exceptional case
was encountered
@see #convertToUnicode(int,int,StringBuffer)
@see #convertToTMW(int,int,StringBuffer)
@see #convertToTM(int,int,StringBuffer) */
private boolean convertTMW_TM(int begin, int end, boolean toTM,
StringBuffer errors) {
private boolean convertHelper(int begin, int end, boolean toTM,
boolean toUnicode, StringBuffer errors) {
// toTM is ignored when toUnicode is true:
ThdlDebug.verify(!toUnicode || !toTM);
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
boolean errorReturn = false;
if (end < 0)
@ -620,22 +655,26 @@ public class TibetanDocument extends DefaultStyledDocument {
AttributeSet attr = getCharacterElement(i).getAttributes();
String fontName = StyleConstants.getFontFamily(attr);
int fontNum
= (toTM
= ((toTM || toUnicode)
? TibetanMachineWeb.getTMWFontNumber(fontName)
: TibetanMachineWeb.getTMFontNumber(fontName));
if (0 != fontNum) {
DuffCode dc = null;
if (toTM) {
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
getText(i,1).charAt(0));
String unicode = null;
if (toUnicode) {
unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1,
getText(i,1).charAt(0));
} else {
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
getText(i,1).charAt(0));
if (toTM) {
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
getText(i,1).charAt(0));
} else {
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
getText(i,1).charAt(0));
}
}
if (null != dc) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
if (null != dc || null != unicode) {
// SPEED_FIXME: determining font size might be slow
int fontSize = tibetanFontSize;
try {
@ -643,6 +682,12 @@ public class TibetanDocument extends DefaultStyledDocument {
} catch (Exception e) {
// leave it as tibetanFontSize
}
if (!toUnicode) {
equivalent[0].setData(dc.getCharacter(),
dc.getFontNum());
}
// We have two choices: remove-then-insert
// second vs. insert-then-remove and also
// insert-before vs. insert-after. It turns
@ -651,8 +696,13 @@ public class TibetanDocument extends DefaultStyledDocument {
// insert-then-remove because we're guessing
// that helps with formatting too.
if (replaceInsteadOfInserting()) {
replaceDuff(fontSize, i, equivalent[0], !toTM);
if (toUnicode) {
replaceDuffWithUnicode(fontSize, i, unicode);
} else {
replaceDuff(fontSize, i, equivalent[0], !toTM);
}
} else {
ThdlDebug.verify(!toUnicode); // DLC NOW
if (insertBefore()) {
insertDuff(fontSize, i, equivalent, !toTM);
remove(i+1, 1);
@ -679,7 +729,9 @@ public class TibetanDocument extends DefaultStyledDocument {
problemGlyphsTable.put(cgf, "yes this character appears once");
if (null != errors) {
String err
= (toTM ? "TMW->TM" : "TM->TMW")
= (toUnicode
? "TMW->Unicode"
: (toTM ? "TMW->TM" : "TM->TMW"))
+ " conversion failed for a glyph:\nFont is "
+ fontName + ", glyph number is "
+ (int)getText(i,1).charAt(0)
@ -694,16 +746,18 @@ public class TibetanDocument extends DefaultStyledDocument {
// the beginning of the document:
equivalent[0].setData(getText(i,1), fontNum);
insertDuff(72, errorGlyphLocation++,
equivalent, toTM);
equivalent, toUnicode || toTM);
++i;
}
}
String trickyTMW
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
equivalent[0].setData(trickyTMW, 1);
insertDuff(72, i, equivalent, true);
i += trickyTMW.length();
if (ThdlOptions.getBooleanOption("thdl.leave.bad.tm.tmw.conversions.in.place")) {
String trickyTMW
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
equivalent[0].setData(trickyTMW, 1);
insertDuff(72, i, equivalent, true);
i += trickyTMW.length();
}
}
}
i++;

View file

@ -58,7 +58,6 @@ public class TibetanMachineWeb implements THDLWylieConstants {
private final static String anyOldObjectWillDo
= "this placeholder is useful for debugging; we need a nonnull Object anyway";
private static boolean hasReadData = false;
private static TibetanKeyboard keyboard = null;
private static Set charSet = null;
private static Set vowelSet = null;
@ -72,9 +71,12 @@ public class TibetanMachineWeb implements THDLWylieConstants {
private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM
private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW
private static String[][] TMWtoUnicode = new String[10][127-32]; // ordinal 127 doesn't occur in TMW
private static String fileName = "tibwn.ini";
private static final String DELIMITER = "~";
private static Set top_vowels;
/** the font we use when we convert TMW->Unicode: */
private static SimpleAttributeSet unicodeFontAttributeSet = null;
/** a way of encoding the choice of TibetanMachineWeb font from
that family of 10 fonts: */
private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11];
@ -261,6 +263,11 @@ public class TibetanMachineWeb implements THDLWylieConstants {
readInFontFiles();
}
unicodeFontAttributeSet = new SimpleAttributeSet();
StyleConstants.setFontFamily(unicodeFontAttributeSet,
ThdlOptions.getStringOption("thdl.tmw.to.unicode.font",
"Arial Unicode MS"));
webFontAttributeSet[0] = null;
for (int i=1; i<webFontAttributeSet.length; i++) {
webFontAttributeSet[i] = new SimpleAttributeSet();
@ -473,11 +480,12 @@ public class TibetanMachineWeb implements THDLWylieConstants {
ThdlDebug.verify(false);
}
}
// DLC FIXME: use unicodeBuffer for a TMW->Unicode conversion.
TMWtoUnicode[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32]
= unicodeBuffer.toString(); // TMW->Unicode mapping
// For V&V:
// DLC FIXME: also check for ^[90-bc] and ^.+[40-6a]
// DLC FIXME: also check for ^[90-bc]. and ^.+[40-6a]
// StringBuffer wylie_minus_plusses_buf
// = UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(unicodeBuffer.toString());
@ -545,8 +553,6 @@ public class TibetanMachineWeb implements THDLWylieConstants {
System.out.println("file Disappeared");
ThdlDebug.noteIffyCode();
}
hasReadData = true;
}
/**
@ -634,6 +640,17 @@ public static SimpleAttributeSet getAttributeSet(int font) {
return null;
}
/**
 * Returns the attribute set that describes the font used for the
 * Unicode text produced by our TMW->Unicode conversion.  {@link
 * TibetanDocument TibetanDocument} needs this in order to insert
 * styled text.
 * @return the SimpleAttributeSet encoding the Unicode font; this is
 * the class's single shared instance, not a copy */
public static SimpleAttributeSet getUnicodeAttributeSet() {
    final SimpleAttributeSet unicodeFont = unicodeFontAttributeSet;
    return unicodeFont;
}
/**
* Gets the AttributeSet for the given TibetanMachine font.
* This information is required in order to be able to put styled
@ -1149,6 +1166,45 @@ private static DuffCode getUnusualTMtoTMW(int font, int code) {
}
}
private static final String Unicode_cr = "\r";
private static final String Unicode_lf = "\n";
private static final String Unicode_tab = "\t";
/** Returns the sequence of Unicode corresponding to the given
    TibetanMachineWeb font
    (0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and
    character (32-126).

    Null is returned for an existing TibetanMachineWeb glyph if and
    only if that glyph has no corresponding Unicode mapping.  Null is
    also returned if the input isn't valid: a font outside the
    mapping table, an ordinal of 127 or above (ordinal 127 doesn't
    occur in TMW and has no slot in the table), or an unsupported
    control character.

    Only a few control characters are supported: '\r' (carriage
    return), '\n' (line feed), and '\t' (tab).
    @param font the zero-based TibetanMachineWeb font index
    @param ordinal the character's ordinal within that font
    @return the Unicode string for the glyph, or null as described
    above
*/
public static String mapTMWtoUnicode(int font, int ordinal) {
    if (font < 0 || font >= TMWtoUnicode.length) {
        return null;
    }
    if (ordinal < 32) {
        if (ordinal == (int)'\r') {
            return Unicode_cr;
        } else if (ordinal == (int)'\n') {
            return Unicode_lf;
        } else if (ordinal == (int)'\t') {
            return Unicode_tab;
        }
        // Any other control character (or a negative ordinal) is
        // unsupported: note it and report "no mapping".
        ThdlDebug.noteIffyCode();
        return null;
    }
    // The table holds one slot per ordinal starting at 32, so the
    // upper bound must be checked against the row length; comparing
    // with "> 127" let ordinal 127 index one past the end of the row.
    int slot = ordinal - 32;
    if (slot >= TMWtoUnicode[font].length) {
        return null;
    }
    return TMWtoUnicode[font][slot];
}
/**
* Gets the TibetanMachine font number for this font name.
* @param name a font name