Jskad now allows for TMW->Unicode conversion.
This commit is contained in:
parent
af5b95b08d
commit
da70434e52
3 changed files with 159 additions and 29 deletions
|
@ -332,7 +332,7 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
= ((TibetanDocument)dp.getDocument()).convertToTM(0, -1, errors); // entire document
|
||||
if (errorReturn) {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
|
||||
"At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n"
|
||||
+ errors.toString(),
|
||||
"TMW to TM Errors",
|
||||
JOptionPane.PLAIN_MESSAGE);
|
||||
|
@ -352,7 +352,7 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
= ((TibetanDocument)dp.getDocument()).convertToTMW(0, -1, errors); // entire document
|
||||
if (errorReturn) {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n"
|
||||
"At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n"
|
||||
+ errors.toString(),
|
||||
"TM to TMW Errors", JOptionPane.PLAIN_MESSAGE);
|
||||
} else {
|
||||
|
@ -362,9 +362,29 @@ public class Jskad extends JPanel implements DocumentListener {
|
|||
}
|
||||
}
|
||||
});
|
||||
|
||||
JMenuItem toUnicodeItem = new JMenuItem("Convert TMW to Unicode"); // DLC FIXME: do it just in the selection?
|
||||
toUnicodeItem.addActionListener(new ThdlActionListener() {
|
||||
public void theRealActionPerformed(ActionEvent e) {
|
||||
StringBuffer errors = new StringBuffer();
|
||||
boolean errorReturn
|
||||
= ((TibetanDocument)dp.getDocument()).convertToUnicode(0, -1, errors); // entire document
|
||||
if (errorReturn) {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"At least one error occurred while converting Tibetan Machine Web\nto Unicode. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n"
|
||||
+ errors.toString(),
|
||||
"TMW to Unicode Errors", JOptionPane.PLAIN_MESSAGE);
|
||||
} else {
|
||||
JOptionPane.showMessageDialog(Jskad.this,
|
||||
"Converting Tibetan Machine Web to Unicode met with perfect success.",
|
||||
"Success", JOptionPane.PLAIN_MESSAGE);
|
||||
}
|
||||
}
|
||||
});
|
||||
toolsMenu.addSeparator();
|
||||
toolsMenu.add(toTMItem);
|
||||
toolsMenu.add(toTMWItem);
|
||||
toolsMenu.add(toUnicodeItem);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -174,6 +174,21 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
}
|
||||
}
|
||||
|
||||
/** Replacing can be more efficient than inserting and then
|
||||
removing. This replaces the glyph at position pos with
|
||||
unicode. The font size for the new unicode is fontSize. */
|
||||
private void replaceDuffWithUnicode(int fontSize, int pos,
|
||||
String unicode) {
|
||||
MutableAttributeSet mas
|
||||
= TibetanMachineWeb.getUnicodeAttributeSet();
|
||||
StyleConstants.setFontSize(mas, fontSize);
|
||||
try {
|
||||
replace(pos, 1, unicode, mas);
|
||||
} catch (BadLocationException ble) {
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
}
|
||||
|
||||
private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) {
|
||||
if (glyphs == null)
|
||||
return pos;
|
||||
|
@ -441,7 +456,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
cases will be appended to this StringBuffer
|
||||
*/
|
||||
public boolean convertToTM(int begin, int end, StringBuffer errors) {
|
||||
return convertTMW_TM(begin, end, true, errors);
|
||||
return convertHelper(begin, end, true, false, errors);
|
||||
}
|
||||
|
||||
/** Converts all TibetanMachine glyphs in the document to
|
||||
|
@ -457,7 +472,22 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
cases will be appended to this StringBuffer
|
||||
*/
|
||||
public boolean convertToTMW(int begin, int end, StringBuffer errors) {
|
||||
return convertTMW_TM(begin, end, false, errors);
|
||||
return convertHelper(begin, end, false, false, errors);
|
||||
}
|
||||
|
||||
/** Converts all TibetanMachineWeb glyphs in the document to
|
||||
Unicode. Works within the range [start, end). Using a
|
||||
negative number for end means that this will run to the end of
|
||||
the document. Be sure to set the size for Tibetan as you like
|
||||
it before using this (well, it usually gets it right on its
|
||||
own, but just in case). SPEED_FIXME: might be faster to run
|
||||
over the elements, if they are one per font.
|
||||
@return false on 100% success, true if any exceptional case
|
||||
was encountered
|
||||
@param errors if non-null, then notes about all exceptional
|
||||
cases will be appended to this StringBuffer */
|
||||
public boolean convertToUnicode(int begin, int end, StringBuffer errors) {
|
||||
return convertHelper(begin, end, false, true, errors);
|
||||
}
|
||||
|
||||
/** For debugging only. Start with an empty document, and call
|
||||
|
@ -594,15 +624,20 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
|
||||
}
|
||||
|
||||
/** Helper function.
|
||||
/** Helper function. Converts TMW->TM if !toUnicode&&toTM,
|
||||
TM->TMW if !toUnicode&&!toTM, TMW->Unicode if toUnicode.
|
||||
@param errors if non-null, then notes about all exceptional
|
||||
cases will be appended to this StringBuffer
|
||||
@return false on 100% success, true if any exceptional case
|
||||
was encountered
|
||||
@see #convertToUnicode(int,int,StringBuffer)
|
||||
@see #convertToTMW(int,int,StringBuffer)
|
||||
@see #convertToTM(int,int,StringBuffer) */
|
||||
private boolean convertTMW_TM(int begin, int end, boolean toTM,
|
||||
StringBuffer errors) {
|
||||
private boolean convertHelper(int begin, int end, boolean toTM,
|
||||
boolean toUnicode, StringBuffer errors) {
|
||||
// toTM is ignored when toUnicode is true:
|
||||
ThdlDebug.verify(!toUnicode || !toTM);
|
||||
|
||||
boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
|
||||
boolean errorReturn = false;
|
||||
if (end < 0)
|
||||
|
@ -620,22 +655,26 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
AttributeSet attr = getCharacterElement(i).getAttributes();
|
||||
String fontName = StyleConstants.getFontFamily(attr);
|
||||
int fontNum
|
||||
= (toTM
|
||||
= ((toTM || toUnicode)
|
||||
? TibetanMachineWeb.getTMWFontNumber(fontName)
|
||||
: TibetanMachineWeb.getTMFontNumber(fontName));
|
||||
|
||||
if (0 != fontNum) {
|
||||
DuffCode dc = null;
|
||||
if (toTM) {
|
||||
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
String unicode = null;
|
||||
if (toUnicode) {
|
||||
unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
} else {
|
||||
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
if (toTM) {
|
||||
dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
} else {
|
||||
dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1,
|
||||
getText(i,1).charAt(0));
|
||||
}
|
||||
}
|
||||
if (null != dc) {
|
||||
equivalent[0].setData(dc.getCharacter(),
|
||||
dc.getFontNum());
|
||||
if (null != dc || null != unicode) {
|
||||
// SPEED_FIXME: determining font size might be slow
|
||||
int fontSize = tibetanFontSize;
|
||||
try {
|
||||
|
@ -643,6 +682,12 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
} catch (Exception e) {
|
||||
// leave it as tibetanFontSize
|
||||
}
|
||||
|
||||
if (!toUnicode) {
|
||||
equivalent[0].setData(dc.getCharacter(),
|
||||
dc.getFontNum());
|
||||
}
|
||||
|
||||
// We have two choices: remove-then-insert
|
||||
// second vs. insert-then-remove and also
|
||||
// insert-before vs. insert-after. It turns
|
||||
|
@ -651,8 +696,13 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
// insert-then-remove because we're guessing
|
||||
// that helps with formatting too.
|
||||
if (replaceInsteadOfInserting()) {
|
||||
replaceDuff(fontSize, i, equivalent[0], !toTM);
|
||||
if (toUnicode) {
|
||||
replaceDuffWithUnicode(fontSize, i, unicode);
|
||||
} else {
|
||||
replaceDuff(fontSize, i, equivalent[0], !toTM);
|
||||
}
|
||||
} else {
|
||||
ThdlDebug.verify(!toUnicode); // DLC NOW
|
||||
if (insertBefore()) {
|
||||
insertDuff(fontSize, i, equivalent, !toTM);
|
||||
remove(i+1, 1);
|
||||
|
@ -679,7 +729,9 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
problemGlyphsTable.put(cgf, "yes this character appears once");
|
||||
if (null != errors) {
|
||||
String err
|
||||
= (toTM ? "TMW->TM" : "TM->TMW")
|
||||
= (toUnicode
|
||||
? "TMW->Unicode"
|
||||
: (toTM ? "TMW->TM" : "TM->TMW"))
|
||||
+ " conversion failed for a glyph:\nFont is "
|
||||
+ fontName + ", glyph number is "
|
||||
+ (int)getText(i,1).charAt(0)
|
||||
|
@ -694,16 +746,18 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
// the beginning of the document:
|
||||
equivalent[0].setData(getText(i,1), fontNum);
|
||||
insertDuff(72, errorGlyphLocation++,
|
||||
equivalent, toTM);
|
||||
equivalent, toUnicode || toTM);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
String trickyTMW
|
||||
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
|
||||
equivalent[0].setData(trickyTMW, 1);
|
||||
insertDuff(72, i, equivalent, true);
|
||||
i += trickyTMW.length();
|
||||
if (ThdlOptions.getBooleanOption("thdl.leave.bad.tm.tmw.conversions.in.place")) {
|
||||
String trickyTMW
|
||||
= "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-";
|
||||
equivalent[0].setData(trickyTMW, 1);
|
||||
insertDuff(72, i, equivalent, true);
|
||||
i += trickyTMW.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
i++;
|
||||
|
|
|
@ -58,7 +58,6 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
private final static String anyOldObjectWillDo
|
||||
= "this placeholder is useful for debugging; we need a nonnull Object anyway";
|
||||
|
||||
private static boolean hasReadData = false;
|
||||
private static TibetanKeyboard keyboard = null;
|
||||
private static Set charSet = null;
|
||||
private static Set vowelSet = null;
|
||||
|
@ -72,9 +71,12 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
|
||||
private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM
|
||||
private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW
|
||||
private static String[][] TMWtoUnicode = new String[10][127-32]; // ordinal 127 doesn't occur in TMW
|
||||
private static String fileName = "tibwn.ini";
|
||||
private static final String DELIMITER = "~";
|
||||
private static Set top_vowels;
|
||||
/** the font we use when we convert TMW->Unicode: */
|
||||
private static SimpleAttributeSet unicodeFontAttributeSet = null;
|
||||
/** a way of encoding the choice of TibetanMachineWeb font from
|
||||
that family of 10 fonts: */
|
||||
private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11];
|
||||
|
@ -261,6 +263,11 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
readInFontFiles();
|
||||
}
|
||||
|
||||
unicodeFontAttributeSet = new SimpleAttributeSet();
|
||||
StyleConstants.setFontFamily(unicodeFontAttributeSet,
|
||||
ThdlOptions.getStringOption("thdl.tmw.to.unicode.font",
|
||||
"Arial Unicode MS"));
|
||||
|
||||
webFontAttributeSet[0] = null;
|
||||
for (int i=1; i<webFontAttributeSet.length; i++) {
|
||||
webFontAttributeSet[i] = new SimpleAttributeSet();
|
||||
|
@ -473,11 +480,12 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
ThdlDebug.verify(false);
|
||||
}
|
||||
}
|
||||
// DLC FIXME: use unicodeBuffer for a TMW->Unicode conversion.
|
||||
TMWtoUnicode[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32]
|
||||
= unicodeBuffer.toString(); // TMW->Unicode mapping
|
||||
|
||||
// For V&V:
|
||||
|
||||
// DLC FIXME: also check for ^[90-bc] and ^.+[40-6a]
|
||||
// DLC FIXME: also check for ^[90-bc]. and ^.+[40-6a]
|
||||
|
||||
// StringBuffer wylie_minus_plusses_buf
|
||||
// = UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(unicodeBuffer.toString());
|
||||
|
@ -545,8 +553,6 @@ public class TibetanMachineWeb implements THDLWylieConstants {
|
|||
System.out.println("file Disappeared");
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
|
||||
hasReadData = true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -634,6 +640,17 @@ public static SimpleAttributeSet getAttributeSet(int font) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the AttributeSet for the font we use for the Unicode we create
|
||||
* in our TMW->Unicode conversion. This information is required in
|
||||
* order to be able to put styled text into {@link TibetanDocument
|
||||
* TibetanDocument}.
|
||||
* @return a SimpleAttributeSet for the Unicode font - that is, a way
|
||||
* of encoding the font itself */
|
||||
public static SimpleAttributeSet getUnicodeAttributeSet() {
|
||||
return unicodeFontAttributeSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the AttributeSet for the given TibetanMachine font.
|
||||
* This information is required in order to be able to put styled
|
||||
|
@ -1149,6 +1166,45 @@ private static DuffCode getUnusualTMtoTMW(int font, int code) {
|
|||
}
|
||||
}
|
||||
|
||||
private static final String Unicode_cr = "\r";
|
||||
private static final String Unicode_lf = "\n";
|
||||
private static final String Unicode_tab = "\t";
|
||||
|
||||
|
||||
/** Returns the sequence of Unicode corresponding to the given
|
||||
TibetanMachineWeb font
|
||||
(0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and
|
||||
character(32-127).
|
||||
|
||||
Null is returned for an existing TibetanMachineWeb glyph if and
|
||||
only if that glyph has no corresponding Unicode mapping. Null is
|
||||
returned if the input isn't valid.
|
||||
|
||||
Only a few control characters are supported: '\r' (carriage
|
||||
return), '\n' (line feed), and '\t' (tab).
|
||||
*/
|
||||
public static String mapTMWtoUnicode(int font, int ordinal) {
|
||||
if (font < 0 || font > 9)
|
||||
return null;
|
||||
if (ordinal > 127)
|
||||
return null;
|
||||
if (ordinal < 32) {
|
||||
if (ordinal == (int)'\r')
|
||||
return Unicode_cr;
|
||||
else if (ordinal == (int)'\n')
|
||||
return Unicode_lf;
|
||||
else if (ordinal == (int)'\t')
|
||||
return Unicode_tab;
|
||||
else {
|
||||
// for robustness, just return a String consisting of the
|
||||
// character which has the ordinal 'ordinal'.
|
||||
ThdlDebug.noteIffyCode();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return TMWtoUnicode[font][ordinal-32];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the TibetanMachine font number for this font name.
|
||||
* @param name a font name
|
||||
|
|
Loading…
Reference in a new issue