diff --git a/source/org/thdl/tib/input/Jskad.java b/source/org/thdl/tib/input/Jskad.java index bbf6d79..4fa80ba 100644 --- a/source/org/thdl/tib/input/Jskad.java +++ b/source/org/thdl/tib/input/Jskad.java @@ -332,7 +332,7 @@ public class Jskad extends JPanel implements DocumentListener { = ((TibetanDocument)dp.getDocument()).convertToTM(0, -1, errors); // entire document if (errorReturn) { JOptionPane.showMessageDialog(Jskad.this, - "At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n" + "At least one error occurred while converting Tibetan Machine Web\nto Tibetan Machine. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n" + errors.toString(), "TMW to TM Errors", JOptionPane.PLAIN_MESSAGE); @@ -352,7 +352,7 @@ public class Jskad extends JPanel implements DocumentListener { = ((TibetanDocument)dp.getDocument()).convertToTMW(0, -1, errors); // entire document if (errorReturn) { JOptionPane.showMessageDialog(Jskad.this, - "At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the glyphs found after the 72-point Tibetan Machine Web\n30-letter alphabet.\nThe following glyphs were problems:\n" + "At least one error occurred while converting Tibetan Machine\nto Tibetan Machine Web. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n" + errors.toString(), "TM to TMW Errors", JOptionPane.PLAIN_MESSAGE); } else { @@ -362,9 +362,29 @@ public class Jskad extends JPanel implements DocumentListener { } } }); + + JMenuItem toUnicodeItem = new JMenuItem("Convert TMW to Unicode"); // DLC FIXME: do it just in the selection? 
+ toUnicodeItem.addActionListener(new ThdlActionListener() { + public void theRealActionPerformed(ActionEvent e) { + StringBuffer errors = new StringBuffer(); + boolean errorReturn + = ((TibetanDocument)dp.getDocument()).convertToUnicode(0, -1, errors); // entire document + if (errorReturn) { + JOptionPane.showMessageDialog(Jskad.this, + "At least one error occurred while converting Tibetan Machine Web\nto Unicode. Your document is mostly converted,\nexcept for the following glyphs, which you should replace manually\nbefore retrying:\n" + + errors.toString(), + "TMW to Unicode Errors", JOptionPane.PLAIN_MESSAGE); + } else { + JOptionPane.showMessageDialog(Jskad.this, + "Converting Tibetan Machine Web to Unicode met with perfect success.", + "Success", JOptionPane.PLAIN_MESSAGE); + } + } + }); toolsMenu.addSeparator(); toolsMenu.add(toTMItem); toolsMenu.add(toTMWItem); + toolsMenu.add(toUnicodeItem); } diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index cdba5c0..49d02a6 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -174,6 +174,21 @@ public class TibetanDocument extends DefaultStyledDocument { } } + /** Replacing can be more efficient than inserting and then + removing. This replaces the glyph at position pos with + unicode. The font size for the new unicode is fontSize. 
*/ + private void replaceDuffWithUnicode(int fontSize, int pos, + String unicode) { + MutableAttributeSet mas + = TibetanMachineWeb.getUnicodeAttributeSet(); + StyleConstants.setFontSize(mas, fontSize); + try { + replace(pos, 1, unicode, mas); + } catch (BadLocationException ble) { + ThdlDebug.noteIffyCode(); + } + } + private int insertDuff(int fontSize, int pos, DuffData[] glyphs, boolean asTMW) { if (glyphs == null) return pos; @@ -441,7 +456,7 @@ public class TibetanDocument extends DefaultStyledDocument { cases will be appended to this StringBuffer */ public boolean convertToTM(int begin, int end, StringBuffer errors) { - return convertTMW_TM(begin, end, true, errors); + return convertHelper(begin, end, true, false, errors); } /** Converts all TibetanMachine glyphs in the document to @@ -457,7 +472,22 @@ public class TibetanDocument extends DefaultStyledDocument { cases will be appended to this StringBuffer */ public boolean convertToTMW(int begin, int end, StringBuffer errors) { - return convertTMW_TM(begin, end, false, errors); + return convertHelper(begin, end, false, false, errors); + } + + /** Converts all TibetanMachineWeb glyphs in the document to + Unicode. Works within the range [start, end). Using a + negative number for end means that this will run to the end of + the document. Be sure to set the size for Tibetan as you like + it before using this (well, it usually gets it right on its + own, but just in case). SPEED_FIXME: might be faster to run + over the elements, if they are one per font. + @return false on 100% success, true if any exceptional case + was encountered + @param errors if non-null, then notes about all exceptional + cases will be appended to this StringBuffer */ + public boolean convertToUnicode(int begin, int end, StringBuffer errors) { + return convertHelper(begin, end, false, true, errors); } /** For debugging only. 
Start with an empty document, and call
@@ -594,15 +624,20 @@ public class TibetanDocument extends DefaultStyledDocument {
         return !ThdlOptions.getBooleanOption("thdl.insert.and.remove.instead.of.replacing");
     }
 
-    /** Helper function.
+    /** Helper function. Converts TMW->TM if !toUnicode&&toTM,
+        TM->TMW if !toUnicode&&!toTM, TMW->Unicode if toUnicode.
         @param errors if non-null, then notes about all exceptional
         cases will be appended to this StringBuffer
         @return false on 100% success, true if any exceptional case
         was encountered
+        @see #convertToUnicode(int,int,StringBuffer)
         @see convertToTMW(int,int)
         @see convertToTM(int,int) */
-    private boolean convertTMW_TM(int begin, int end, boolean toTM,
-                                  StringBuffer errors) {
+    private boolean convertHelper(int begin, int end, boolean toTM,
+                                  boolean toUnicode, StringBuffer errors) {
+        // toTM is ignored when toUnicode is true:
+        ThdlDebug.verify(!toUnicode || !toTM);
+
         boolean toStdout = ThdlOptions.getBooleanOption("thdl.debug");
         boolean errorReturn = false;
         if (end < 0)
@@ -620,22 +655,26 @@ public class TibetanDocument extends DefaultStyledDocument {
                 AttributeSet attr = getCharacterElement(i).getAttributes();
                 String fontName = StyleConstants.getFontFamily(attr);
                 int fontNum
-                    = (toTM
+                    = ((toTM || toUnicode)
                        ? 
TibetanMachineWeb.getTMWFontNumber(fontName) : TibetanMachineWeb.getTMFontNumber(fontName)); if (0 != fontNum) { DuffCode dc = null; - if (toTM) { - dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1, - getText(i,1).charAt(0)); + String unicode = null; + if (toUnicode) { + unicode = TibetanMachineWeb.mapTMWtoUnicode(fontNum - 1, + getText(i,1).charAt(0)); } else { - dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1, - getText(i,1).charAt(0)); + if (toTM) { + dc = TibetanMachineWeb.mapTMWtoTM(fontNum - 1, + getText(i,1).charAt(0)); + } else { + dc = TibetanMachineWeb.mapTMtoTMW(fontNum - 1, + getText(i,1).charAt(0)); + } } - if (null != dc) { - equivalent[0].setData(dc.getCharacter(), - dc.getFontNum()); + if (null != dc || null != unicode) { // SPEED_FIXME: determining font size might be slow int fontSize = tibetanFontSize; try { @@ -643,6 +682,12 @@ public class TibetanDocument extends DefaultStyledDocument { } catch (Exception e) { // leave it as tibetanFontSize } + + if (!toUnicode) { + equivalent[0].setData(dc.getCharacter(), + dc.getFontNum()); + } + // We have two choices: remove-then-insert // second vs. insert-then-remove and also // insert-before vs. insert-after. It turns @@ -651,8 +696,13 @@ public class TibetanDocument extends DefaultStyledDocument { // insert-then-remove because we're guessing // that helps with formatting too. if (replaceInsteadOfInserting()) { - replaceDuff(fontSize, i, equivalent[0], !toTM); + if (toUnicode) { + replaceDuffWithUnicode(fontSize, i, unicode); + } else { + replaceDuff(fontSize, i, equivalent[0], !toTM); + } } else { + ThdlDebug.verify(!toUnicode); // DLC NOW if (insertBefore()) { insertDuff(fontSize, i, equivalent, !toTM); remove(i+1, 1); @@ -679,7 +729,9 @@ public class TibetanDocument extends DefaultStyledDocument { problemGlyphsTable.put(cgf, "yes this character appears once"); if (null != errors) { String err - = (toTM ? "TMW->TM" : "TM->TMW") + = (toUnicode + ? "TMW->Unicode" + : (toTM ? 
"TMW->TM" : "TM->TMW")) + " conversion failed for a glyph:\nFont is " + fontName + ", glyph number is " + (int)getText(i,1).charAt(0) @@ -694,16 +746,18 @@ public class TibetanDocument extends DefaultStyledDocument { // the beginning of the document: equivalent[0].setData(getText(i,1), fontNum); insertDuff(72, errorGlyphLocation++, - equivalent, toTM); + equivalent, toUnicode || toTM); ++i; } } - String trickyTMW - = "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-"; - equivalent[0].setData(trickyTMW, 1); - insertDuff(72, i, equivalent, true); - i += trickyTMW.length(); + if (ThdlOptions.getBooleanOption("thdl.leave.bad.tm.tmw.conversions.in.place")) { + String trickyTMW + = "!-\"-#-$-%-&-'-(-)-*-+-,-.-/-0-1-2-3-4-5-6-7-8-9-:-;-<-=->-?-"; + equivalent[0].setData(trickyTMW, 1); + insertDuff(72, i, equivalent, true); + i += trickyTMW.length(); + } } } i++; diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index fb6702c..7a7d0ab 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -58,7 +58,6 @@ public class TibetanMachineWeb implements THDLWylieConstants { private final static String anyOldObjectWillDo = "this placeholder is useful for debugging; we need a nonnull Object anyway"; - private static boolean hasReadData = false; private static TibetanKeyboard keyboard = null; private static Set charSet = null; private static Set vowelSet = null; @@ -72,9 +71,12 @@ public class TibetanMachineWeb implements THDLWylieConstants { private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] 
is not used private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW + private static String[][] TMWtoUnicode = new String[10][127-32]; // ordinal 127 doesn't occur in TMW private static String fileName = "tibwn.ini"; private static final String DELIMITER = "~"; private static Set top_vowels; + /** the font we use when we convert TMW->Unicode: */ + private static SimpleAttributeSet unicodeFontAttributeSet = null; /** a way of encoding the choice of TibetanMachineWeb font from that family of 10 fonts: */ private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11]; @@ -261,6 +263,11 @@ public class TibetanMachineWeb implements THDLWylieConstants { readInFontFiles(); } + unicodeFontAttributeSet = new SimpleAttributeSet(); + StyleConstants.setFontFamily(unicodeFontAttributeSet, + ThdlOptions.getStringOption("thdl.tmw.to.unicode.font", + "Arial Unicode MS")); + webFontAttributeSet[0] = null; for (int i=1; iUnicode conversion. + TMWtoUnicode[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32] + = unicodeBuffer.toString(); // TMW->Unicode mapping // For V&V: -// DLC FIXME: also check for ^[90-bc] and ^.+[40-6a] +// DLC FIXME: also check for ^[90-bc]. and ^.+[40-6a] // StringBuffer wylie_minus_plusses_buf // = UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(unicodeBuffer.toString()); @@ -545,8 +553,6 @@ public class TibetanMachineWeb implements THDLWylieConstants { System.out.println("file Disappeared"); ThdlDebug.noteIffyCode(); } - - hasReadData = true; } /** @@ -634,6 +640,17 @@ public static SimpleAttributeSet getAttributeSet(int font) { return null; } +/** +* Gets the AttributeSet for the font we use for the Unicode we create +* in our TMW->Unicode conversion. This information is required in +* order to be able to put styled text into {@link TibetanDocument +* TibetanDocument}. 
+* @return a SimpleAttributeSet for the Unicode font - that is, a way
+* of encoding the font itself */
+public static SimpleAttributeSet getUnicodeAttributeSet() {
+    return unicodeFontAttributeSet;
+}
+
 /**
 * Gets the AttributeSet for the given TibetanMachine font.
 * This information is required in order to be able to put styled
@@ -1149,6 +1166,45 @@ private static DuffCode getUnusualTMtoTMW(int font, int code) {
     }
 }
 
+private static final String Unicode_cr = "\r";
+private static final String Unicode_lf = "\n";
+private static final String Unicode_tab = "\t";
+
+
+/** Returns the sequence of Unicode corresponding to the given
+    TibetanMachineWeb font
+    (0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and
+    character (32-126).
+
+    Null is returned for an existing TibetanMachineWeb glyph if and
+    only if that glyph has no corresponding Unicode mapping. Null is
+    returned if the input isn't valid.
+
+    Only a few control characters are supported: '\r' (carriage
+    return), '\n' (line feed), and '\t' (tab).
+ */
+public static String mapTMWtoUnicode(int font, int ordinal) {
+    if (font < 0 || font > 9)
+        return null;
+    if (ordinal >= 127)
+        return null;
+    if (ordinal < 32) {
+        if (ordinal == (int)'\r')
+            return Unicode_cr;
+        else if (ordinal == (int)'\n')
+            return Unicode_lf;
+        else if (ordinal == (int)'\t')
+            return Unicode_tab;
+        else {
+            // Unsupported control character (or negative ordinal): there
+            // is no Unicode mapping for it, so flag it and return null.
+            ThdlDebug.noteIffyCode();
+            return null;
+        }
+    }
+    return TMWtoUnicode[font][ordinal-32];
+}
+
 /**
 * Gets the TibetanMachine font number for this font name.
 * @param name a font name