diff --git a/source/org/thdl/tib/input/ConverterGUI.java b/source/org/thdl/tib/input/ConverterGUI.java index a4da351..d85c474 100644 --- a/source/org/thdl/tib/input/ConverterGUI.java +++ b/source/org/thdl/tib/input/ConverterGUI.java @@ -67,6 +67,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants { "Conversion failed", JOptionPane.ERROR_MESSAGE); return false; + } else if (44 == returnCode) { + JOptionPane.showMessageDialog(cd, + "Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE:\n Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct EWTS transliteration.", + "Attention required", + JOptionPane.ERROR_MESSAGE); + return false; } else if (43 == returnCode) { JOptionPane.showMessageDialog(cd, "Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?", diff --git a/source/org/thdl/tib/input/DuffPane.java b/source/org/thdl/tib/input/DuffPane.java index 4f7cf1d..e27c229 100644 --- a/source/org/thdl/tib/input/DuffPane.java +++ b/source/org/thdl/tib/input/DuffPane.java @@ -299,12 +299,11 @@ public class DuffPane extends TibetanPane implements FocusListener { private void setupEditor() { rtfBoard = getToolkit().getSystemClipboard(); - newDocument(); - romanFontFamily = ThdlOptions.getStringOption("thdl.default.roman.font.face", "Serif"); romanFontSize = defaultRomanFontSize(); - setRomanAttributeSet(romanFontFamily, romanFontSize); + + newDocument(); caret = getCaret(); @@ -471,6 +470,9 @@ public class DuffPane extends TibetanPane implements FocusListener { StyleConstants.setFontFamily(defaultStyle, "TibetanMachineWeb"); StyleConstants.setFontSize(defaultStyle, defaultTibFontSize()); + setRomanAttributeSet(romanFontFamily, romanFontSize); + + newGlyphList.clear(); initKeyboard(); } @@ -773,7 +775,10 @@ public class DuffPane extends TibetanPane implements FocusListener { return; } - String wylie = TibetanMachineWeb.getWylieForGlyph(fontNum, k); + String wylie + = TibetanMachineWeb.getWylieForGlyph(fontNum, + k, + TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); if (TibetanMachineWeb.isWyliePunc(wylie)) { if (charList.isEmpty() && !TibetanMachineWeb.isAChenRequiredBeforeVowel()) { printAChenWithVowel(v); @@ -873,7 +878,10 @@ public class DuffPane extends TibetanPane implements FocusListener { if (k<32 || k>126) //if previous character is formatting or some other non-character break special_bindu_block; - String wylie = TibetanMachineWeb.getWylieForGlyph(fontNum, k); + String wylie + = TibetanMachineWeb.getWylieForGlyph(fontNum, + k, + TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); if (!TibetanMachineWeb.isWylieVowel(wylie)) break special_bindu_block; @@ -1587,10 +1595,14 @@ public void paste(int offset) { * Converts the entire associated document into Extended Wylie. If the * document consists of both Tibetan and non-Tibetan fonts, however, * the conversion stops at the first non-Tibetan font. +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to one of these glyphs, then noSuchWylie[0] will be +* set to true * @return the string of Wylie corresponding to the associated document * @see org.thdl.tib.text.TibetanDocument#getWylie() */ - public String getWylie() { - return getTibDoc().getWylie(); + public String getWylie(boolean noSuchWylie[]) { + return getTibDoc().getWylie(noSuchWylie); } diff --git a/source/org/thdl/tib/input/Jskad.java b/source/org/thdl/tib/input/Jskad.java index 2127934..5401af2 100644 --- a/source/org/thdl/tib/input/Jskad.java +++ b/source/org/thdl/tib/input/Jskad.java @@ -1076,9 +1076,14 @@ public class Jskad extends JPanel implements DocumentListener { private void toWylie() { Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); - ((TibetanDocument)dp.getDocument()).toWylie(dp.getSelectionStart(), - dp.getSelectionEnd(), - new long[] { 0 }); + if (!((TibetanDocument)dp.getDocument()).toWylie(dp.getSelectionStart(), + dp.getSelectionEnd(), + new long[] { 0 })) { + JOptionPane.showMessageDialog(Jskad.this, + "Though some Extended Wylie has been produced, it\ncontains ugly error messages like\n\"<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE:\n Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct EWTS transliteration.", + "Attention Required", + JOptionPane.ERROR_MESSAGE); + } Jskad.this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); } diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index 1edf910..9c54a2a 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -20,6 +20,8 @@ package org.thdl.tib.input; import java.io.*; import javax.swing.text.rtf.RTFEditorKit; +import javax.swing.text.SimpleAttributeSet; +import javax.swing.text.StyleConstants; import org.thdl.util.*; import org.thdl.tib.text.*; @@ -122,6 +124,10 @@ public class TibetanConverter implements FontConverterConstants { out.println(" result to standard output (after dealing with the curly brace problem if"); out.println(" the input is TibetanMachineWeb). Exit code is zero on success, 42 if some"); out.println(" glyphs couldn't be converted (in which case the output is just those glyphs),"); + out.println(" 44 if a TMW->Wylie conversion ran into some glyphs that couldn't be"); + out.println(" converted, in which case ugly error messages like"); + out.println(" \"<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode...\""); + out.println(" are in your document waiting for your personal attention,"); out.println(" 43 if not even one glyph found was eligible for this conversion, which means"); out.println(" that you probably selected the wrong conversion or the wrong document, or "); out.println(" nonzero otherwise."); @@ -186,6 +192,16 @@ public class TibetanConverter implements FontConverterConstants { honored. */ static int reallyConvert(InputStream in, PrintStream out, String ct) { TibetanDocument tdoc = new TibetanDocument(); + { + SimpleAttributeSet ras = new SimpleAttributeSet(); + StyleConstants.setFontFamily(ras, + ThdlOptions.getStringOption("thdl.default.roman.font.face", + "Serif")); + StyleConstants.setFontSize(ras, + ThdlOptions.getIntegerOption("thdl.default.roman.font.size", + 14)); + tdoc.setRomanAttributeSet(ras); + } try { // Read in the rtf file. if (debug) System.err.println("Start: reading in old RTF file"); @@ -253,9 +269,11 @@ public class TibetanConverter implements FontConverterConstants { long numAttemptedReplacements[] = new long[] { 0 }; if (TMW_TO_WYLIE == ct) { // Convert to THDL Wylie: - tdoc.toWylie(0, - tdoc.getLength(), - numAttemptedReplacements); + if (!tdoc.toWylie(0, + tdoc.getLength(), + numAttemptedReplacements)) { + exitCode = 44; + } } else if (TMW_TO_UNI == ct) { StringBuffer errors = new StringBuffer(); // Convert to Unicode: diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index 8618ac6..38a161b 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -297,6 +297,12 @@ public class TibTextUtils implements THDLWylieConstants { return s; } + /** An array containing one boolean value. Pass this to + TibetanMachineWeb.getWylieForGlyph(..) if you don't care if a + certain glyph has corresponding Wylie or not. */ + public static final boolean[] weDoNotCareIfThereIsCorrespondingWylieOrNot + = new boolean[] { false }; + /** * Converts a string of Extended Wylie into {@link DuffData DuffData}. * @param wylie the Wylie you want to convert @@ -371,7 +377,7 @@ public class TibTextUtils implements THDLWylieConstants { vowel_block: { if (size > 1) { dc = (DuffCode)glyphs.get(glyphs.size()-1); - if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc))) { + if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc, weDoNotCareIfThereIsCorrespondingWylieOrNot))) { DuffCode dc_2 = (DuffCode)glyphs.removeLast(); DuffCode dc_1 = (DuffCode)glyphs.removeLast(); glyphs.addAll(getVowel(dc_1, dc_2, next)); @@ -805,9 +811,12 @@ public class TibTextUtils implements THDLWylieConstants { * @param glyphList a list of TibetanMachineWeb glyphs, i.e. {@link * org.thdl.tib.text.DuffCode DuffCodes}. Pass in an ArrayList if you * care at all for speed. -* @return the Wylie string corresponding to this glyph list, with 'a' inserted. -*/ - public static String withA(java.util.List glyphList) { +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to these glyphs, then noSuchWylie[0] will be set to +* true +* @return the Wylie string corresponding to this glyph list, with 'a' inserted. */ + public static String withA(java.util.List glyphList, boolean noSuchWylie[]) { StringBuffer sb = new StringBuffer(); int size = glyphList.size(); String wylie; @@ -818,16 +827,16 @@ public class TibTextUtils implements THDLWylieConstants { return ""; case 1: //only one glyph: 'a' goes after it - wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0)); + wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0), noSuchWylie); sb.append(wylie); sb.append(aVowelToUseAfter(wylie)); return sb.toString(); case 2: //two glyphs: 'a' either goes after first or after both - lastWylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0)); + lastWylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0), noSuchWylie); sb.append(lastWylie); - wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(1)); + wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(1), noSuchWylie); if (TibetanMachineWeb.isWylieRight(wylie)) { sb.append(aVowelToUseAfter(lastWylie)); sb.append(wylie); @@ -870,17 +879,17 @@ public class TibTextUtils implements THDLWylieConstants { StringBuffer tailEndWylie = null; int effectiveSize = size - 2; while (effectiveSize >= 0 - && TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize)).equals(ACHUNG)) { + && TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize), noSuchWylie).equals(ACHUNG)) { if (null == tailEndWylie) tailEndWylie = new StringBuffer(); // prepend: tailEndWylie.insert(0, ACHUNG + aVowelToUseAfter(ACHUNG) - + TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1))); + + TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1), noSuchWylie)); effectiveSize -= 2; } if (null != tailEndWylie) { - return (withA(glyphList.subList(0, effectiveSize + 2)) + return (withA(glyphList.subList(0, effectiveSize + 2), noSuchWylie) + tailEndWylie.toString()); } } @@ -890,17 +899,17 @@ public class TibTextUtils implements THDLWylieConstants { // this is illegal because it doesn't begin // with a prefix: || (size == 4 - && (!TibetanMachineWeb.isWylieLeft(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0))) + && (!TibetanMachineWeb.isWylieLeft(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0), weDoNotCareIfThereIsCorrespondingWylieOrNot)) // this is illegal because it doesn't have a // suffix in the proper place, e.g. mjskad: - || !TibetanMachineWeb.isWylieRight(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(size - 2))) + || !TibetanMachineWeb.isWylieRight(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(size - 2), weDoNotCareIfThereIsCorrespondingWylieOrNot)) // this is illegal because it doesn't have a // postsuffix in the proper place, // e.g. 'lan.g, which would otherwise become // 'lang (with nga, not na and then ga): - || !TibetanMachineWeb.isWylieFarRight(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(size - 1))))))) { + || !TibetanMachineWeb.isWylieFarRight(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(size - 1), weDoNotCareIfThereIsCorrespondingWylieOrNot)))))) { for (int i = 0; i < size; i++) { - wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i)); + wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i), noSuchWylie); if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie) || (i != 0 && wylie.equals(ACHEN))) sb.append(WYLIE_DISAMBIGUATING_KEY); @@ -914,7 +923,7 @@ public class TibTextUtils implements THDLWylieConstants { /* Else, chew up all the glyphs except for the last two. Then decide. */ int i = 0; while (i+2 < size) { - wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i)); + wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i), noSuchWylie); if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, wylie) || (i != 0 && wylie.equals(ACHEN))) sb.append(WYLIE_DISAMBIGUATING_KEY); @@ -925,9 +934,9 @@ public class TibTextUtils implements THDLWylieConstants { } String wylie1 - = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i)); + = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i), noSuchWylie); String wylie2 - = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i + 1)); + = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i + 1), noSuchWylie); if (size == 3) { String wylie0 = lastWylie; @@ -1047,9 +1056,12 @@ public class TibTextUtils implements THDLWylieConstants { * org.thdl.tib.text.DuffCode DuffCodes} * @param isBeforeVowel true if these glyphs occur before a vowel, * false if these glyphs occur after a vowel -* @return the Wylie string corresponding to this glyph list -*/ - public static String withoutA(java.util.ArrayList glyphList, boolean isBeforeVowel) { +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to these glyphs, then noSuchWylie[0] will be set to +* true +* @return the Wylie string corresponding to this glyph list */ + public static String withoutA(java.util.ArrayList glyphList, boolean isBeforeVowel, boolean noSuchWylie[]) { StringBuffer sb = new StringBuffer(); Iterator iter = glyphList.iterator(); DuffCode dc; @@ -1058,7 +1070,7 @@ public class TibTextUtils implements THDLWylieConstants { while (iter.hasNext()) { dc = (DuffCode)iter.next(); - currWylie = TibetanMachineWeb.getWylieForGlyph(dc); + currWylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie); if (TibetanMachineWeb.isAmbiguousWylie(lastWylie, currWylie) || (!lastWylie.equals("") @@ -1084,9 +1096,12 @@ public class TibTextUtils implements THDLWylieConstants { /** * Gets the Extended Wylie for a sequence of glyphs. * @param dcs an array of glyphs -* @return the Extended Wylie corresponding to these glyphs -*/ - public static String getWylie(DuffCode[] dcs) { +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to these glyphs, then noSuchWylie[0] will be set to +* true +* @return the Extended Wylie corresponding to these glyphs */ + public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) { if (dcs.length == 0) return null; @@ -1108,9 +1123,9 @@ public class TibTextUtils implements THDLWylieConstants { if (wylieBuffer.length() > 0 || !glyphList.isEmpty()) { String thisPart; if (needsVowel) - thisPart = withA(glyphList); + thisPart = withA(glyphList, noSuchWylie); else - thisPart = withoutA(glyphList, false); + thisPart = withoutA(glyphList, false, noSuchWylie); wylieBuffer.append(thisPart); glyphList.clear(); @@ -1120,7 +1135,7 @@ public class TibTextUtils implements THDLWylieConstants { wylieBuffer.append(ch); } else { - wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i]); + wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuchWylie); boolean containsBindu = false; if (wylie.length() > 1 && wylie.charAt(wylie.length()-1) == BINDU) { @@ -1138,9 +1153,9 @@ public class TibTextUtils implements THDLWylieConstants { } else { String thisPart; if (needsVowel) - thisPart = withA(glyphList); + thisPart = withA(glyphList, noSuchWylie); else - thisPart = withoutA(glyphList, false); + thisPart = withoutA(glyphList, false, noSuchWylie); wylieBuffer.append(thisPart); wylieBuffer.append(wylie); //append the punctuation @@ -1185,7 +1200,7 @@ public class TibTextUtils implements THDLWylieConstants { if (0 != glyphCount) { DuffCode top_dc = (DuffCode)glyphList.get(glyphCount-1); - String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc); + String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc, noSuchWylie); if (top_wylie.equals(ACHEN)) { glyphList.remove(glyphCount-1); @@ -1198,15 +1213,15 @@ public class TibTextUtils implements THDLWylieConstants { } } - if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc).equals(ACHUNG)) { - String thisPart = withoutA(glyphList, true); + if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc, noSuchWylie).equals(ACHUNG)) { + String thisPart = withoutA(glyphList, true, noSuchWylie); wylieBuffer.append(thisPart); //append consonants in glyphList } else { glyphCount = glyphList.size(); glyphList.remove(glyphCount-1); if (glyphCount-1 != 0) { - String thisPart = withA(glyphList); + String thisPart = withA(glyphList, noSuchWylie); wylieBuffer.append(thisPart); } @@ -1231,7 +1246,7 @@ public class TibTextUtils implements THDLWylieConstants { if (containsBindu) { isLastVowel = false; - wylieBuffer.append(withoutA(glyphList, false)); + wylieBuffer.append(withoutA(glyphList, false, noSuchWylie)); wylieBuffer.append(BINDU); //append the bindu glyphList.clear(); } @@ -1243,9 +1258,9 @@ public class TibTextUtils implements THDLWylieConstants { if (!glyphList.isEmpty()) { String thisPart; if (needsVowel) - thisPart = withA(glyphList); + thisPart = withA(glyphList, noSuchWylie); else - thisPart = withoutA(glyphList, false); + thisPart = withoutA(glyphList, false, noSuchWylie); wylieBuffer.append(thisPart); } diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index 53f3ee0..b5b9249 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -246,10 +246,13 @@ public class TibetanDocument extends DefaultStyledDocument { * If the document consists of both Tibetan and * non-Tibetan fonts, however, the conversion stops * at the first non-Tibetan font. -* @return the string of Wylie corresponding to this document -*/ - public String getWylie() { - return getWylie(0, getLength()); +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to one of these glyphs, then noSuchWylie[0] will be +* set to true +* @return the string of Wylie corresponding to this document */ + public String getWylie(boolean noSuchWylie[]) { + return getWylie(0, getLength(), noSuchWylie); } /** @@ -259,9 +262,12 @@ public class TibetanDocument extends DefaultStyledDocument { * at the first non-Tibetan font. * @param begin the beginning of the region to convert * @param end the end of the region to convert -* @return the string of Wylie corresponding to this document -*/ - public String getWylie(int begin, int end) { +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to one of these glyphs, then noSuchWylie[0] will be +* set to true +* @return the string of Wylie corresponding to this document */ + public String getWylie(int begin, int end, boolean noSuchWylie[]) { AttributeSet attr; String fontName; int fontNum; @@ -287,7 +293,7 @@ public class TibetanDocument extends DefaultStyledDocument { if (dcs.size() > 0) { DuffCode[] dc_array = new DuffCode[0]; dc_array = (DuffCode[])dcs.toArray(dc_array); - wylieBuffer.append(TibTextUtils.getWylie(dc_array)); + wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); dcs.clear(); } wylieBuffer.append(ch); @@ -298,7 +304,7 @@ public class TibetanDocument extends DefaultStyledDocument { if (dcs.size() > 0) { DuffCode[] dc_array = new DuffCode[0]; dc_array = (DuffCode[])dcs.toArray(dc_array); - wylieBuffer.append(TibTextUtils.getWylie(dc_array)); + wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); dcs.clear(); } } @@ -313,7 +319,7 @@ public class TibetanDocument extends DefaultStyledDocument { if (dcs.size() > 0) { DuffCode[] dc_array = new DuffCode[0]; dc_array = (DuffCode[])dcs.toArray(dc_array); - wylieBuffer.append(TibTextUtils.getWylie(dc_array)); + wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); } return wylieBuffer.toString(); } @@ -761,66 +767,6 @@ public class TibetanDocument extends DefaultStyledDocument { return ceh.errorReturn; } - /** Appends to sb a text representation of the characters (glyphs) - in this document in the range [begin, end). In this - representation, \tmwXYYY and \tmXYYY are used for TMW and TM - glyphs, respectively. \otherYYY is used for all other - characters. X is zero-based; Y is the decimal glyph number. - After every 10 characters, '\n' is added. Note well that some - TM oddballs (see TibetanMachineWeb.getUnusualTMtoTMW(int, - int)) are not handled well, so you may get \tm08222 etc. */ - public void getTextRepresentation(int begin, int end, StringBuffer sb) { - if (end < 0) - end = getLength(); - if (begin >= end) - return; // nothing to do - - // For speed, do as few replaces as possible. To preserve - // formatting, we'll try to replace one paragraph at a time. - // But we *must* replace when we hit a different font (TMW3 as - // opposed to TMW2, e.g.), so we'll likely replace many times - // per paragraph. One very important optimization is that we - // don't have to treat TMW3.45 or TMW3.32 as a different font - // than TMW.33 -- that's because each of the ten TMW fonts has - // the same glyph at position 32 (space) and the same glyph at - // position 45 (tsheg). Note that we're building up a big - // StringBuffer; we're trading space for time. - try { - int i = begin; - int tenCount = 0; - while (i < end) { - AttributeSet attr = getCharacterElement(i).getAttributes(); - String fontName = StyleConstants.getFontFamily(attr); - int tmwFontNum - = TibetanMachineWeb.getTMWFontNumber(fontName); - int tmFontNum; - if (tmwFontNum != 0) { - sb.append("\\tmw" + (tmwFontNum - 1)); - } else if ((tmFontNum - = TibetanMachineWeb.getTMFontNumber(fontName)) - != 0) { - sb.append("\\tm" + (tmFontNum - 1)); - } else { - // non-tmw, non-tm character: - sb.append("\\other"); - } - int ordinal = (int)getText(i,1).charAt(0); - if (ordinal < 100) - sb.append('0'); - if (ordinal < 10) - sb.append('0'); - sb.append("" + ordinal); - if ((++tenCount) % 10 == 0) { - tenCount = 0; - sb.append('\n'); - } - i++; - } - } catch (BadLocationException e) { - throw new ThdlLazyException(e); - } - } - /** See the sole caller, convertHelper. */ private void convertHelperHelper(int begin, int end, boolean toTM, boolean toUnicode, StringBuffer errors, @@ -1101,13 +1047,17 @@ public class TibetanDocument extends DefaultStyledDocument { * @param end the point at which to stop converting to Wylie * @param numAttemptedReplacements an array that contains one element; * this first element will be, upon exit, incremented by the number of -* TMW glyphs that we encountered and attempted to convert to Wylie */ - public void toWylie(int start, int end, - long numAttemptedReplacements[]) { +* TMW glyphs that we encountered and attempted to convert to Wylie +* @return true if entirely successful, false if we put some +* "<<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert +* DuffCode..." text into the document */ + public boolean toWylie(int start, int end, + long numAttemptedReplacements[]) { if (start >= end) - return; + return true; try { + boolean noSuchWylie[] = new boolean[] { false }; DuffCode[] any_dc_array = new DuffCode[0]; DuffCode[] dc_array; Position endPos = createPosition(end); @@ -1124,8 +1074,9 @@ public class TibetanDocument extends DefaultStyledDocument { if (i != start) { dc_array = (DuffCode[])dcs.toArray(any_dc_array); remove(start, i-start); + ThdlDebug.verify(getRomanAttributeSet() != null); insertString(start, - TibTextUtils.getWylie(dc_array), + TibTextUtils.getWylie(dc_array, noSuchWylie), getRomanAttributeSet()); dcs.clear(); } @@ -1138,9 +1089,11 @@ public class TibetanDocument extends DefaultStyledDocument { i++; } + return !noSuchWylie[0]; } catch (BadLocationException ble) { ble.printStackTrace(); ThdlDebug.noteIffyCode(); + return false; } } @@ -1172,6 +1125,66 @@ public class TibetanDocument extends DefaultStyledDocument { return (Element[])v.toArray(arrayType); } + /** Appends to sb a text representation of the characters (glyphs) + in this document in the range [begin, end). In this + representation, \tmwXYYY and \tmXYYY are used for TMW and TM + glyphs, respectively. \otherYYY is used for all other + characters. X is zero-based; Y is the decimal glyph number. + After every 10 characters, '\n' is added. Note well that some + TM oddballs (see TibetanMachineWeb.getUnusualTMtoTMW(int, + int)) are not handled well, so you may get \tm08222 etc. */ + public void getTextRepresentation(int begin, int end, StringBuffer sb) { + if (end < 0) + end = getLength(); + if (begin >= end) + return; // nothing to do + + // For speed, do as few replaces as possible. To preserve + // formatting, we'll try to replace one paragraph at a time. + // But we *must* replace when we hit a different font (TMW3 as + // opposed to TMW2, e.g.), so we'll likely replace many times + // per paragraph. One very important optimization is that we + // don't have to treat TMW3.45 or TMW3.32 as a different font + // than TMW.33 -- that's because each of the ten TMW fonts has + // the same glyph at position 32 (space) and the same glyph at + // position 45 (tsheg). Note that we're building up a big + // StringBuffer; we're trading space for time. + try { + int i = begin; + int tenCount = 0; + while (i < end) { + AttributeSet attr = getCharacterElement(i).getAttributes(); + String fontName = StyleConstants.getFontFamily(attr); + int tmwFontNum + = TibetanMachineWeb.getTMWFontNumber(fontName); + int tmFontNum; + if (tmwFontNum != 0) { + sb.append("\\tmw" + (tmwFontNum - 1)); + } else if ((tmFontNum + = TibetanMachineWeb.getTMFontNumber(fontName)) + != 0) { + sb.append("\\tm" + (tmFontNum - 1)); + } else { + // non-tmw, non-tm character: + sb.append("\\other"); + } + int ordinal = (int)getText(i,1).charAt(0); + if (ordinal < 100) + sb.append('0'); + if (ordinal < 10) + sb.append('0'); + sb.append("" + ordinal); + if ((++tenCount) % 10 == 0) { + tenCount = 0; + sb.append('\n'); + } + i++; + } + } catch (BadLocationException e) { + throw new ThdlLazyException(e); + } + } + /** For debugging only. Start with an empty document, and call this on it. You'll get all the TibetanMachine glyphs inserted, in order, into your document. */ diff --git a/source/org/thdl/tib/text/TibetanHTML.java b/source/org/thdl/tib/text/TibetanHTML.java index a117d1e..d4d64a3 100644 --- a/source/org/thdl/tib/text/TibetanHTML.java +++ b/source/org/thdl/tib/text/TibetanHTML.java @@ -71,7 +71,7 @@ public class TibetanHTML { break; } htmlBuffer.append(""); - String wylie = TibetanMachineWeb.getWylieForGlyph(duffData[i].font, c[k]); + String wylie = TibetanMachineWeb.getWylieForGlyph(duffData[i].font, c[k], TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); if (TibetanMachineWeb.isWyliePunc(wylie)) htmlBuffer.append(""); } else { @@ -138,7 +138,7 @@ public class TibetanHTML { htmlBuffer.append(c[k]); break; } - String wylie = TibetanMachineWeb.getWylieForGlyph(duffData[i].font, c[k]); + String wylie = TibetanMachineWeb.getWylieForGlyph(duffData[i].font, c[k], TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); if (TibetanMachineWeb.isWyliePunc(wylie)) htmlBuffer.append(""); } else { @@ -209,7 +209,7 @@ public class TibetanHTML { htmlBuffer.append(c[k]); break; } - String wylie = TibetanMachineWeb.getWylieForGlyph(duffData[i].font, c[k]); + String wylie = TibetanMachineWeb.getWylieForGlyph(duffData[i].font, c[k], TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); if (TibetanMachineWeb.isWyliePunc(wylie)) htmlBuffer.append(""); } else { diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index 0659571..7e6ecf2 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -1414,12 +1414,17 @@ private static String getTMWToWylieErrorString(DuffCode dc) { * glyph you want the Wylie of * @param code the ordinal, minus 32, of the TibetanMachineWeb glyph * you want the Wylie of +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to the glyph, then noSuchWylie[0] will be set to true * @return the Wylie value corresponding to the * glyph denoted by font, code */ -public static String getWylieForGlyph(int font, int code) { + public static String getWylieForGlyph(int font, int code, + boolean noSuchWylie[]) { String hashKey = getHashKeyForGlyph(font, code); if (hashKey == null) { + noSuchWylie[0] = true; return getTMWToWylieErrorString(new DuffCode(font, (char)code)); } return wylieForGlyph(hashKey); @@ -1429,12 +1434,15 @@ public static String getWylieForGlyph(int font, int code) { * Gets the Extended Wylie value for this glyph. * @param dc the DuffCode of the glyph you want * the Wylie of +* @param noSuchWylie an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie +* corresponding to the glyph, then noSuchWylie[0] will be set to true * @return the Wylie value corresponding to the -* glyph denoted by dc -*/ -public static String getWylieForGlyph(DuffCode dc) { +* glyph denoted by dc */ +public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) { String hashKey = getHashKeyForGlyph(dc); if (hashKey == null) { + noSuchWylie[0] = true; return getTMWToWylieErrorString(dc); } return wylieForGlyph(hashKey); @@ -1637,7 +1645,9 @@ public static String getAVowel() { * @return true if the glyph is a top-hanging (superscript) vowel (i, * u, e, o, ai, or ao) and false if not */ public static boolean isTopVowel(DuffCode dc) { - String wylie = getWylieForGlyph(dc); + String wylie + = getWylieForGlyph(dc, + TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot); if (top_vowels.contains(wylie)) return true;