diff --git a/build.xml b/build.xml index 893d7bc..1c314e3 100644 --- a/build.xml +++ b/build.xml @@ -296,6 +296,13 @@ + + + + + diff --git a/source/options.txt b/source/options.txt index ae31dcd..2e3468e 100644 --- a/source/options.txt +++ b/source/options.txt @@ -23,6 +23,10 @@ # system-specific. thdl.user.options.directory = +# Set this to true if you want more messages (probably on the console) +# about what's going on. +thdl.verbose = false + # Set this to the full path of Jskad's working directory. When you go # to open a file or to save a file, this is the path you'll see first. thdl.Jskad.working.directory = diff --git a/source/org/thdl/tib/input/Jskad.java b/source/org/thdl/tib/input/Jskad.java index a231e0b..b3b8326 100644 --- a/source/org/thdl/tib/input/Jskad.java +++ b/source/org/thdl/tib/input/Jskad.java @@ -62,9 +62,6 @@ import org.thdl.util.ThdlLazyException; * @version 1.0 */ public class Jskad extends JPanel implements DocumentListener { - - private static final String rtfErrorMessage = "The Rich Text Format (RTF) file selected contains constructs that\nJskad cannot handle. If you got the RTF file from saving a Word\ndocument as RTF, try saving that same document as RTF in\nWord 2000 instead of Word XP or in Word 97 instead of\nWord 2000. Older versions of Word produce RTF that Jskad\ncan more easily deal with. 
OpenOffice and StarOffice also\nproduce better-behaved RTF."; - /** the name of the property a developer should set to see low-level info on how keypresses in "Tibetan" input mode are being interpreted */ @@ -339,6 +336,28 @@ public class Jskad extends JPanel implements DocumentListener { toolsMenu.add(DevelItem); } + if (ThdlOptions.getBooleanOption("thdl.add.developer.options.to.menu")) { + toolsMenu.addSeparator(); + JMenuItem DevelItem = new JMenuItem("Check for non-TMW characters"); // DLC NOW: do it just in the selection + DevelItem.addActionListener(new ThdlActionListener() { + public void theRealActionPerformed(ActionEvent e) { + ((TibetanDocument)dp.getDocument()).findSomeNonTMWCharacters(0, -1); // entire document. + } + }); + toolsMenu.add(DevelItem); + } + + if (ThdlOptions.getBooleanOption("thdl.add.developer.options.to.menu")) { + toolsMenu.addSeparator(); + JMenuItem DevelItem = new JMenuItem("Fix curly braces RTF problem"); // DLC NOW: do it just in the selection + DevelItem.addActionListener(new ThdlActionListener() { + public void theRealActionPerformed(ActionEvent e) { + ((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1); // entire document + } + }); + toolsMenu.add(DevelItem); + } + menuBar.add(toolsMenu); JMenu infoMenu = new JMenu("Info"); @@ -668,7 +687,7 @@ public class Jskad extends JPanel implements DocumentListener { newRTF.dp.rtfEd.read(in, newRTF.dp.getDocument(), 0); } catch (Exception e) { JOptionPane.showMessageDialog(newFrame, - rtfErrorMessage); + TMW_RTF_TO_THDL_WYLIE.rtfErrorMessage); error = true; } in.close(); @@ -693,7 +712,7 @@ public class Jskad extends JPanel implements DocumentListener { dp.rtfEd.read(in, dp.getDocument(), 0); } catch (Exception e) { JOptionPane.showMessageDialog(this, - rtfErrorMessage); + TMW_RTF_TO_THDL_WYLIE.rtfErrorMessage); error = true; } diff --git a/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIE.java b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIE.java new file 
mode 100644 index 0000000..41db623 --- /dev/null +++ b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIE.java @@ -0,0 +1,125 @@ +/* +The contents of this file are subject to the THDL Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the THDL web site +(http://www.thdl.org/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is the Tibetan and Himalayan Digital +Library (THDL). Portions created by the THDL are Copyright 2003 THDL. +All Rights Reserved. + +Contributor(s): ______________________________________. +*/ + +package org.thdl.tib.input; + +import java.io.*; + +import org.thdl.util.*; +import org.thdl.tib.text.*; + +/** + * TMW_RTF_TO_THDL_WYLIE is a command-line utility for converting TMW + * to Wylie. It is a TibetanMachineWeb-in-RichTextFormat to THDL + * Extended Wylie converter, more specifically. Invoke it with no + * parameters for usage information. + * @author David Chandler */ +public class TMW_RTF_TO_THDL_WYLIE { + static final String rtfErrorMessage = "The Rich Text Format (RTF) file selected contains constructs that\nJskad cannot handle. If you got the RTF file from saving a Word\ndocument as RTF, try saving that same document as RTF in\nWord 2000 instead of Word XP or in Word 97 instead of\nWord 2000. Older versions of Word produce RTF that Jskad\ncan more easily deal with. OpenOffice and StarOffice may also\nproduce better-behaved RTF."; + + static { + // No need for the TMW fonts. + System.setProperty("thdl.rely.on.system.tmw.fonts", "true"); + } + + /** + * Runs the converter. 
*/ + public static void main(String[] args) { + try { + boolean findSomeNonTMWMode = false; + boolean findAllNonTMWMode = false; + // Process arguments: + if ((args.length != 1 && args.length != 2) + || (args.length == 1 + && (args[0].equals("-h") + || args[0].equals("--help"))) + || (args.length == 2 + && !((findAllNonTMWMode + = args[0].equals("--find-all-non-tmw")) + || (findSomeNonTMWMode + = args[0].equals("--find-some-non-tmw"))))) { + System.out.println("TMW_RTF_TO_THDL_WYLIE [--find-all-non-tmw | --find-some-non-tmw] RTF_file |"); + System.out.println("TMW_RTF_TO_THDL_WYLIE [--version | -v | --help | -h]"); + System.out.println(""); + System.out.println("Distributed under the terms of the THDL Open Community License Version 1.0."); + System.out.println(""); + System.out.println("Usage:"); + System.out.println(" -v | --version for version info"); + System.out.println(" -h | --help for this message"); + System.out.println(" --find-all-non-tmw to locate all characters in the input document that are"); + System.out.println(" not in Tibetan Machine Web fonts, exit zero iff none found"); + System.out.println(" --find-some-non-tmw to locate all distinct characters in the input document"); + System.out.println(" not in Tibetan Machine Web fonts, exit zero iff none found"); + System.out.println(" Otherwise, needs one argument, the name of the TibetanMachineWeb RTF file."); + System.out.println(" Writes the Wylie transliteration of that file to standard output after"); + System.out.println(" dealing with the curly brace problem. 
Exit code is zero on success,"); + System.out.println(" nonzero otherwise."); + System.out.println(""); + System.out.println(" You may find it helpful to use `--find-some-non-tmw' mode before doing a"); + System.out.println(" conversion so that you have confidence in the conversion's correctness."); + System.exit(77); + } + if (args[0].equals("--version") || args[0].equals("-v")) { + System.out.println("TMW_RTF_TO_THDL_WYLIE version 0.8"); + System.exit(77); + } + String tmwRtfPath = args[args.length - 1]; + + DuffPane dp = new DuffPane(); + // Read in the rtf file. + { + InputStream in = new FileInputStream(tmwRtfPath); + try { + dp.rtfEd.read(in, dp.getDocument(), 0); + } catch (Exception e) { + System.out.println("TMW_RTF_TO_THDL_WYLIE:\n" + + rtfErrorMessage); + System.exit(3); + } + in.close(); + } + + if (findAllNonTMWMode) { + // 0, -1 is the entire document. + System.exit(((TibetanDocument)dp.getDocument()).findAllNonTMWCharacters(0, -1)); + } else if (findSomeNonTMWMode) { + // 0, -1 is the entire document. 
+ System.exit(((TibetanDocument)dp.getDocument()).findSomeNonTMWCharacters(0, -1)); + } else { // conversion mode + // Fix curly braces in the entire document: + ((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1); + + // Convert to THDL Wylie: + dp.toWylie(0, dp.getDocument().getLength()); + + // Write to standard output the result: + ((TibetanDocument)dp.getDocument()).writeRTFOutputStream(System.out); + + // Exit normally: + System.exit(0); + } + } catch (ThdlLazyException e) { + System.out.println("TMW_RTF_TO_THDL_WYLIE has a BUG:"); + e.getRealException().printStackTrace(System.out); + System.exit(1); + } catch (IOException e) { + e.printStackTrace(); + System.exit(4); + } + } +} diff --git a/source/org/thdl/tib/input/package.html b/source/org/thdl/tib/input/package.html index 3b0ff6b..d3e59c0 100644 --- a/source/org/thdl/tib/input/package.html +++ b/source/org/thdl/tib/input/package.html @@ -17,17 +17,20 @@ Provides classes and methods for inputting Tibetan text.

-Designed for use with the Tibetan Computer -Company's free cross-platform TibetanMachineWeb fonts, this package -contains methods for inputting Tibetan using various keyboard -input methods, including true Wylie-based input, as well as -user-defined keyboards. +Designed for use with the Tibetan Computer Company's free +cross-platform Tibetan Machine Web fonts, this package contains +methods for inputting Tibetan using various keyboard input methods, +including true Wylie-based input, as well as user-defined keyboards.

-The package includes a simple Tibetan text editor, Jskad, -which can be run as an local application or embedded in a -web page. Jskad supports a wide range of functions, including -conversion back and forth between TibetanMachineWeb and -Extended Wylie. +The package includes a simple Tibetan text editor, Jskad, which can be +run as a local application or embedded in a web page.  Jskad +supports a wide range of functions, including conversion back and +forth between Tibetan Machine Web and Extended Wylie. +

+Also included is TMW_RTF_TO_THDL_WYLIE, a command-line utility for +converting Rich Text Format (RTF) documents that use the Tibetan +Machine Web fonts into Extended Wylie.  This utility is aware of +quirks in Java Swing's RTF support and works around them.

Related Documentation

@see org.thdl.tib.text diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index dde1d73..066832d 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -26,6 +26,45 @@ import java.io.*; import org.thdl.util.ThdlDebug; +/** Represents a character meant to be rendered in a certain font. + * @author David Chandler + */ +class CharacterInAGivenFont { + private char character; + private String fontName; + public CharacterInAGivenFont(char ch, String font) { + character = ch; + fontName = font; + } + public CharacterInAGivenFont(String s, String font) { + if (s.length() != 1) + throw new Error("character in a given font was given a string " + + s + " in a given font"); + character = s.charAt(0); + fontName = font; + } + public boolean equals(Object x) { + return ((x instanceof CharacterInAGivenFont) + && ((CharacterInAGivenFont)x).character == character + && ((CharacterInAGivenFont)x).fontName.equals(fontName)); + } + public int hashCode() { + return (int)character + fontName.hashCode(); + } + public String toString() { + String characterRepresentation + = "'" + new Character(character).toString() + "'"; + if ('\n' == character) + characterRepresentation = "newline"; + if ('\r' == character) + characterRepresentation = "carriage return"; + return characterRepresentation + " in the font " + + ((null == fontName) + ? "_ERROR_FINDING_FONT_" + : fontName); + } +} + /** * A TibetanDocument is a styled document that knows about Tibetan and * will respect line breaks and the like. It allows you to insert @@ -202,4 +241,132 @@ public class TibetanDocument extends DefaultStyledDocument { return ""; } + + /** Prints to standard output a list of all the indices of + characters that are not in a TMW font within the range [start, + end). Using a negative number for end means that this will + run to the end of the document. 
SPEED_FIXME: might be faster + to run over the elements, if they are one per font. + @return 1 if at least one non-TMW character was found in + the specified range, zero if none were, -1 on error. */ + public int findAllNonTMWCharacters(int begin, int end) { + if (end < 0) + end = getLength(); + if (begin >= end) + return 0; + int i = begin; + int returnValue = 0; + try { + while (i < end) { + AttributeSet attr = getCharacterElement(i).getAttributes(); + String fontName = StyleConstants.getFontFamily(attr); + if ((0 == TibetanMachineWeb.getTMWFontNumber(fontName))) { + returnValue = 1; + CharacterInAGivenFont cgf + = new CharacterInAGivenFont(getText(i, 1), fontName); + System.out.println("non-TMW character " + + cgf + " at location " + i); + } + i++; + } + } catch (BadLocationException ble) { + ble.printStackTrace(); + ThdlDebug.noteIffyCode(); + returnValue = -1; + } + return returnValue; + } + + /** Finds the first occurrence of a non-TMW character in a given + font and prints it to System.out. If you have a Tahoma + newline and an Arial newline, the first occurrence of each + will be reported. + +

Works within the range [start, end). Using a negative + number for end means that this will run to the end of the + document. SPEED_FIXME: might be faster to run over the + elements, if they are one per font. + @return 1 if at least one non-TMW character was found in + the specified range, zero if none were, -1 on error. */ + public int findSomeNonTMWCharacters(int begin, int end) { + if (end < 0) + end = getLength(); + if (begin >= end) + return 0; + int i = begin; + int returnValue = 0; + try { + HashMap cgfTable = new HashMap(); + while (i < end) { + AttributeSet attr = getCharacterElement(i).getAttributes(); + String fontName = StyleConstants.getFontFamily(attr); + if ((0 == TibetanMachineWeb.getTMWFontNumber(fontName))) { + returnValue = 1; + CharacterInAGivenFont cgf + = new CharacterInAGivenFont(getText(i, 1), fontName); + if (!cgfTable.containsKey(cgf)) { + cgfTable.put(cgf, "yes this character appears once"); + System.out.println("non-TMW character " + + cgf + " appears first at location " + i); + } + } + i++; + } + } catch (BadLocationException ble) { + ble.printStackTrace(); + ThdlDebug.noteIffyCode(); + returnValue = -1; + } + return returnValue; + } + + private static final DuffData[] leftCurlyBraceTMW + = new DuffData[] { new DuffData("{", 1) }; + private static final DuffData[] rightCurlyBraceTMW + = new DuffData[] { new DuffData("}", 1) }; + private static final DuffData[] backslashTMW + = new DuffData[] { new DuffData("\\", 2) }; + /** This is a band-aid used to help Jskad fix RTF files that are + mostly TMW but have some Tahoma characters that should be TMW. + Replaces '{', '}', and '\\' characters with the correct + TibetanMachineWeb. Works within the range [start, end). + Using a negative number for end means that this will run to + the end of the document. Be sure to set the size for Tibetan + as you like it before using this. SPEED_FIXME: might be + faster to run over the elements, if they are one per font. 
*/ + public void replaceTahomaCurlyBracesAndBackslashes(int begin, int end) { + if (end < 0) + end = getLength(); + if (begin >= end) + return; + int i = begin; + try { + while (i < end) { + AttributeSet attr = getCharacterElement(i).getAttributes(); + String fontName = StyleConstants.getFontFamily(attr); + if (fontName.equals("Tahoma")) { + DuffData[] toReplaceWith = null; + switch (getText(i, 1).charAt(0)) { + case '{': + toReplaceWith = leftCurlyBraceTMW; + break; + case '}': + toReplaceWith = rightCurlyBraceTMW; + break; + case '\\': + toReplaceWith = backslashTMW; + break; + } + if (null != toReplaceWith) { + insertDuff(i, toReplaceWith); + remove(i+1, 1); + } + } + i++; + } + } catch (BadLocationException ble) { + ble.printStackTrace(); + ThdlDebug.noteIffyCode(); + } + } } diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index a366076..787777c 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -282,8 +282,10 @@ public class TibetanMachineWeb implements THDLWylieConstants { InputStreamReader isr = new InputStreamReader(url.openStream()); BufferedReader in = new BufferedReader(isr); - System.out.println("Reading Tibetan Machine Web code table " - + fileName); + if (ThdlOptions.getBooleanOption("thdl.verbose")) { + System.out.println("Reading Tibetan Machine Web code table " + + fileName); + } String line; boolean hashOn = false; boolean isSanskrit = false; //FIXME: this is never read. @@ -419,6 +421,7 @@ public class TibetanMachineWeb implements THDLWylieConstants { } catch (IOException e) { System.out.println("file Disappeared"); + ThdlDebug.noteIffyCode(); } hasReadData = true;