I've added a command-line converter,

org.thdl.tib.input.TMW_RTF_TO_THDL_WYLIE.  It converts RTF files
consisting of TMW characters to the corresponding THDL Extended Wylie.

It supports --find-some-non-tmw mode, which allows you to ensure that no
unusual characters will spoil the conversion.  The converter has built-in
intelligence that allows it to handle Tahoma '{', '}', and '\\' characters
properly.

The converter works on mixed Roman/TMW also, but --find-some-non-tmw
and --find-all-non-tmw modes are not as useful.

Invoke org.thdl.tib.input.TMW_RTF_TO_THDL_WYLIE, which resides in
Jskad's jar, with no command-line options to see usage information.
This commit is contained in:
dchandler 2003-05-18 14:14:47 +00:00
parent 17ea8fdf2a
commit e2a9720d9b
7 changed files with 345 additions and 17 deletions

View file

@ -62,9 +62,6 @@ import org.thdl.util.ThdlLazyException;
* @version 1.0
*/
public class Jskad extends JPanel implements DocumentListener {
private static final String rtfErrorMessage = "The Rich Text Format (RTF) file selected contains constructs that\nJskad cannot handle. If you got the RTF file from saving a Word\ndocument as RTF, try saving that same document as RTF in\nWord 2000 instead of Word XP or in Word 97 instead of\nWord 2000. Older versions of Word produce RTF that Jskad\ncan more easily deal with. OpenOffice and StarOffice also\nproduce better-behaved RTF.";
/** the name of the property a developer should set to see
low-level info on how keypresses in "Tibetan" input mode are
being interpreted */
@ -339,6 +336,28 @@ public class Jskad extends JPanel implements DocumentListener {
toolsMenu.add(DevelItem);
}
if (ThdlOptions.getBooleanOption("thdl.add.developer.options.to.menu")) {
toolsMenu.addSeparator();
JMenuItem DevelItem = new JMenuItem("Check for non-TMW characters"); // DLC NOW: do it just in the selection
DevelItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
((TibetanDocument)dp.getDocument()).findSomeNonTMWCharacters(0, -1); // entire document.
}
});
toolsMenu.add(DevelItem);
}
if (ThdlOptions.getBooleanOption("thdl.add.developer.options.to.menu")) {
toolsMenu.addSeparator();
JMenuItem DevelItem = new JMenuItem("Fix curly braces RTF problem"); // DLC NOW: do it just in the selection
DevelItem.addActionListener(new ThdlActionListener() {
public void theRealActionPerformed(ActionEvent e) {
((TibetanDocument)dp.getDocument()).replaceTahomaCurlyBracesAndBackslashes(0, -1); // entire document
}
});
toolsMenu.add(DevelItem);
}
menuBar.add(toolsMenu);
JMenu infoMenu = new JMenu("Info");
@ -668,7 +687,7 @@ public class Jskad extends JPanel implements DocumentListener {
newRTF.dp.rtfEd.read(in, newRTF.dp.getDocument(), 0);
} catch (Exception e) {
JOptionPane.showMessageDialog(newFrame,
rtfErrorMessage);
TMW_RTF_TO_THDL_WYLIE.rtfErrorMessage);
error = true;
}
in.close();
@ -693,7 +712,7 @@ public class Jskad extends JPanel implements DocumentListener {
dp.rtfEd.read(in, dp.getDocument(), 0);
} catch (Exception e) {
JOptionPane.showMessageDialog(this,
rtfErrorMessage);
TMW_RTF_TO_THDL_WYLIE.rtfErrorMessage);
error = true;
}

View file

@ -0,0 +1,125 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.input;
import java.io.*;
import org.thdl.util.*;
import org.thdl.tib.text.*;
/**
 * Command-line entry point that loads a Rich Text Format file into a
 * {@link DuffPane}, and then either reports characters that are not
 * in the Tibetan Machine Web fonts or rewrites the document as THDL
 * Extended Wylie and writes it to standard output.  Run it without
 * arguments to get the usage text.
 *
 * <p>Exit codes used by {@link #main(String[])}: 77 after printing
 * usage or version information, 3 when the RTF file cannot be read
 * by the RTF editor kit, 4 on an <code>IOException</code>, 1 on a
 * <code>ThdlLazyException</code>, 0 after a conversion, and, in the
 * two "find" modes, whatever the corresponding
 * <code>TibetanDocument</code> method returns.
 *
 * @author David Chandler */
public class TMW_RTF_TO_THDL_WYLIE {
    /** Message shown when the RTF reader rejects the input file. */
    static final String rtfErrorMessage = "The Rich Text Format (RTF) file selected contains constructs that\nJskad cannot handle. If you got the RTF file from saving a Word\ndocument as RTF, try saving that same document as RTF in\nWord 2000 instead of Word XP or in Word 97 instead of\nWord 2000. Older versions of Word produce RTF that Jskad\ncan more easily deal with. OpenOffice and StarOffice may also\nproduce better-behaved RTF.";

    static {
        // This tool does not need the TMW fonts themselves, so ask
        // the rest of the code to rely on whatever the system has.
        System.setProperty("thdl.rely.on.system.tmw.fonts", "true");
    }

    /**
     * Writes the usage summary, one line at a time, to standard
     * output. */
    private static void printUsage() {
        String[] usageLines = {
            "TMW_RTF_TO_THDL_WYLIE [--find-all-non-tmw | --find-some-non-tmw] RTF_file |",
            "TMW_RTF_TO_THDL_WYLIE [--version | -v | --help | -h]",
            "",
            "Distributed under the terms of the THDL Open Community License Version 1.0.",
            "",
            "Usage:",
            " -v | --version for version info",
            " -h | --help for this message",
            " --find-all-non-tmw to locate all characters in the input document that are",
            " not in Tibetan Machine Web fonts, exit zero iff none found",
            " --find-some-non-tmw to locate all distinct characters in the input document",
            " not in Tibetan Machine Web fonts, exit zero iff none found",
            " Otherwise, needs one argument, the name of the TibetanMachineWeb RTF file.",
            " Writes the Wylie transliteration of that file to standard output after",
            " dealing with the curly brace problem. Exit code is zero on success,",
            " nonzero otherwise.",
            "",
            " You may find it helpful to use `--find-some-non-tmw' mode before doing a",
            " conversion so that you have confidence in the conversion's correctness."
        };
        for (int i = 0; i < usageLines.length; i++) {
            System.out.println(usageLines[i]);
        }
    }

    /**
     * Runs the converter.
     *
     * @param args either a single argument (an option such as
     * <code>--help</code>/<code>--version</code>, or the path of the
     * RTF file to convert), or a "find" option followed by the path
     * of the RTF file to inspect */
    public static void main(String[] args) {
        try {
            boolean findSomeNonTMWMode = false;
            boolean findAllNonTMWMode = false;

            // Decide whether the arguments are acceptable.  The two
            // mode flags are only ever set when exactly two arguments
            // were given.
            boolean showUsage;
            if (args.length == 1) {
                showUsage = args[0].equals("-h") || args[0].equals("--help");
            } else if (args.length == 2) {
                findAllNonTMWMode = args[0].equals("--find-all-non-tmw");
                if (!findAllNonTMWMode) {
                    findSomeNonTMWMode = args[0].equals("--find-some-non-tmw");
                }
                showUsage = !findAllNonTMWMode && !findSomeNonTMWMode;
            } else {
                showUsage = true;
            }
            if (showUsage) {
                printUsage();
                System.exit(77);
            }

            String firstArg = args[0];
            if (firstArg.equals("--version") || firstArg.equals("-v")) {
                System.out.println("TMW_RTF_TO_THDL_WYLIE version 0.8");
                System.exit(77);
            }

            // The file to process is always the final argument.
            String rtfPath = args[args.length - 1];
            DuffPane pane = new DuffPane();

            // Load the RTF file into the pane's document.
            InputStream rtfStream = new FileInputStream(rtfPath);
            try {
                pane.rtfEd.read(rtfStream, pane.getDocument(), 0);
            } catch (Exception e) {
                System.out.println("TMW_RTF_TO_THDL_WYLIE:\n"
                                   + rtfErrorMessage);
                System.exit(3);
            }
            rtfStream.close();

            // In every call below, the range (0, -1) denotes the
            // entire document.
            TibetanDocument loadedDoc = (TibetanDocument) pane.getDocument();
            if (findAllNonTMWMode) {
                int status = loadedDoc.findAllNonTMWCharacters(0, -1);
                System.exit(status);
            } else if (findSomeNonTMWMode) {
                int status = loadedDoc.findSomeNonTMWCharacters(0, -1);
                System.exit(status);
            } else {
                // Conversion mode: repair the curly braces and
                // backslashes first, then transliterate.
                loadedDoc.replaceTahomaCurlyBracesAndBackslashes(0, -1);
                pane.toWylie(0, pane.getDocument().getLength());

                // The document is fetched from the pane again here,
                // mirroring the original call sequence, and written
                // to standard output.
                TibetanDocument convertedDoc = (TibetanDocument) pane.getDocument();
                convertedDoc.writeRTFOutputStream(System.out);

                System.exit(0);
            }
        } catch (ThdlLazyException e) {
            System.out.println("TMW_RTF_TO_THDL_WYLIE has a BUG:");
            e.getRealException().printStackTrace(System.out);
            System.exit(1);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(4);
        }
    }
}

View file

@ -17,17 +17,20 @@
<body bgcolor="white">
Provides classes and methods for inputting Tibetan text.
<p>
Designed for use with the Tibetan Computer
Company's free cross-platform TibetanMachineWeb fonts, this package
contains methods for inputting Tibetan using various keyboard
input methods, including true Wylie-based input, as well as
user-defined keyboards.
Designed for use with the Tibetan Computer Company's free
cross-platform Tibetan Machine Web fonts, this package contains
methods for inputting Tibetan using various keyboard input methods,
including true Wylie-based input, as well as user-defined keyboards.
<p>
The package includes a simple Tibetan text editor, Jskad,
which can be run as an local application or embedded in a
web page. Jskad supports a wide range of functions, including
conversion back and forth between TibetanMachineWeb and
Extended Wylie.
The package includes a simple Tibetan text editor, Jskad, which can be
run as a local application or embedded in a web page.&nbsp; Jskad
supports a wide range of functions, including conversion back and
forth between Tibetan Machine Web and Extended Wylie.
<p>
Also included is TMW_RTF_TO_THDL_WYLIE, a command-line utility for
converting Rich Text Format (RTF) documents that use the Tibetan
Machine Web fonts into Extended Wylie.&nbsp; This utility is aware of
quirks in Java Swing's RTF support and works around them.
<p>
<h2>Related Documentation</h2>
@see <a href="../text/package-summary.html">org.thdl.tib.text</a>